Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import re\n",
"import time\n",
"import datetime, pytz\n",
"\n",
"import nest_asyncio\n",
"import requests\n",
"import slack\n",
"import wget\n",
"from bs4 import BeautifulSoup\n",
"from flask import Flask, request"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Patch asyncio so an already-running event loop can be re-entered.\n",
"# NOTE(review): presumably needed because the notebook kernel already runs a loop\n",
"# that the slack client also wants to drive -- confirm it is still required.\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Read the Slack token from the environment instead of embedding it in the notebook.\n",
"# SECURITY: a token was previously committed in this cell. Treat it as leaked: revoke\n",
"# it in the Slack admin console and export a fresh one as SLACK_API_TOKEN before\n",
"# starting the kernel.\n",
"slack_token = os.environ.get(\"SLACK_API_TOKEN\")\n",
"if not slack_token:\n",
"    raise RuntimeError(\"Set the SLACK_API_TOKEN environment variable before running this notebook.\")\n",
"\n",
"# Web API client shared by the cells below.\n",
"client = slack.WebClient(token=slack_token)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Smoke test: post a throwaway message. The response is kept in `test` so that a\n",
"# later cell can refer back to the posted message (e.g. via its \"ts\").\n",
"test = client.chat_postMessage(channel=\"journaltest\", text=\"Out here testing this slick slack API\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<slack.web.slack_response.SlackResponse at 0x7f06a804f090>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove the smoke-test message again. The channel ID is taken from the post\n",
"# response rather than hard-coded, so the delete always targets the channel the\n",
"# message was actually posted to.\n",
"client.chat_delete(\n",
"    channel=test[\"channel\"],\n",
"    ts=test[\"ts\"],\n",
"    as_user=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"class Journal:\n",
"    \"\"\"Plain record holding the five values passed to the constructor.\"\"\"\n",
"\n",
"    def __init__(self, iden, name, ts, url, reaction):\n",
"        \"\"\"Store the arguments on the instance.\n",
"\n",
"        iden     -> self.id\n",
"        name     -> self.name\n",
"        ts       -> self.time\n",
"        url      -> self.url\n",
"        reaction -> self.star\n",
"        \"\"\"\n",
"        self.id, self.name = iden, name\n",
"        self.time, self.url = ts, url\n",
"        self.star = reaction"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Resolve the ID of the journal channel from its name.\n",
"# conversations.list replaces the deprecated channels.list. The cursor is followed\n",
"# so the channel is still found when the workspace has more channels than fit on\n",
"# one page.\n",
"JOURNAL_CHANNEL_NAME = \"journalclub\"\n",
"\n",
"journal_channel_id = None\n",
"list_kwargs = {\"limit\": 200}\n",
"while True:\n",
"    channel_page = client.conversations_list(**list_kwargs)\n",
"    journal_channel_id = next(\n",
"        (channel[\"id\"] for channel in channel_page[\"channels\"] if channel[\"name\"] == JOURNAL_CHANNEL_NAME),\n",
"        None,\n",
"    )\n",
"    if journal_channel_id is not None:\n",
"        break\n",
"    # An empty or missing cursor means this was the last page.\n",
"    next_cursor = (channel_page.get(\"response_metadata\") or {}).get(\"next_cursor\")\n",
"    if not next_cursor:\n",
"        break\n",
"    list_kwargs[\"cursor\"] = next_cursor\n",
"    time.sleep(1)  # pause between pages to respect rate limits\n",
"\n",
"# Fail here with a clear message instead of a NameError in a later cell.\n",
"if journal_channel_id is None:\n",
"    raise LookupError(\"No channel named '{}' is visible to this token.\".format(JOURNAL_CHANNEL_NAME))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"# Request the first page of the channel history (at most 250 messages) and start\n",
"# the running message list from it.\n",
"response = client.conversations_history(channel=journal_channel_id, limit=250)\n",
"all_messages = response[\"messages\"]"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"# Keep following the pagination cursor while Slack reports further pages.\n",
"while response[\"has_more\"]:\n",
"    # need to wait 1 sec before next call due to rate limits\n",
"    time.sleep(1)\n",
"    next_cursor = response[\"response_metadata\"][\"next_cursor\"]\n",
"    response = client.conversations_history(channel=journal_channel_id, limit=250, cursor=next_cursor)\n",
"    all_messages.extend(response[\"messages\"])"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'client_msg_id': '3bbdb4a5-c27f-4ad7-8734-c801042f3791',\n",
" 'type': 'message',\n",
" 'text': '<https://www.sciencedirect.com/science/article/pii/S0092867419305550>',\n",
" 'user': 'UA8L5HAAX',\n",
" 'ts': '1561905727.012200',\n",
" 'team': 'TA873CUJV'}"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the last collected message.\n",
"# NOTE(review): presumably the oldest one, if history is returned newest-first -- confirm.\n",
"all_messages[-1]"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'client_msg_id': '3bbdb4a5-c27f-4ad7-8734-c801042f3791', 'type': 'message', 'text': '<https://www.sciencedirect.com/science/article/pii/S0092867419305550>', 'user': 'UA8L5HAAX', 'ts': '1561905727.012200', 'team': 'TA873CUJV'}\n"
]
}
],
"source": [
"# Print the last collected message on a single line.\n",
"# Reads `all_messages` (built by the history cells); the previous `messages` name is\n",
"# not defined anywhere in this notebook and fails on a fresh kernel.\n",
"print(all_messages[-1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# For every collected message, show whether it carries file attachments.\n",
"# Iterates `all_messages` (every fetched page) instead of the undefined `messages`.\n",
"for message in all_messages:\n",
"    print(\"files\" in message, message)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('2019', 'June')\n"
]
}
],
"source": [
"def timestamp_to_date(ts):\n",
"    \"\"\"Convert a Unix timestamp into a ``(year, month_name)`` pair in UTC.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    ts : int, float or str\n",
"        Seconds since the epoch. Numeric strings such as the ``\"1561905727.012200\"``\n",
"        message timestamps are accepted as well.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple of (str, str)\n",
"        The year as a string and the English month name, e.g. ``(\"2019\", \"June\")``.\n",
"    \"\"\"\n",
"    month_names = (\n",
"        \"January\", \"February\", \"March\", \"April\", \"May\", \"June\",\n",
"        \"July\", \"August\", \"September\", \"October\", \"November\", \"December\",\n",
"    )\n",
"    # Convert straight to an aware UTC datetime. Building a naive local datetime and\n",
"    # then stamping UTC onto it only relabels the local wall-clock time, which shifts\n",
"    # the result by the machine's UTC offset and can land in the wrong month or year.\n",
"    moment = datetime.datetime.fromtimestamp(float(ts), tz=datetime.timezone.utc)\n",
"    return str(moment.year), month_names[moment.month - 1]\n",
"\n",
"print(timestamp_to_date(1561905727.012200))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FTZC1D8FM https://slack-files.com/TA873CUJV-FTZC1D8FM-6b527dc4ab\n",
"FUALCN3JL https://slack-files.com/TA873CUJV-FUALCN3JL-ff6042921e\n",
"FTVHJ9B51 https://slack-files.com/TA873CUJV-FTVHJ9B51-4a8ff13324\n"
]
}
],
"source": [
"journals = []\n",
"\n",
"# Collect one Journal record per message that carries a file attachment.\n",
"# Iterates `all_messages` (every fetched page); `messages` is not defined in this notebook.\n",
"for message in all_messages:\n",
"    if \"files\" not in message:\n",
"        continue\n",
"\n",
"    # Only the first attachment of each message is looked up.\n",
"    response = client.files_info(file=message[\"files\"][0][\"id\"])\n",
"\n",
"    # Make sure the lookup succeeded before reading the file metadata.\n",
"    assert response[\"ok\"], \"files.info did not return ok\"\n",
"\n",
"    info = response[\"file\"]\n",
"    journals.append(\n",
"        Journal(\n",
"            info[\"id\"],\n",
"            info[\"name\"],\n",
"            info[\"timestamp\"],\n",
"            info[\"permalink_public\"],\n",
"            \"reactions\" in message,  # any reaction on the message counts as starred\n",
"        )\n",
"    )\n",
"\n",
"# Root directory; articles are stored below it as <year>/<month>/<title>.\n",
"archive_root = \"/project/owlmayerTemporary/Sid/slack\"\n",
"\n",
"for journal in journals:\n",
"    # Only store journals that are starred\n",
"    if not journal.star:\n",
"        continue\n",
"    print(journal.id, journal.url)\n",
"\n",
"    # Create the target directory first, so a filesystem error cannot leave the file\n",
"    # shared publicly. makedirs() also creates missing parents and is a no-op when\n",
"    # the directory already exists.\n",
"    year, month = timestamp_to_date(journal.time)\n",
"    out_dir = os.path.join(archive_root, year, month)\n",
"    os.makedirs(out_dir, exist_ok=True)\n",
"\n",
"    # Temporarily make the file public so its public page can be scraped.\n",
"    client.files_sharedPublicURL(file=journal.id)\n",
"    try:\n",
"        page = requests.get(journal.url, timeout=30)\n",
"        page.raise_for_status()\n",
"        soup = BeautifulSoup(page.content, 'html.parser')\n",
"        job_elems = soup.find('a', class_='file_header generic_header')\n",
"        title_elem = job_elems.find('h4') if job_elems is not None else None\n",
"        if title_elem is None:\n",
"            # Page layout not as expected: skip this file instead of aborting the batch.\n",
"            print(\"Skipping {}: download link not found on the public page\".format(journal.id))\n",
"            continue\n",
"\n",
"        pdf_url = job_elems.get(\"href\")\n",
"        # basename() keeps a title containing path separators inside out_dir.\n",
"        filename = os.path.join(out_dir, os.path.basename(title_elem.text.replace(\" \", \"_\")))\n",
"\n",
"        if not os.path.exists(filename):\n",
"            filename = wget.download(pdf_url, out=filename)\n",
"    finally:\n",
"        # Always withdraw the public link again, even when the download failed.\n",
"        client.files_revokePublicURL(file=journal.id)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"RTM_READ_DELAY = 1 # 1 second delay between reading from RTM\n",
"# Command prefix that handle_command() recognises.\n",
"EXAMPLE_COMMAND = \"do\"\n",
"# Group 1 captures the mentioned user ID, group 2 the remainder of the message.\n",
"MENTION_REGEX = \"^<@(|[WU].+?)>(.*)\"\n",
"\n",
"def parse_bot_commands(slack_events, bot_id=None):\n",
"    \"\"\"Find the first command addressed to the bot in a batch of Slack events.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    slack_events : iterable of dict\n",
"        Events to scan. Only events whose \"type\" is \"message\" and that have no\n",
"        \"subtype\" key are considered.\n",
"    bot_id : str, optional\n",
"        User ID a message must mention to count as a command. When omitted, the\n",
"        module-level ``starterbot_id`` is used.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(command_text, channel)`` for the first matching event, otherwise\n",
"        ``(None, None)``.\n",
"    \"\"\"\n",
"    for event in slack_events:\n",
"        if event.get(\"type\") != \"message\" or \"subtype\" in event:\n",
"            continue\n",
"        user_id, message = parse_direct_mention(event.get(\"text\"))\n",
"        if user_id is None:\n",
"            continue\n",
"        # Resolve the fallback lazily so callers that pass bot_id never need the global.\n",
"        target_id = starterbot_id if bot_id is None else bot_id\n",
"        if user_id == target_id:\n",
"            return message, event[\"channel\"]\n",
"    return None, None\n",
"\n",
"\n",
"def parse_direct_mention(message_text):\n",
"    \"\"\"Split a message that starts with a user mention into ``(user_id, rest)``.\n",
"\n",
"    Returns ``(None, None)`` when the text is empty or missing, or does not start\n",
"    with a mention. The remainder of the message is stripped of surrounding\n",
"    whitespace.\n",
"    \"\"\"\n",
"    if not message_text:\n",
"        return None, None\n",
"    matches = re.search(MENTION_REGEX, message_text)\n",
"    if matches is None:\n",
"        return None, None\n",
"    # the first group contains the username, the second group contains the remaining message\n",
"    return matches.group(1), matches.group(2).strip()\n",
"\n",
"def handle_command(command, channel):\n",
"    \"\"\"Reply in ``channel`` to ``command``.\n",
"\n",
"    Commands starting with EXAMPLE_COMMAND get a fixed acknowledgement; anything\n",
"    else gets a help text naming EXAMPLE_COMMAND.\n",
"    \"\"\"\n",
"    # Default response is help text for the user\n",
"    default_response = \"Not sure what you mean. Try *{}*.\".format(EXAMPLE_COMMAND)\n",
"\n",
"    # This is where you start to implement more commands!\n",
"    response = None\n",
"    if command.startswith(EXAMPLE_COMMAND):\n",
"        response = \"Sure...write some more code then I can do that!\"\n",
"\n",
"    # Send the reply through the notebook's WebClient. The name `slack_client` used\n",
"    # before is not defined in this notebook.\n",
"    client.chat_postMessage(\n",
"        channel=channel,\n",
"        text=response or default_response\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# `WebClient` has no `rtm_read()`, so the polling loop that used to live here raised\n",
"# AttributeError. Real-time events are delivered through `slack.RTMClient`, which\n",
"# invokes the registered callback once per incoming event.\n",
"\n",
"# ID of the authenticated user; parse_bot_commands() compares mentions against it.\n",
"starterbot_id = client.auth_test()[\"user_id\"]\n",
"\n",
"@slack.RTMClient.run_on(event=\"message\")\n",
"def dispatch_message(**payload):\n",
"    \"\"\"Forward one RTM message event to the command parser and handler.\"\"\"\n",
"    # NOTE(review): the \"type\" key is assumed to be absent from payload[\"data\"], so it\n",
"    # is re-added here because parse_bot_commands() filters on it -- confirm against\n",
"    # the installed slackclient version.\n",
"    event = dict(payload[\"data\"], type=\"message\")\n",
"    command, channel = parse_bot_commands([event])\n",
"    if command:\n",
"        handle_command(command, channel)\n",
"\n",
"# NOTE: re-running this cell registers the callback a second time; restart the\n",
"# kernel before re-running. start() blocks until the connection is closed.\n",
"rtm_client = slack.RTMClient(token=slack_token)\n",
"rtm_client.start()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Make sure the 2019 archive directory exists. makedirs() also creates a missing\n",
"# parent directory and, with exist_ok=True, does nothing when the directory is\n",
"# already there (no check-then-create race).\n",
"out_dir = os.path.join(\"/project/owlmayerTemporary/Sid/slack\", \"2019\")\n",
"os.makedirs(out_dir, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Revoke the public URL of every file collected in `journals`, starred or not.\n",
"for journal in journals:\n",
" client.files_revokePublicURL(file = journal.id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One-off download of a single file, identified by its Slack file ID.\n",
"file_id = \"FRM2X6RT7\"\n",
"\n",
"# Take the public page URL from the file's own metadata so the page that is scraped\n",
"# always belongs to the file that is shared. The URL used to be hard-coded and named\n",
"# a different file ID (FRM2HFYD6) than the one being shared.\n",
"# NOTE(review): confirm FRM2X6RT7 is the file that was meant.\n",
"url = client.files_info(file=file_id)[\"file\"][\"permalink_public\"]\n",
"\n",
"client.files_sharedPublicURL(file=file_id)\n",
"try:\n",
"    page = requests.get(url, timeout=30)\n",
"    page.raise_for_status()\n",
"    soup = BeautifulSoup(page.content, 'html.parser')\n",
"    job_elems = soup.find('a', class_='file_header generic_header')\n",
"    if job_elems is None:\n",
"        raise RuntimeError(\"Download link not found on {}\".format(url))\n",
"    pdf_url = job_elems.get(\"href\")\n",
"    filename = wget.download(pdf_url, out=out_dir)\n",
"finally:\n",
"    # Withdraw the public link again even when the download failed.\n",
"    client.files_revokePublicURL(file=file_id)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Manually create a public URL for one specific file (by hard-coded file ID).\n",
"client.files_sharedPublicURL(file = \"FRM2X6RT7\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Manually revoke the public URL of the same hard-coded file ID again.\n",
"client.files_revokePublicURL(file = \"FRM2X6RT7\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Target path for the scraped file: the page's <h4> title with spaces replaced by\n",
"# underscores, inside out_dir. `job_elems` comes from soup.find(), which returns a\n",
"# single element, so it is used directly; indexing it with [0] looks up an HTML\n",
"# attribute named 0 and fails.\n",
"os.path.join(out_dir, job_elems.find('h4').text.replace(\" \", \"_\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check whether the scraped file is already present in out_dir. The name is built\n",
"# the same way as when the file is written (spaces replaced by underscores), and\n",
"# `job_elems` is a single element from soup.find(), so it is not indexed.\n",
"os.path.exists(os.path.join(out_dir, job_elems.find('h4').text.replace(\" \", \"_\")))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}