slack_attack.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "import time\n",
    "\n",
    "import nest_asyncio\n",
    "import pytz\n",
    "import requests\n",
    "import slack\n",
    "import wget\n",
    "from bs4 import BeautifulSoup\n",
    "from flask import Flask, request"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the nest_asyncio patch once, before any Slack client call is made.\n",
    "# NOTE(review): presumably required because the Jupyter kernel already runs an event loop\n",
    "# and the slack client needs to run its own inside it -- confirm for the installed version.\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The Slack token is a secret, so it is read from the environment instead of being\n",
    "# embedded in the notebook, where it would be committed and shared along with the code.\n",
    "# NOTE: the token that used to be hard-coded in this cell must be treated as leaked and revoked.\n",
    "slack_token = os.environ.get(\"SLACK_API_TOKEN\")\n",
    "if not slack_token:\n",
    "    raise RuntimeError(\"Set the SLACK_API_TOKEN environment variable before running this notebook\")\n",
    "\n",
    "# Web API client shared by every cell below\n",
    "client = slack.WebClient(token = slack_token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Smoke test: post a throwaway message and keep the API response in `test`\n",
    "# (the next cell reads test[\"ts\"] to delete the message again).\n",
    "test = client.chat_postMessage(channel=\"journaltest\", text=\"Out here testing this slick slack API\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<slack.web.slack_response.SlackResponse at 0x7f06a804f090>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Delete the smoke-test message again; its timestamp comes from the response stored in `test`.\n",
    "client.chat_delete(channel=\"CR9EG2NAF\", ts=test[\"ts\"], as_user=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Journal:\n",
    "    \"\"\"Plain record holding the details of one file attachment found in the channel.\"\"\"\n",
    "\n",
    "    def __init__(self,iden,name,ts,url,reaction):\n",
    "        # Identifier and file name of the attachment\n",
    "        self.id, self.name = iden, name\n",
    "        # Timestamp of the file and the link it can be fetched from\n",
    "        self.time, self.url = ts, url\n",
    "        # Flag supplied by the caller; the download loop only keeps journals where it is True\n",
    "        self.star = reaction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Resolve the id of the channel named \"journalclub\".\n",
    "# Start from None so a failed lookup is reported here, instead of surfacing later as a\n",
    "# NameError (fresh kernel) or silently reusing an id left over from an earlier run.\n",
    "# NOTE(review): Slack has deprecated channels.list in favour of conversations.list --\n",
    "# confirm that channels_list() still works for this workspace.\n",
    "journal_channel_id = None\n",
    "for channel in client.channels_list()[\"channels\"]:\n",
    "    if channel[\"name\"] == \"journalclub\":\n",
    "        journal_channel_id = channel[\"id\"]\n",
    "        break\n",
    "\n",
    "if journal_channel_id is None:\n",
    "    raise LookupError(\"No channel named 'journalclub' was returned by channels_list()\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Request the first page (at most 250 messages) of the journal channel's history.\n",
    "response = client.conversations_history(channel=journal_channel_id, limit=250)\n",
    "\n",
    "# Running list of messages; the pagination loop in the next cell appends further pages to it.\n",
    "all_messages = response[\"messages\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep requesting pages for as long as the previous response reports that more are available.\n",
    "while response['has_more']:\n",
    "    time.sleep(1)   # wait 1 sec between calls because of rate limits\n",
    "    next_cursor = response['response_metadata']['next_cursor']\n",
    "    response = client.conversations_history(\n",
    "        channel=journal_channel_id,\n",
    "        limit=250,\n",
    "        cursor=next_cursor\n",
    "    )\n",
    "    all_messages.extend(response[\"messages\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'client_msg_id': '3bbdb4a5-c27f-4ad7-8734-c801042f3791',\n",
       " 'type': 'message',\n",
       " 'text': '<https://www.sciencedirect.com/science/article/pii/S0092867419305550>',\n",
       " 'user': 'UA8L5HAAX',\n",
       " 'ts': '1561905727.012200',\n",
       " 'team': 'TA873CUJV'}"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the last element of all_messages as the cell output (sanity check of the fetch).\n",
    "all_messages[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'client_msg_id': '3bbdb4a5-c27f-4ad7-8734-c801042f3791', 'type': 'message', 'text': '<https://www.sciencedirect.com/science/article/pii/S0092867419305550>', 'user': 'UA8L5HAAX', 'ts': '1561905727.012200', 'team': 'TA873CUJV'}\n"
     ]
    }
   ],
   "source": [
    "# Print the last fetched message on a single line.\n",
    "# Reads all_messages (built by the history cells above); the `messages` name used here\n",
    "# before is never defined in this notebook and fails on a fresh kernel.\n",
    "print(all_messages[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every fetched message, print whether it carries file attachments, then the message itself.\n",
    "# Iterates all_messages (built by the history cells above); the `messages` name used here\n",
    "# before is never defined in this notebook and fails on a fresh kernel.\n",
    "for message in all_messages:\n",
    "    print(\"files\" in message, message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('2019', 'June')\n"
     ]
    }
   ],
   "source": [
    "def timestamp_to_date(ts):\n",
    "    \"\"\"Return the UTC year and English month name for a Unix timestamp.\n",
    "\n",
    "    Parameters:\n",
    "        ts: seconds since the Unix epoch as an int, float or numeric string\n",
    "            (Slack reports message timestamps as strings such as \"1561905727.012200\").\n",
    "\n",
    "    Returns:\n",
    "        A (year, month) tuple of strings, e.g. (\"2019\", \"June\").\n",
    "    \"\"\"\n",
    "    month_names = (\n",
    "        \"January\", \"February\", \"March\", \"April\", \"May\", \"June\",\n",
    "        \"July\", \"August\", \"September\", \"October\", \"November\", \"December\",\n",
    "    )\n",
    "    # Build an aware UTC datetime directly. Creating a naive local-time value and then\n",
    "    # stamping tzinfo=UTC on it only relabels local time, so the month returned for\n",
    "    # timestamps near a month boundary depended on the timezone of the machine.\n",
    "    moment = datetime.datetime.fromtimestamp(float(ts), tz=datetime.timezone.utc)\n",
    "    return str(moment.year), month_names[moment.month - 1]\n",
    "\n",
    "print(timestamp_to_date(1561905727.012200))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FTZC1D8FM https://slack-files.com/TA873CUJV-FTZC1D8FM-6b527dc4ab\n",
      "FUALCN3JL https://slack-files.com/TA873CUJV-FUALCN3JL-ff6042921e\n",
      "FTVHJ9B51 https://slack-files.com/TA873CUJV-FTVHJ9B51-4a8ff13324\n"
     ]
    }
   ],
   "source": [
    "journals = []\n",
    "\n",
    "# Store journal articles: one Journal record per message that carries an attachment.\n",
    "# Iterates all_messages (built by the history cells above); the `messages` name used here\n",
    "# before is never defined in this notebook and fails on a fresh kernel.\n",
    "for message in all_messages:\n",
    "    # Look at messages that have an attachment\n",
    "    if \"files\" not in message:\n",
    "        continue\n",
    "\n",
    "    # Only the first attachment of a message is looked up. The result is kept in its own\n",
    "    # variable so the `response` used by the pagination cell is not overwritten.\n",
    "    file_response = client.files_info(file = message[\"files\"][0][\"id\"])\n",
    "\n",
    "    # Make sure that everything is fine and can be accessed properly\n",
    "    assert file_response[\"ok\"]\n",
    "\n",
    "    # Create a journal object instance and store\n",
    "    info = file_response[\"file\"]\n",
    "    journals.append(Journal(\n",
    "        info[\"id\"],\n",
    "        info[\"name\"],\n",
    "        info[\"timestamp\"],\n",
    "        info[\"permalink_public\"],\n",
    "        \"reactions\" in message,   # a message with any reaction counts as starred\n",
    "    ))\n",
    "\n",
    "# Root folder of the archive; articles are filed below it as <year>/<month>/\n",
    "base_dir = \"/project/owlmayerTemporary/Sid/slack\"\n",
    "\n",
    "for journal in journals:\n",
    "    # Only store journals that are starred\n",
    "    if not journal.star:\n",
    "        continue\n",
    "\n",
    "    print(journal.id,journal.url)\n",
    "\n",
    "    # Make the file public so that it can be accessed\n",
    "    client.files_sharedPublicURL(file = journal.id)\n",
    "    try:\n",
    "        # Determine which directory to put the journal article in; makedirs creates the\n",
    "        # year and month levels in one step and tolerates folders that already exist\n",
    "        year,month = timestamp_to_date(journal.time)\n",
    "        out_dir = os.path.join(base_dir,year,month)\n",
    "        os.makedirs(out_dir, exist_ok=True)\n",
    "\n",
    "        # Obtain the public page and locate the download link of the pdf file\n",
    "        page = requests.get(journal.url)\n",
    "        page.raise_for_status()\n",
    "        soup = BeautifulSoup(page.content, 'html.parser')\n",
    "        job_elems = soup.find('a', class_='file_header generic_header')\n",
    "        if job_elems is None:\n",
    "            raise RuntimeError(\"No download link found on the public page of file \" + journal.id)\n",
    "        pdf_url = job_elems.get(\"href\")\n",
    "        filename = os.path.join(out_dir,job_elems.find('h4').text.replace(\" \",\"_\"))\n",
    "\n",
    "        # Skip articles that were already downloaded by an earlier run\n",
    "        if not os.path.exists(filename):\n",
    "            filename = wget.download(pdf_url, out = filename)\n",
    "    finally:\n",
    "        # Always make the file private again, even when the download above failed,\n",
    "        # so an error cannot leave the attachment publicly accessible\n",
    "        client.files_revokePublicURL(file = journal.id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "RTM_READ_DELAY = 1 # 1 second delay between reading from RTM\n",
    "EXAMPLE_COMMAND = \"do\"\n",
    "MENTION_REGEX = \"^<@(|[WU].+?)>(.*)\"\n",
    "\n",
    "# User id that a direct mention has to target for a message to count as a command.\n",
    "# It was referenced below but never assigned anywhere in the notebook.\n",
    "# NOTE(review): assumes the id reported by auth.test for this token is the one to match -- confirm.\n",
    "starterbot_id = client.auth_test()[\"user_id\"]\n",
    "\n",
    "def parse_bot_commands(slack_events):\n",
    "    \"\"\"\n",
    "        Parses a list of Slack events to find bot commands.\n",
    "        If a bot command is found, this function returns a tuple of command and channel.\n",
    "        If it is not found, then this function returns None, None.\n",
    "    \"\"\"\n",
    "    for event in slack_events:\n",
    "        # Only plain messages are considered; events carrying a subtype are skipped\n",
    "        if event.get(\"type\") == \"message\" and \"subtype\" not in event:\n",
    "            user_id, message = parse_direct_mention(event.get(\"text\", \"\"))\n",
    "            if user_id == starterbot_id:\n",
    "                return message, event[\"channel\"]\n",
    "    return None, None\n",
    "\n",
    "\n",
    "def parse_direct_mention(message_text):\n",
    "    \"\"\"\n",
    "        Finds a direct mention (a mention that is at the beginning) in message text\n",
    "        and returns a tuple of the mentioned user ID and the remaining message text.\n",
    "        If there is no direct mention, returns None, None.\n",
    "    \"\"\"\n",
    "    matches = re.search(MENTION_REGEX, message_text)\n",
    "    # the first group contains the user ID, the second group contains the remaining message\n",
    "    return (matches.group(1), matches.group(2).strip()) if matches else (None, None)\n",
    "\n",
    "def handle_command(command, channel):\n",
    "    \"\"\"\n",
    "        Executes bot command if the command is known and posts the reply to `channel`.\n",
    "        Unknown commands are answered with a help text.\n",
    "    \"\"\"\n",
    "    # Default response is help text for the user\n",
    "    default_response = \"Not sure what you mean. Try *{}*.\".format(EXAMPLE_COMMAND)\n",
    "\n",
    "    # Finds and executes the given command, filling in response\n",
    "    response = None\n",
    "    # This is where you start to implement more commands!\n",
    "    if command.startswith(EXAMPLE_COMMAND):\n",
    "        response = \"Sure...write some more code then I can do that!\"\n",
    "\n",
    "    # Sends the response back to the channel through the notebook's WebClient; the\n",
    "    # `slack_client` name used here before is not defined anywhere in the notebook\n",
    "    client.chat_postMessage(\n",
    "        channel=channel,\n",
    "        text=response or default_response\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# slack.WebClient has no rtm_read() (the polling loop that stood here raised AttributeError).\n",
    "# Real-time events are delivered by slack.RTMClient, which calls registered callbacks\n",
    "# instead of being polled.\n",
    "@slack.RTMClient.run_on(event=\"message\")\n",
    "def on_message(**payload):\n",
    "    \"\"\"Pass one incoming message event to the command parser and run the command, if any.\"\"\"\n",
    "    # NOTE(review): the event type may already be stripped from payload[\"data\"]; it is\n",
    "    # set again here because parse_bot_commands() filters on event[\"type\"] -- confirm.\n",
    "    event = dict(payload[\"data\"], type=\"message\")\n",
    "    command, channel = parse_bot_commands([event])\n",
    "    if command:\n",
    "        handle_command(command, channel)\n",
    "\n",
    "rtm_client = slack.RTMClient(token=slack_token)\n",
    "rtm_client.start()   # blocks until interrupted, like the polling loop it replaces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure the 2019 output folder exists.\n",
    "# makedirs(exist_ok=True) also creates missing parent folders and does not fail when the\n",
    "# folder is already there, which the separate exists()/mkdir() steps could not guarantee.\n",
    "out_dir = os.path.join(\"/project/owlmayerTemporary/Sid/slack\",\"2019\")\n",
    "os.makedirs(out_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cleanup pass: revoke the public URL of every collected journal file.\n",
    "for journal in journals:\n",
    "    client.files_revokePublicURL(file = journal.id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-off manual download of a single attachment into out_dir.\n",
    "# NOTE(review): the file id shared/revoked here (FRM2X6RT7) differs from the id embedded in\n",
    "# the public URL below (FRM2HFYD6) -- confirm that both refer to the same attachment.\n",
    "client.files_sharedPublicURL(file = \"FRM2X6RT7\")\n",
    "try:\n",
    "    url = \"https://slack-files.com/TA873CUJV-FRM2HFYD6-37df9146c9\"\n",
    "    page = requests.get(url)\n",
    "    soup = BeautifulSoup(page.content, 'html.parser')\n",
    "    # Anchor element of the public page that carries the download link\n",
    "    job_elems = soup.find('a', class_='file_header generic_header')\n",
    "    pdf_url = job_elems.get(\"href\")\n",
    "    filename = wget.download(pdf_url, out = out_dir)\n",
    "finally:\n",
    "    # Revoke the public link even if the download above raised, so the file is not left exposed\n",
    "    client.files_revokePublicURL(file = \"FRM2X6RT7\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Manually enable the public URL of one specific file (counterpart of the revoke call in the next cell).\n",
    "client.files_sharedPublicURL(file = \"FRM2X6RT7\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Manually revoke the public URL of the same file id again.\n",
    "client.files_revokePublicURL(file = \"FRM2X6RT7\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the target path: the text of the link's <h4> element with spaces replaced by underscores.\n",
    "# job_elems is the single element returned by soup.find(...), so it is used directly;\n",
    "# indexing it with [0] is an attribute lookup on the tag and fails.\n",
    "os.path.join(out_dir,job_elems.find('h4').text.replace(\" \",\"_\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check whether a file named after the link's <h4> text already exists in out_dir.\n",
    "# job_elems is the single element returned by soup.find(...), so it is used directly;\n",
    "# indexing it with [0] is an attribute lookup on the tag and fails.\n",
    "os.path.exists(os.path.join(out_dir,job_elems.find('h4').text))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}