diff --git a/.gitignore b/.gitignore index 038b4f9..a854c9f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ data/facebook_auth.csv data/facebook_auth_ingmar.csv data/facebook_auth_koustuv.csv +data/facebook_auth_tim.csv data/spec_data.tsv data/query_results/ data/all_FB_interests_2016/ diff --git a/package-list.txt b/package-list.txt new file mode 100644 index 0000000..02899b9 --- /dev/null +++ b/package-list.txt @@ -0,0 +1,234 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: win-64 +alabaster=0.7.10=py27h2cab13d_0 +anaconda=custom=py27h689e5c3_0 +anaconda-client=1.6.9=py27_0 +anaconda-project=0.8.2=py27he89e3ca_0 +asn1crypto=0.24.0=py27_0 +astroid=1.6.1=py27_0 +astropy=2.0.3=py27h0c8e037_0 +attrs=17.4.0=py27_0 +babel=2.5.3=py27_0 +backports=1.0=py27h6492d98_1 +backports.functools_lru_cache=1.4=py27h9586e20_1 +backports.shutil_get_terminal_size=1.0.0=py27h1657125_2 +backports.shutil_which=3.5.1=py27_2 +backports_abc=0.5=py27h0ec6b72_0 +beautifulsoup4=4.6.0=py27hc287451_1 +bitarray=0.8.1=py27h0c8e037_1 +bkcharts=0.2=py27h92b6de3_0 +blaze=0.11.3=py27h97e5449_0 +bleach=2.1.2=py27_0 +bokeh=0.12.13=py27h5a33001_0 +boto=2.48.0=py27h1ccb131_1 +bottleneck=1.2.1=py27hde90680_0 +bzip2=1.0.6=h8a7aa22_4 +ca-certificates=2018.03.07=0 +cdecimal=2.3=py27h0c8e037_3 +certifi=2018.4.16=py27_0 +cffi=1.11.4=py27h0c8e037_0 +chardet=3.0.4=py27h56c3b73_1 +click=6.7=py27hb6defca_0 +cloudpickle=0.5.2=py27_1 +clyent=1.2.2=py27h4424948_1 +colorama=0.3.9=py27hdfe4ae1_0 +comtypes=1.1.4=py27_0 +configparser=3.5.0=py27h2fa79a8_0 +console_shortcut=0.1.1=h6bb2dd7_3 +contextlib2=0.5.5=py27h42efda5_0 +cryptography=2.1.4=py27h0628b04_0 +curl=7.58.0=h7a46e7a_0 +cycler=0.10.0=py27h59acbbf_0 +cython=0.27.3=py27h566c365_0 +cytoolz=0.9.0=py27h0c8e037_0 +dask=0.16.1=py27_0 +dask-core=0.16.1=py27_0 +datashape=0.5.4=py27h3d6e61b_0 +decorator=4.2.1=py27_0 +distributed=1.20.2=py27_0 +docutils=0.14=py27h8652d09_0 +entrypoints=0.2.3=py27h0271f2b_2 +enum34=1.1.6=py27h2aa175b_1 +et_xmlfile=1.0.1=py27h1de5d23_0 +fastcache=1.0.2=py27h0c8e037_2 +filelock=2.0.13=py27h49e51d3_0 +flask=0.12.2=py27h30d9212_0 +flask-cors=3.0.3=py27h4926b05_0 +freetype=2.8=hea645e0_1 +funcsigs=1.0.2=py27h8885ae1_0 +functools32=3.2.3.2=py27h0cdbcdb_1 +futures=3.2.0=py27h8b2aecd_0 +get_terminal_size=1.0.0=h38e98db_0 +gevent=1.2.2=py27h1842022_0 +git=2.17.0=hb9891f8_1 +glob2=0.6=py27hd4eee8c_0 +greenlet=0.4.12=py27h32400d3_0 +grin=1.2.1=py27_4 +h5py=2.7.1=py27h2dd4c20_0 +hdf5=1.10.1=h79de857_2 +heapdict=1.0.0=py27_2 +html5lib=1.0.1=py27h5a33001_0 +icc_rt=2017.0.4=h97af966_0 +icu=58.2=h2aa20d9_1 +idna=2.6=py27h1ea29d3_1 +imageio=2.2.0=py27h283db88_0 +imagesize=0.7.1=py27h1482bd8_0 +intel-openmp=2018.0.0=hd92c6cd_8 +ipaddress=1.0.19=py27_0 +ipykernel=4.8.0=py27_0 +ipython=5.4.1=py27_2 +ipython_genutils=0.2.0=py27hbe997df_0 +ipywidgets=7.1.1=py27_0 +isort=4.2.15=py27hdc949c3_0 +itsdangerous=0.24=py27hcf63135_1 +jdcal=1.3=py27h8c72977_0 +jedi=0.11.1=py27_0 +jinja2=2.10=py27hba1794b_0 +jpeg=9b=ha175dff_2 +jsonschema=2.6.0=py27haaf3834_0 +jupyter=1.0.0=py27_4 +jupyter_client=5.2.2=py27_0 +jupyter_console=5.2.0=py27h6ed736b_1 +jupyter_core=4.4.0=py27h1619e65_0 +jupyterlab=0.31.4=py27_0 +jupyterlab_launcher=0.10.2=py27_0 +lazy-object-proxy=1.3.1=py27ha5c8080_0 +libcurl=7.58.0=h7a46e7a_0 +libiconv=1.15=hda2e4ec_7 +libpng=1.6.34=h325896a_0 +libssh2=1.8.0=h77a7533_4 +libtiff=4.0.9=hafacce9_0 +libxml2=2.9.7=h325896a_0 +libxslt=1.1.32=h89dfad8_0 +llvmlite=0.21.0=py27h831ec56_0 +locket=0.2.0=py27h1ca288a_1 +lxml=4.1.1=py27h31b8cb8_1 +lzo=2.10=h0bb7fe3_2 +markupsafe=1.0=py27h9d4480d_1 +matplotlib=2.1.2=py27ha51faf0_0 +mccabe=0.6.1=py27hde0bf6e_1 +menuinst=1.4.11=py27h0c8e037_0 +mistune=0.8.3=py27_0 +mkl=2018.0.1=h2108138_4 +mkl-service=1.1.2=py27h3c6b6b0_4 +mpmath=1.0.0=py27h0d59bc2_2 +msgpack-python=0.5.1=py27hdc96acc_0 +multipledispatch=0.4.9=py27h8ebb51e_0 +nbconvert=5.3.1=py27h7a573cf_0 +nbformat=4.4.0=py27hf49b375_0 +networkx=2.1=py27_0 +nltk=3.2.5=py27h88af825_0 +nose=1.3.7=py27h84c72c6_2 +notebook=5.4.0=py27_0 +numba=0.36.2=np114py27h719e94c_0 +numexpr=2.6.4=py27h20dc147_0 +numpy=1.14.0=py27hfef472a_1 +numpydoc=0.7.0=py27hf7b062b_0 +odo=0.5.1=py27h64810b2_0 +olefile=0.45.1=py27_0 +openpyxl=2.4.10=py27_0 +openssl=1.0.2o=h2c51139_0 +packaging=16.8=py27hae1a450_1 +pandas=0.22.0=py27hc56fc5f_0 +pandoc=1.19.2.1=hb2460c7_1 +pandocfilters=1.4.2=py27h76461d3_1 +parso=0.1.1=py27hd69ea77_0 +partd=0.3.8=py27h1e0692f_0 +path.py=10.5=py27he482d56_0 +pathlib2=2.3.0=py27h0ae272f_0 +patsy=0.5.0=py27_0 +pep8=1.7.1=py27_0 +pickleshare=0.7.4=py27hb5f6335_0 +pillow=5.0.0=py27h901f87c_0 +pip=9.0.1=py27hdaa76b4_4 +pkginfo=1.4.1=py27h6ce81e3_1 +pluggy=0.6.0=py27h89dc50b_0 +ply=3.10=py27h5fb8a85_0 +prompt_toolkit=1.0.15=py27h3a8ec6a_0 +psutil=5.4.3=py27h0c8e037_0 +py=1.5.2=py27ha24bda0_0 +pycodestyle=2.3.1=py27h24cd5d9_0 +pycosat=0.6.3=py27hcd410c5_0 +pycparser=2.18=py27hb43d16c_1 +pycrypto=2.6.1=py27h0c8e037_7 +pycurl=7.43.0.1=py27hc64555f_0 +pyflakes=1.6.0=py27h34e7826_0 +pygments=2.2.0=py27ha50f84f_0 +pylint=1.8.2=py27_0 +pyodbc=4.0.22=py27hc56fc5f_0 +pyopenssl=17.5.0=py27h59156d7_0 +pyparsing=2.2.0=py27hc7d9fa6_1 +pyqt=5.6.0=py27h224ed30_5 +pysocks=1.6.7=py27h59bdd1e_1 +pytables=3.4.2=py27h583b7c0_2 +pytest=3.3.2=py27_0 +python=2.7.14=h8c3f1cb_23 +python-dateutil=2.6.1=py27hbdcc174_1 +pytz=2017.3=py27hca431c1_0 +pywavelets=0.5.2=py27h0dc3f48_0 +pywin32=222=py27h0c8e037_0 +pywinpty=0.5=py27hc56fc5f_1 +pyyaml=3.12=py27ha287073_1 +pyzmq=16.0.3=py27he883654_0 +qt=5.6.2=vc9hc26998b_12 +qtawesome=0.4.4=py27h766b13d_0 +qtconsole=4.3.1=py27h77d40ac_0 +qtpy=1.3.1=py27h1ff2b4b_0 +requests=2.18.4=py27h3159eba_1 +rope=0.10.7=py27hb65afb6_0 +ruamel_yaml=0.15.35=py27h0c8e037_1 +scandir=1.6=py27h518bda0_0 +scikit-image=0.13.1=py27h0c8e037_1 +scikit-learn=0.19.1=py27he00e316_0 +scipy=1.0.0=py27h7cd1477_0 +seaborn=0.8.1=py27hab56d54_0 +send2trash=1.4.2=py27_0 +setuptools=38.4.0=py27_0 +simplegeneric=0.8.1=py27_2 +singledispatch=3.4.0.3=py27h3f9d112_0 +sip=4.18.1=py27h5ec1c1a_2 +six=1.11.0=py27ha5e1701_1 +snowballstemmer=1.2.1=py27h28d3bf7_0 +sortedcollections=0.5.3=py27h21b938c_0 +sortedcontainers=1.5.9=py27_0 +sphinx=1.6.6=py27_0 +sphinxcontrib=1.0=py27h0e2fb95_1 +sphinxcontrib-websupport=1.0.1=py27h0d0f901_1 +spyder=3.2.8=py27_0 +sqlalchemy=1.2.1=py27h0c8e037_0 +sqlite=3.22.0=h8b3e59e_0 +ssl_match_hostname=3.5.0.1=py27hea8a0f4_2 +statsmodels=0.8.0=py27hc77c1fc_0 +subprocess32=3.2.7=py27hcc576e2_0 +sympy=1.1.1=py27hde44fae_0 +tblib=1.3.2=py27h8ae915c_0 +terminado=0.8.1=py27_1 +testpath=0.3.1=py27h1cd488d_0 +tk=8.6.7=h144d9c4_3 +toolz=0.9.0=py27_0 +tornado=4.5.3=py27_0 +traitlets=4.3.2=py27h1b1b3a5_0 +typing=3.6.2=py27h9197bb0_0 +unicodecsv=0.14.1=py27h0bf7bb0_0 +urllib3=1.22=py27hb9f5a07_0 +vc=9=h7299396_1 +vs2008_runtime=9.00.30729.1=hfaea7d5_1 +vs2015_runtime=14.0.25123=3 +wcwidth=0.1.7=py27hb1a0d82_0 +webencodings=0.5.1=py27h4e224a2_1 +werkzeug=0.14.1=py27_0 +wheel=0.30.0=py27ha643586_1 +widgetsnbextension=3.1.0=py27_0 +win_inet_pton=1.0.1=py27hf41312a_1 +win_unicode_console=0.5=py27hc037021_0 +wincertstore=0.2=py27hf04cefb_0 +winpty=0.4.3=4 +wrapt=1.10.11=py27hcd2b27d_0 +xlrd=1.1.0=py27h2b87a7f_1 +xlsxwriter=1.0.2=py27h5ed79b1_0 +xlwings=0.11.5=py27_0 +xlwt=1.3.0=py27h2271735_0 +yaml=0.1.7=h3e6d941_2 +zict=0.1.3=py27h0171463_0 +zlib=1.2.11=hbc2faf4_2 diff --git a/spec-file.txt b/spec-file.txt new file mode 100644 index 0000000..253c37d --- /dev/null +++ b/spec-file.txt @@ -0,0 +1,34 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: win-64 +@EXPLICIT +https://repo.continuum.io/pkgs/main/win-64/ca-certificates-2018.03.07-0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/conda-env-2.6.0-h36134e3_1.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/vs2015_runtime-14.0.25123-3.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/vc-14-h0510ff6_3.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/openssl-1.0.2o-h8ea7d77_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/python-2.7.15-he216670_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/yaml-0.1.7-hc54c509_2.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/asn1crypto-0.24.0-py36_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/certifi-2018.4.16-py36_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/chardet-3.0.4-py36h420ce6e_1.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/console_shortcut-0.1.1-h6bb2dd7_3.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/idna-2.6-py36h148d497_1.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/pycosat-0.6.3-py36h413d8a4_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/pycparser-2.18-py36hd053e01_1.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/pywin32-223-py36hfa6e2cd_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/ruamel_yaml-0.15.35-py36hfa6e2cd_1.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/six-1.11.0-py36h4db2310_1.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/win_inet_pton-1.0.1-py36he67d7fd_1.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/wincertstore-0.2-py36h7fe50ca_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/cffi-1.11.5-py36h945400d_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/menuinst-1.4.11-py36hfa6e2cd_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/pysocks-1.6.8-py36_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/setuptools-39.0.1-py36_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/cryptography-2.2.2-py36hfa6e2cd_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/wheel-0.31.0-py36_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/pip-9.0.3-py36_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/pyopenssl-17.5.0-py36h5b7d817_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/urllib3-1.22-py36h276f60a_0.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/requests-2.18.4-py36h4371aae_1.tar.bz2 +https://repo.continuum.io/pkgs/main/win-64/conda-4.5.1-py27_0.tar.bz2 diff --git a/src/data_processing/example_mine.py b/src/data_processing/example_mine.py index 34db003..e574884 100644 --- a/src/data_processing/example_mine.py +++ b/src/data_processing/example_mine.py @@ -1,5 +1,6 @@ from argparse import ArgumentParser from pysocialwatcher import watcherAPI +from pysocialwatcher.constants import TOKENS import os import re @@ -18,7 +19,8 @@ def query_facebook_audience(access_token, user_id, query_file): response :: DataFrame with query response(s) => one response per row """ watcher = watcherAPI() - watcher.add_token_and_account_number(access_token, user_id) + if((access_token, user_id) not in TOKENS): + watcher.add_token_and_account_number(access_token, user_id) ## execute data collection response = watcher.run_data_collection(query_file) @@ -90,12 +92,12 @@ def query_facebook_audience(access_token, user_id, query_file): def main(): parser = ArgumentParser() parser.add_argument('--auth_file', default='data/facebook_auth_ingmar.csv') - parser.add_argument('--query_file', default='data/newyork_expats.json') + parser.add_argument('--query_file', default='data/queries/newyork_expats.json') parser.add_argument('--out_dir', default='data/') args = parser.parse_args() auth_file = args.auth_file query_file = args.query_file - out_dir = args.out_dir + out_dir = args.out_dir ## set up watcher watcher = watcherAPI() diff --git a/src/data_processing/mine_facebook_audience.py b/src/data_processing/mine_facebook_audience.py index 373ab1f..aa7f202 100644 --- a/src/data_processing/mine_facebook_audience.py +++ b/src/data_processing/mine_facebook_audience.py @@ -5,11 +5,11 @@ @author: stewart """ from argparse import ArgumentParser -from src.data_processing.utils import query_and_write, load_facebook_auth -from pysocialwatcher import constants -import pandas as pd -import json -from ast import literal_eval +from src.data_processing.utils import query_and_write +#from pysocialwatcher import constants +#import pandas as pd +#import json +#from ast import literal_eval def main(): parser = ArgumentParser()