From 379b37d5dca7f36ad3f2df5b85f5413ecf77b1d9 Mon Sep 17 00:00:00 2001 From: Stewart Date: Wed, 16 May 2018 17:44:06 +0200 Subject: [PATCH] first commit --- .gitignore | 2 ++ README.md | 7 +++++++ data/README.md | 7 +++++++ data/facebook_auth_dummy.csv | 1 + data/newyork_expats.json | 25 +++++++++++++++++++++++ requirements.txt | 11 ++++++++++ src/README.md | 5 +++++ src/data_processing/example_mine.py | 31 +++++++++++++++++++++++++++++ 8 files changed, 89 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 data/README.md create mode 100644 data/facebook_auth_dummy.csv create mode 100644 data/newyork_expats.json create mode 100644 requirements.txt create mode 100644 src/README.md create mode 100644 src/data_processing/example_mine.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7cfaa00 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +data/facebook_auth.csv +data/facebook_auth_ingmar.csv \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..dee3d65 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# Assimilation +A study of immigrant assimilation using both Facebook advertising data and a traditional survey. + +## Install +Running the code requires installing packages with `pip` (for example inside a `conda` environment). + +`pip install -r requirements.txt` \ No newline at end of file diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..182e2b8 --- /dev/null +++ b/data/README.md @@ -0,0 +1,7 @@ +# Data +This directory contains input and output data. + +- facebook_auth.csv +Contains access token and user ID. Necessary for mining FB data. +- newyork_expats.json +Contains JSON configuration for sample query (expats living in New York). 
\ No newline at end of file diff --git a/data/facebook_auth_dummy.csv b/data/facebook_auth_dummy.csv new file mode 100644 index 0000000..f6daad7 --- /dev/null +++ b/data/facebook_auth_dummy.csv @@ -0,0 +1 @@ +ACCESS_TOKEN,USER_ID diff --git a/data/newyork_expats.json b/data/newyork_expats.json new file mode 100644 index 0000000..5dbb9e1 --- /dev/null +++ b/data/newyork_expats.json @@ -0,0 +1,25 @@ +{ "name": "Expats Distribution in New York", + "geo_locations": [ + { + "name": "cities", + "values": [{"key":"2490299"}], + "location_types": ["home"] + } + ], + "behavior": [ + {"or" : [6019673233983], "name" : "Expats (Zimbabwe)"}, + {"or" : [6027149006383], "name" : "Expats (Vietnam)"}, + {"or" : [6026404871583], "name" : "Expats (Venezuela)"}, + {"or" : [6023620475783], "name" : "Expats (US)"}, + {"or" : [6019396649183], "name" : "Expats (United States)"}, + {"or" : [6021354152983], "name" : "Expats (UK)"}, + {"or" : [6019673501783], "name" : "Expats (Uganda)"}, + {"or" : [6023516430783], "name" : "Expats (UAE)"}, + {"or" : [6019366994183], "name" : "Expats (Turkey)"} + ], + "ages_ranges": [ + {"min":18} + ], + "genders": [0] +} + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..37f24d9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +tabulate==0.7.7 +codecov==2.0.5 +numpy==1.11.1 +coloredlogs==5.2 +coverage==4.2 +cython==0.27.3 +pandas==0.19.0 +pytest_cov==2.4.0 +requests==2.11.1 +setuptools==28.2.0 +git+https://github.com/maraujo/pySocialWatcher \ No newline at end of file diff --git a/src/README.md b/src/README.md new file mode 100644 index 0000000..36ec8ff --- /dev/null +++ b/src/README.md @@ -0,0 +1,5 @@ +# Scripts +These are the scripts necessary for collecting and analysing the data. + +- data_processing/ +Mine and clean data. 
\ No newline at end of file
diff --git a/src/data_processing/example_mine.py b/src/data_processing/example_mine.py
new file mode 100644
index 0000000..950081a
--- /dev/null
+++ b/src/data_processing/example_mine.py
@@ -0,0 +1,53 @@
+from argparse import ArgumentParser
+from pysocialwatcher import watcherAPI
+import os
+
+
+def _build_out_file(query_file, out_dir):
+    """Return the TSV output path for `query_file` inside `out_dir`.
+
+    Only the final extension of the query file name is swapped for `.tsv`,
+    so a query file that does not end in `.json` still gets a `.tsv` name.
+    """
+    query_name = os.path.splitext(os.path.basename(query_file))[0]
+    return os.path.join(out_dir, query_name + '.tsv')
+
+
+def main():
+    """Run the query in `--query_file` and write the result as a TSV file.
+
+    The file is written to `--out_dir` under the query file's base name
+    with a `.tsv` extension; the directory is created if it is missing.
+    """
+    parser = ArgumentParser(description='Mine Facebook data for one query file.')
+    parser.add_argument('--auth_file', default='data/facebook_auth_ingmar.csv',
+                        help='CSV file with the access token and user ID')
+    parser.add_argument('--query_file', default='data/newyork_expats.json',
+                        help='JSON file with the query configuration')
+    parser.add_argument('--out_dir', default='data/',
+                        help='directory that receives the .tsv output')
+    args = parser.parse_args()
+
+    ## fail before any API call if the output would overwrite the query file
+    out_file = _build_out_file(args.query_file, args.out_dir)
+    if os.path.abspath(out_file) == os.path.abspath(args.query_file):
+        parser.error('output file would overwrite the query file: ' + out_file)
+
+    ## set up watcher
+    watcher = watcherAPI()
+    watcher.load_credentials_file(args.auth_file)
+    watcher.check_tokens_account_valid()
+
+    ## test query
+    ## NOTE(review): `output` presumably is a pandas DataFrame -- confirm
+    output = watcher.run_data_collection(args.query_file)
+
+    ## write to file
+    ## makedirs (unlike mkdir) also creates missing parent directories
+    if not os.path.isdir(args.out_dir):
+        os.makedirs(args.out_dir)
+    output.to_csv(out_file, sep='\t')
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file