Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
first commit
  • Loading branch information
istewart committed May 16, 2018
0 parents commit 379b37d
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
data/facebook_auth.csv
data/facebook_auth_ingmar.csv
7 changes: 7 additions & 0 deletions README.md
@@ -0,0 +1,7 @@
# Assimilation
A study of immigrant assimilation using both Facebook advertising data and a traditional survey.

## Install
Running the code requires installing packages with `conda`.

`conda install --file requirements.txt`
7 changes: 7 additions & 0 deletions data/README.md
@@ -0,0 +1,7 @@
# Data
This directory contains input and output data.

- facebook_auth.csv
Contains access token and user ID. Necessary for mining FB data. This file is gitignored; copy `facebook_auth_dummy.csv` and fill in your own credentials.
- newyork_expats.json
Contains JSON configuration for sample query (expats living in New York).
1 change: 1 addition & 0 deletions data/facebook_auth_dummy.csv
@@ -0,0 +1 @@
ACCESS_TOKEN,USER_ID
25 changes: 25 additions & 0 deletions data/newyork_expats.json
@@ -0,0 +1,25 @@
{ "name": "Expats Distribution in New York",
"geo_locations": [
{
"name": "cities",
"values": [{"key":"2490299"}],
"location_types": ["home"]
}
],
"behavior": [
{"or" : [6019673233983], "name" : "Expats (Zimbabwe)"},
{"or" : [6027149006383], "name" : "Expats (Vietnam)"},
{"or" : [6026404871583], "name" : "Expats (Venezuela)"},
{"or" : [6023620475783], "name" : "Expats (US)"},
{"or" : [6019396649183], "name" : "Expats (United States)"},
{"or" : [6021354152983], "name" : "Expats (UK)"},
{"or" : [6019673501783], "name" : "Expats (Uganda)"},
{"or" : [6023516430783], "name" : "Expats (UAE)"},
{"or" : [6019366994183], "name" : "Expats (Turkey)"}
],
"ages_ranges": [
{"min":18}
],
"genders": [0]
}

11 changes: 11 additions & 0 deletions requirements.txt
@@ -0,0 +1,11 @@
tabulate==0.7.7
codecov==2.0.5
numpy==1.11.1
coloredlogs==5.2
coverage==4.2
cython==0.27.3
pandas==0.19.0
pytest_cov==2.4.0
requests==2.11.1
setuptools==28.2.0
git+https://github.com/maraujo/pySocialWatcher
5 changes: 5 additions & 0 deletions src/README.md
@@ -0,0 +1,5 @@
# Scripts
These are the scripts necessary for collecting and analysing the data.

- data_processing/
Mine and clean data.
31 changes: 31 additions & 0 deletions src/data_processing/example_mine.py
@@ -0,0 +1,31 @@
from argparse import ArgumentParser
from pysocialwatcher import watcherAPI
import os

def main():
    """Run a sample pySocialWatcher data collection and save the result as TSV.

    Loads Facebook API credentials from ``--auth_file``, validates the tokens,
    runs the audience query described by the JSON in ``--query_file``, and
    writes the collected data to a tab-separated file (named after the query
    file) inside ``--out_dir``.
    """
    parser = ArgumentParser(description='Mine Facebook audience data for a sample query.')
    # NOTE(review): default points at a personal credentials file; the README
    # and .gitignore suggest 'data/facebook_auth.csv' is the canonical name —
    # confirm before changing the default (kept as-is for compatibility).
    parser.add_argument('--auth_file', default='data/facebook_auth_ingmar.csv')
    parser.add_argument('--query_file', default='data/newyork_expats.json')
    parser.add_argument('--out_dir', default='data/')
    args = parser.parse_args()

    ## set up watcher and validate credentials before running the query
    watcher = watcherAPI()
    watcher.load_credentials_file(args.auth_file)
    watcher.check_tokens_account_valid()

    ## run the query described by the JSON config
    output = watcher.run_data_collection(args.query_file)

    ## write to file; makedirs creates nested directories and exist_ok=True
    ## avoids the check-then-create race that os.mkdir had
    os.makedirs(args.out_dir, exist_ok=True)
    # splitext swaps only the extension; str.replace('.json', ...) could
    # corrupt a filename that contains '.json' elsewhere in its stem
    out_base = os.path.splitext(os.path.basename(args.query_file))[0] + '.tsv'
    out_file = os.path.join(args.out_dir, out_base)
    output.to_csv(out_file, sep='\t')

if __name__ == '__main__':
    main()

0 comments on commit 379b37d

Please sign in to comment.