From 379b37d5dca7f36ad3f2df5b85f5413ecf77b1d9 Mon Sep 17 00:00:00 2001
From: Stewart <stewart@demogr.mpg.de>
Date: Wed, 16 May 2018 17:44:06 +0200
Subject: [PATCH] first commit

---
 .gitignore                          |  2 ++
 README.md                           |  7 +++++++
 data/README.md                      |  7 +++++++
 data/facebook_auth_dummy.csv        |  1 +
 data/newyork_expats.json            | 25 +++++++++++++++++++++++
 requirements.txt                    | 11 ++++++++++
 src/README.md                       |  5 +++++
 src/data_processing/example_mine.py | 31 +++++++++++++++++++++++++++++
 8 files changed, 89 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 data/README.md
 create mode 100644 data/facebook_auth_dummy.csv
 create mode 100644 data/newyork_expats.json
 create mode 100644 requirements.txt
 create mode 100644 src/README.md
 create mode 100644 src/data_processing/example_mine.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7cfaa00
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+data/facebook_auth.csv
+data/facebook_auth_ingmar.csv
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..dee3d65
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+# Assimilation
+A study of immigrant assimilation using both Facebook advertising data and a traditional survey.
+
+## Install
+Running the code requires installing the packages listed in `requirements.txt` with `pip`.
+
+`pip install -r requirements.txt`
\ No newline at end of file
diff --git a/data/README.md b/data/README.md
new file mode 100644
index 0000000..182e2b8
--- /dev/null
+++ b/data/README.md
@@ -0,0 +1,7 @@
+# Data
+This directory contains input and output data.
+
+- `facebook_auth.csv`:
+  Contains the access token and user ID. Necessary for mining FB data.
+- `newyork_expats.json`:
+  Contains the JSON configuration for a sample query (expats living in New York).
\ No newline at end of file
diff --git a/data/facebook_auth_dummy.csv b/data/facebook_auth_dummy.csv
new file mode 100644
index 0000000..f6daad7
--- /dev/null
+++ b/data/facebook_auth_dummy.csv
@@ -0,0 +1 @@
+ACCESS_TOKEN,USER_ID 
diff --git a/data/newyork_expats.json b/data/newyork_expats.json
new file mode 100644
index 0000000..5dbb9e1
--- /dev/null
+++ b/data/newyork_expats.json
@@ -0,0 +1,25 @@
+{   "name": "Expats Distribution in New York",
+    "geo_locations": [
+      { 
+        "name": "cities",
+        "values": [{"key":"2490299"}],
+        "location_types": ["home"] 
+      }
+    ],
+    "behavior": [
+        {"or" : [6019673233983], "name" : "Expats (Zimbabwe)"},
+        {"or" : [6027149006383], "name" : "Expats (Vietnam)"},
+        {"or" : [6026404871583], "name" : "Expats (Venezuela)"},
+        {"or" : [6023620475783], "name" : "Expats (US)"},
+        {"or" : [6019396649183], "name" : "Expats (United States)"},
+        {"or" : [6021354152983], "name" : "Expats (UK)"},
+        {"or" : [6019673501783], "name" : "Expats (Uganda)"},
+        {"or" : [6023516430783], "name" : "Expats (UAE)"},
+        {"or" : [6019366994183], "name" : "Expats (Turkey)"}
+    ],
+    "ages_ranges": [
+        {"min":18}
+    ],
+    "genders": [0]
+}
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..37f24d9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+tabulate==0.7.7
+codecov==2.0.5
+numpy==1.11.1
+coloredlogs==5.2
+coverage==4.2
+cython==0.27.3
+pandas==0.19.0
+pytest_cov==2.4.0
+requests==2.11.1
+setuptools==28.2.0
+git+https://github.com/maraujo/pySocialWatcher
\ No newline at end of file
diff --git a/src/README.md b/src/README.md
new file mode 100644
index 0000000..36ec8ff
--- /dev/null
+++ b/src/README.md
@@ -0,0 +1,5 @@
+# Scripts
+These are the scripts necessary for collecting and analysing the data.
+
+- data_processing/
+Mine and clean data.
\ No newline at end of file
diff --git a/src/data_processing/example_mine.py b/src/data_processing/example_mine.py
new file mode 100644
index 0000000..950081a
--- /dev/null
+++ b/src/data_processing/example_mine.py
@@ -0,0 +1,31 @@
+from argparse import ArgumentParser
+from pysocialwatcher import watcherAPI
+import os
+
+def main():
+    """Run the query in --query_file and write the result as a TSV file to --out_dir."""
+    parser = ArgumentParser(description='Run a pySocialWatcher query and save it as TSV.')
+    parser.add_argument('--auth_file', default='data/facebook_auth_ingmar.csv', help='credentials CSV')
+    parser.add_argument('--query_file', default='data/newyork_expats.json', help='JSON query file')
+    parser.add_argument('--out_dir', default='data/', help='directory for the .tsv output')
+    args = parser.parse_args()
+
+    ## set up watcher
+    watcher = watcherAPI()
+    watcher.load_credentials_file(args.auth_file)
+    watcher.check_tokens_account_valid()
+
+    ## run the query (the result is saved via to_csv; presumably a pandas DataFrame)
+    output = watcher.run_data_collection(args.query_file)
+
+    ## write to file; makedirs (unlike mkdir) also creates missing parent directories
+    if not os.path.isdir(args.out_dir):
+        os.makedirs(args.out_dir)
+    ## swap only the final extension: str.replace would rewrite every '.json' in the
+    ## name and keep a non-.json name unchanged, so the TSV could overwrite the query
+    out_base = os.path.splitext(os.path.basename(args.query_file))[0] + '.tsv'
+    out_file = os.path.join(args.out_dir, out_base)
+    output.to_csv(out_file, sep='\t')
+
+if __name__ == '__main__':  # run only when executed as a script, not when imported
+    main()
\ No newline at end of file