diff --git a/src/data_processing/check_interest_id_valid.py b/src/data_processing/check_interest_id_valid.py
index faadaae..a545dac 100644
--- a/src/data_processing/check_interest_id_valid.py
+++ b/src/data_processing/check_interest_id_valid.py
@@ -13,6 +13,7 @@
import logging
import os
import math
+import sys
## suppress request INFO messages
logging.getLogger("requests").setLevel(logging.WARNING)
@@ -142,6 +143,8 @@ def interest_name_query_batch(access_token, user_id, interest_ids):
print('rate limit reached at id=%d, sleeping for %d seconds'%(interest_id, RATE_LIMIT_SLEEP_TIME))
sleep(RATE_LIMIT_SLEEP_TIME)
success = True
+ ## try to restart program to dodge rate limit
+# os.execl(sys.executable, sys.executable, *sys.argv)
else:
response_data = response_json['targetingsentencelines']
response_data_matches = filter(lambda x: x['content']=='People Who Match:' or x['content']=='And Must Also Match:',
@@ -232,7 +235,7 @@ def main():
if(len(response_names_i) < interest_names_i):
fixed_names_i = ['NA' if x not in set(response_names_i) else x for x in interest_names_i]
else:
- fixed_names_i = list(interest_names_i)
+ fixed_names_i = list(response_names_i)
# print('%d/%d fixed names %s'%(len(response_names_i), len(interest_names_i), fixed_names_i))
## check for missing names
# if(len(response_names_i) != len(fixed_names_i) or any([name_i=='' for name_i in response_names_i])):
diff --git a/src/data_processing/compare_top_interests.ipynb b/src/data_processing/compare_top_interests.ipynb
index a295b6d..fda2bae 100644
--- a/src/data_processing/compare_top_interests.ipynb
+++ b/src/data_processing/compare_top_interests.ipynb
@@ -956,6 +956,3706 @@
" l_file = '../../data/query_results/%s_top_%d_%s.csv'%(l, top_k, audience_var)\n",
" l_data_k.loc[:, ['interest_name', audience_var]].to_csv(l_file, sep=',', index=False, encoding='utf-8')"
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Compare Ex-pat interests\n",
+ "We've now mined the top 3000 interests for Hispanic Mexican ex-pats living in the US, so let's see how those stack up against native US Americans."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "original data has 3000 rows\n",
+ "clean data has 2100 rows\n",
+ "2100 results total\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " interest_id | \n",
+ " interest_name | \n",
+ " location | \n",
+ " ages_ranges | \n",
+ " behavior | \n",
+ " dau_audience | \n",
+ " mau_audience | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 6003349442621 | \n",
+ " Entertainment | \n",
+ " US | \n",
+ " {u'max': 65, u'min': 18} | \n",
+ " {u'and': [6023676072183], u'or': [600313321237... | \n",
+ " 26193599 | \n",
+ " 34000000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 6003142505790 | \n",
+ " Facebook | \n",
+ " US | \n",
+ " {u'max': 65, u'min': 18} | \n",
+ " {u'and': [6023676072183], u'or': [600313321237... | \n",
+ " 22969411 | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 6003342621987 | \n",
+ " Social network | \n",
+ " US | \n",
+ " {u'max': 65, u'min': 18} | \n",
+ " {u'and': [6023676072183], u'or': [600313321237... | \n",
+ " 22028399 | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 6003167425934 | \n",
+ " Shopping and fashion | \n",
+ " US | \n",
+ " {u'max': 65, u'min': 18} | \n",
+ " {u'and': [6023676072183], u'or': [600313321237... | \n",
+ " 25855999 | \n",
+ " 32000000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 6003985771306 | \n",
+ " Technology | \n",
+ " US | \n",
+ " {u'max': 65, u'min': 18} | \n",
+ " {u'and': [6023676072183], u'or': [600313321237... | \n",
+ " 25855999 | \n",
+ " 32000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " interest_id interest_name location ages_ranges \\\n",
+ "0 6003349442621 Entertainment US {u'max': 65, u'min': 18} \n",
+ "1 6003142505790 Facebook US {u'max': 65, u'min': 18} \n",
+ "2 6003342621987 Social network US {u'max': 65, u'min': 18} \n",
+ "3 6003167425934 Shopping and fashion US {u'max': 65, u'min': 18} \n",
+ "4 6003985771306 Technology US {u'max': 65, u'min': 18} \n",
+ "\n",
+ " behavior dau_audience \\\n",
+ "0 {u'and': [6023676072183], u'or': [600313321237... 26193599 \n",
+ "1 {u'and': [6023676072183], u'or': [600313321237... 22969411 \n",
+ "2 {u'and': [6023676072183], u'or': [600313321237... 22028399 \n",
+ "3 {u'and': [6023676072183], u'or': [600313321237... 25855999 \n",
+ "4 {u'and': [6023676072183], u'or': [600313321237... 25855999 \n",
+ "\n",
+ " mau_audience \n",
+ "0 34000000 \n",
+ "1 30000000 \n",
+ "2 29000000 \n",
+ "3 32000000 \n",
+ "4 32000000 "
+ ]
+ },
+ "execution_count": 121,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "from ast import literal_eval\n",
+ "expat_interests = pd.read_csv('../../dataframe_collecting_1527418768.csv', sep=',', index_col=0)\n",
+ "expat_interests = clean_interest_data(expat_interests)\n",
+ "print('%d results total'%(expat_interests.shape[0]))\n",
+ "expat_interests.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " interest_name | \n",
+ " mau_audience | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 524 | \n",
+ " New Tang Dynasty Television | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 468 | \n",
+ " A.N.S.W.E.R. | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 705 | \n",
+ " Canadian Albums Chart | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1740 | \n",
+ " Province | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1756 | \n",
+ " Act-i-vate | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1833 | \n",
+ " Indian people | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1845 | \n",
+ " Message | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1866 | \n",
+ " Suicide awareness | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 562 | \n",
+ " Allah | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 558 | \n",
+ " Dieting | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 530 | \n",
+ " Wide Open West | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1193 | \n",
+ " Ton | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1919 | \n",
+ " Conservatism | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 441 | \n",
+ " Realidade | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 2082 | \n",
+ " Lakh | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 2005 | \n",
+ " Lady | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 2018 | \n",
+ " Egyptians | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 2054 | \n",
+ " Hispanic and latino american culture | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 331 | \n",
+ " Lewis and Clark-class dry cargo ship | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 325 | \n",
+ " Entreprise | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 305 | \n",
+ " Islam | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 279 | \n",
+ " Fatigue (medical) | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 273 | \n",
+ " EveR | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 266 | \n",
+ " Christianity | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 242 | \n",
+ " Gyms | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 230 | \n",
+ " Muka | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1739 | \n",
+ " Stop consonant | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 709 | \n",
+ " Mystery meat navigation | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 718 | \n",
+ " Zumba | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 739 | \n",
+ " Acne vulgaris | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1350 | \n",
+ " family planning | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 994 | \n",
+ " Hotline | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1386 | \n",
+ " Obesity awareness | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1391 | \n",
+ " Mosque | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1402 | \n",
+ " Communist Party USA | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1414 | \n",
+ " Ampere-hour | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 952 | \n",
+ " Screenshot | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1420 | \n",
+ " Securite | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1282 | \n",
+ " Arabic language | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1458 | \n",
+ " Entity | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1469 | \n",
+ " Inflammation | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1265 | \n",
+ " List of districts of Turkey | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 892 | \n",
+ " Canadian Hot 100 | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1496 | \n",
+ " Bharatiya Janata Party | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 884 | \n",
+ " Muhammad | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 862 | \n",
+ " Sin | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 861 | \n",
+ " Quran | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1091 | \n",
+ " Hashtag | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 841 | \n",
+ " Ultra-prominent peak | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1581 | \n",
+ " Infection | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 809 | \n",
+ " Hiking | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 1637 | \n",
+ " Ramadan | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 751 | \n",
+ " Addiction | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 175 | \n",
+ " Nutrition | \n",
+ " 38000000 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Entertainment | \n",
+ " 34000000 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Hobbies and activities | \n",
+ " 33000000 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Business and industry | \n",
+ " 32000000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Shopping and fashion | \n",
+ " 32000000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Technology | \n",
+ " 32000000 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Music | \n",
+ " 31000000 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Sports and outdoors | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Food and drink | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Facebook | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Sports | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " Entre Rios Province | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Family and relationships | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Social network | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Consumer electronics | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Food | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Instant messaging | \n",
+ " 28000000 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Facebook Messenger | \n",
+ " 28000000 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Family | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Shopping | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " Reading | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Games | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Arts and music | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Love | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " Business | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Movies | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " Travel | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Televisions | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Education | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " Time | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Fitness and wellness | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Vehicles | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " TV | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Beauty | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " Automobiles | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Video games | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Clothing | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " Life | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Friendship | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " Finance | \n",
+ " 22000000 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " Instagram | \n",
+ " 22000000 | \n",
+ "
\n",
+ " \n",
+ " 94 | \n",
+ " Product (business) | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " Sales | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " United States | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Online shopping | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " Live events | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Computers | \n",
+ " 20000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " interest_name mau_audience\n",
+ "524 New Tang Dynasty Television 38000000\n",
+ "468 A.N.S.W.E.R. 38000000\n",
+ "705 Canadian Albums Chart 38000000\n",
+ "1740 Province 38000000\n",
+ "1756 Act-i-vate 38000000\n",
+ "1833 Indian people 38000000\n",
+ "1845 Message 38000000\n",
+ "1866 Suicide awareness 38000000\n",
+ "562 Allah 38000000\n",
+ "558 Dieting 38000000\n",
+ "530 Wide Open West 38000000\n",
+ "1193 Ton 38000000\n",
+ "1919 Conservatism 38000000\n",
+ "441 Realidade 38000000\n",
+ "2082 Lakh 38000000\n",
+ "2005 Lady 38000000\n",
+ "2018 Egyptians 38000000\n",
+ "2054 Hispanic and latino american culture 38000000\n",
+ "331 Lewis and Clark-class dry cargo ship 38000000\n",
+ "325 Entreprise 38000000\n",
+ "305 Islam 38000000\n",
+ "279 Fatigue (medical) 38000000\n",
+ "273 EveR 38000000\n",
+ "266 Christianity 38000000\n",
+ "242 Gyms 38000000\n",
+ "230 Muka 38000000\n",
+ "1739 Stop consonant 38000000\n",
+ "709 Mystery meat navigation 38000000\n",
+ "718 Zumba 38000000\n",
+ "739 Acne vulgaris 38000000\n",
+ "1350 family planning 38000000\n",
+ "994 Hotline 38000000\n",
+ "1386 Obesity awareness 38000000\n",
+ "1391 Mosque 38000000\n",
+ "1402 Communist Party USA 38000000\n",
+ "1414 Ampere-hour 38000000\n",
+ "952 Screenshot 38000000\n",
+ "1420 Securite 38000000\n",
+ "1282 Arabic language 38000000\n",
+ "1458 Entity 38000000\n",
+ "1469 Inflammation 38000000\n",
+ "1265 List of districts of Turkey 38000000\n",
+ "892 Canadian Hot 100 38000000\n",
+ "1496 Bharatiya Janata Party 38000000\n",
+ "884 Muhammad 38000000\n",
+ "862 Sin 38000000\n",
+ "861 Quran 38000000\n",
+ "1091 Hashtag 38000000\n",
+ "841 Ultra-prominent peak 38000000\n",
+ "1581 Infection 38000000\n",
+ "809 Hiking 38000000\n",
+ "1637 Ramadan 38000000\n",
+ "751 Addiction 38000000\n",
+ "175 Nutrition 38000000\n",
+ "0 Entertainment 34000000\n",
+ "5 Hobbies and activities 33000000\n",
+ "10 Business and industry 32000000\n",
+ "3 Shopping and fashion 32000000\n",
+ "4 Technology 32000000\n",
+ "16 Music 31000000\n",
+ "8 Sports and outdoors 30000000\n",
+ "15 Food and drink 30000000\n",
+ "1 Facebook 30000000\n",
+ "9 Sports 30000000\n",
+ "37 Entre Rios Province 30000000\n",
+ "13 Family and relationships 30000000\n",
+ "2 Social network 29000000\n",
+ "11 Consumer electronics 29000000\n",
+ "18 Food 29000000\n",
+ "6 Instant messaging 28000000\n",
+ "7 Facebook Messenger 28000000\n",
+ "21 Family 27000000\n",
+ "12 Shopping 27000000\n",
+ "22 Reading 27000000\n",
+ "17 Games 27000000\n",
+ "28 Arts and music 26000000\n",
+ "23 Love 26000000\n",
+ "41 Business 26000000\n",
+ "19 Movies 26000000\n",
+ "20 Travel 26000000\n",
+ "24 Televisions 26000000\n",
+ "30 Education 24000000\n",
+ "40 Time 24000000\n",
+ "36 Fitness and wellness 24000000\n",
+ "34 Vehicles 24000000\n",
+ "45 TV 24000000\n",
+ "31 Beauty 24000000\n",
+ "39 Automobiles 23000000\n",
+ "26 Video games 23000000\n",
+ "29 Clothing 23000000\n",
+ "38 Life 23000000\n",
+ "32 Friendship 23000000\n",
+ "43 Finance 22000000\n",
+ "35 Instagram 22000000\n",
+ "94 Product (business) 21000000\n",
+ "51 Sales 21000000\n",
+ "97 United States 21000000\n",
+ "33 Online shopping 21000000\n",
+ "49 Live events 21000000\n",
+ "25 Computers 20000000"
+ ]
+ },
+ "execution_count": 122,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "audience_var = 'mau_audience'\n",
+ "expat_interests.sort_values(audience_var, inplace=True, ascending=False)\n",
+ "expat_interests.loc[:, ['interest_name', audience_var]].head(n=100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " interest_name | \n",
+ " mau_audience | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013 | \n",
+ " Tamil cinema | \n",
+ " 270000 | \n",
+ "
\n",
+ " \n",
+ " 972 | \n",
+ " Hacker (computer security) | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ " 1585 | \n",
+ " Export | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ " 1640 | \n",
+ " Call centre | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ " 1737 | \n",
+ " China Central Television | \n",
+ " 240000 | \n",
+ "
\n",
+ " \n",
+ " 2083 | \n",
+ " Departments of France | \n",
+ " 240000 | \n",
+ "
\n",
+ " \n",
+ " 1289 | \n",
+ " Man (Middle-earth) | \n",
+ " 240000 | \n",
+ "
\n",
+ " \n",
+ " 2092 | \n",
+ " Storey | \n",
+ " 230000 | \n",
+ "
\n",
+ " \n",
+ " 1351 | \n",
+ " Bangkok | \n",
+ " 230000 | \n",
+ "
\n",
+ " \n",
+ " 1731 | \n",
+ " Zara (retailer) | \n",
+ " 220000 | \n",
+ "
\n",
+ " \n",
+ " 1822 | \n",
+ " Hard drives | \n",
+ " 210000 | \n",
+ "
\n",
+ " \n",
+ " 1659 | \n",
+ " Sale, Greater Manchester | \n",
+ " 210000 | \n",
+ "
\n",
+ " \n",
+ " 1296 | \n",
+ " Lenovo | \n",
+ " 210000 | \n",
+ "
\n",
+ " \n",
+ " 1688 | \n",
+ " Qatar | \n",
+ " 200000 | \n",
+ "
\n",
+ " \n",
+ " 2093 | \n",
+ " Ultras | \n",
+ " 180000 | \n",
+ "
\n",
+ " \n",
+ " 1745 | \n",
+ " 4G | \n",
+ " 180000 | \n",
+ "
\n",
+ " \n",
+ " 924 | \n",
+ " Huawei | \n",
+ " 170000 | \n",
+ "
\n",
+ " \n",
+ " 2077 | \n",
+ " Reseller | \n",
+ " 170000 | \n",
+ "
\n",
+ " \n",
+ " 1808 | \n",
+ " Delhi | \n",
+ " 150000 | \n",
+ "
\n",
+ " \n",
+ " 1951 | \n",
+ " Turkish language | \n",
+ " 150000 | \n",
+ "
\n",
+ " \n",
+ " 1660 | \n",
+ " IPhone 5 | \n",
+ " 150000 | \n",
+ "
\n",
+ " \n",
+ " 1656 | \n",
+ " Middle Eastern cuisine | \n",
+ " 150000 | \n",
+ "
\n",
+ " \n",
+ " 1055 | \n",
+ " Million | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " 1937 | \n",
+ " Multi-core processor | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " 460 | \n",
+ " URL shortening | \n",
+ " 130000 | \n",
+ "
\n",
+ " \n",
+ " 1704 | \n",
+ " Bangladesh | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 791 | \n",
+ " truecaller | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 2035 | \n",
+ " Chinese New Year | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 2072 | \n",
+ " Prophets and messengers in Islam | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 2028 | \n",
+ " Nescafe | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 1190 | \n",
+ " Istanbul | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 1793 | \n",
+ " Cairo | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 1987 | \n",
+ " Holi | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 2008 | \n",
+ " Indonesian language | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 1403 | \n",
+ " Hijab | \n",
+ " 100000 | \n",
+ "
\n",
+ " \n",
+ " 1761 | \n",
+ " Jakarta | \n",
+ " 100000 | \n",
+ "
\n",
+ " \n",
+ " 1468 | \n",
+ " Arab world | \n",
+ " 93000 | \n",
+ "
\n",
+ " \n",
+ " 1722 | \n",
+ " Condominio | \n",
+ " 87000 | \n",
+ "
\n",
+ " \n",
+ " 617 | \n",
+ " Academia | \n",
+ " 85000 | \n",
+ "
\n",
+ " \n",
+ " 1980 | \n",
+ " African Union | \n",
+ " 84000 | \n",
+ "
\n",
+ " \n",
+ " 357 | \n",
+ " Government | \n",
+ " 83000 | \n",
+ "
\n",
+ " \n",
+ " 987 | \n",
+ " Cod | \n",
+ " 83000 | \n",
+ "
\n",
+ " \n",
+ " 1387 | \n",
+ " Sari | \n",
+ " 82000 | \n",
+ "
\n",
+ " \n",
+ " 1593 | \n",
+ " Wire transfer | \n",
+ " 81000 | \n",
+ "
\n",
+ " \n",
+ " 1705 | \n",
+ " Limited company | \n",
+ " 80000 | \n",
+ "
\n",
+ " \n",
+ " 1008 | \n",
+ " Indian Premier League | \n",
+ " 79000 | \n",
+ "
\n",
+ " \n",
+ " 2086 | \n",
+ " Urdu | \n",
+ " 74000 | \n",
+ "
\n",
+ " \n",
+ " 2097 | \n",
+ " Nokia | \n",
+ " 72000 | \n",
+ "
\n",
+ " \n",
+ " 552 | \n",
+ " Facebook for Every Phone | \n",
+ " 71000 | \n",
+ "
\n",
+ " \n",
+ " 1478 | \n",
+ " Heel (shoe) | \n",
+ " 55000 | \n",
+ "
\n",
+ " \n",
+ " 1297 | \n",
+ " Samsung Galaxy S III | \n",
+ " 52000 | \n",
+ "
\n",
+ " \n",
+ " 1698 | \n",
+ " My Talking Tom | \n",
+ " 41000 | \n",
+ "
\n",
+ " \n",
+ " 1644 | \n",
+ " Tamil language | \n",
+ " 37000 | \n",
+ "
\n",
+ " \n",
+ " 2041 | \n",
+ " Salman Khan | \n",
+ " 34000 | \n",
+ "
\n",
+ " \n",
+ " 1943 | \n",
+ " Legal personality | \n",
+ " 32000 | \n",
+ "
\n",
+ " \n",
+ " 950 | \n",
+ " Synthpop | \n",
+ " 17000 | \n",
+ "
\n",
+ " \n",
+ " 2053 | \n",
+ " Indo pop | \n",
+ " 16000 | \n",
+ "
\n",
+ " \n",
+ " 1911 | \n",
+ " Telugu language | \n",
+ " 14000 | \n",
+ "
\n",
+ " \n",
+ " 1861 | \n",
+ " Virat Kohli | \n",
+ " 13000 | \n",
+ "
\n",
+ " \n",
+ " 1558 | \n",
+ " Narendra Modi | \n",
+ " 11000 | \n",
+ "
\n",
+ " \n",
+ " 1915 | \n",
+ " BlackBerry Messenger | \n",
+ " 9300 | \n",
+ "
\n",
+ " \n",
+ " 1368 | \n",
+ " Vodafone | \n",
+ " 9000 | \n",
+ "
\n",
+ " \n",
+ " 1451 | \n",
+ " Flipkart | \n",
+ " 4700 | \n",
+ "
\n",
+ " \n",
+ " 1550 | \n",
+ " Supporters of FC Barcelona | \n",
+ " 4400 | \n",
+ "
\n",
+ " \n",
+ " 2003 | \n",
+ " Indian pop | \n",
+ " 4000 | \n",
+ "
\n",
+ " \n",
+ " 1948 | \n",
+ " CCTV News | \n",
+ " 3600 | \n",
+ "
\n",
+ " \n",
+ " 1527 | \n",
+ " British rock | \n",
+ " 3500 | \n",
+ "
\n",
+ " \n",
+ " 859 | \n",
+ " UC Browser | \n",
+ " 2900 | \n",
+ "
\n",
+ " \n",
+ " 1520 | \n",
+ " Value-added tax | \n",
+ " 2400 | \n",
+ "
\n",
+ " \n",
+ " 1729 | \n",
+ " Oppo Electronics | \n",
+ " 2100 | \n",
+ "
\n",
+ " \n",
+ " 810 | \n",
+ " Gender | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 795 | \n",
+ " Leaf | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 65 | \n",
+ " Books | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 1847 | \n",
+ " Motor vehicle | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 801 | \n",
+ " Card games | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 1851 | \n",
+ " Mining | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 789 | \n",
+ " People's Liberation Army Navy | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 458 | \n",
+ " LG Optimus L4 II | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 1854 | \n",
+ " Sydney | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 1842 | \n",
+ " Songwriter | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 1841 | \n",
+ " Gucci | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " Woman | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " Newspapers | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 2055 | \n",
+ " India News | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 87 | \n",
+ " WhatsApp | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 1850 | \n",
+ " Venezuela | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 792 | \n",
+ " Skiing | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 91 | \n",
+ " Nature | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 803 | \n",
+ " ITunes Store | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 806 | \n",
+ " Source code | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 903 | \n",
+ " Cinema of India | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 908 | \n",
+ " Mumbai | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 1629 | \n",
+ " Types of business entity | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 174 | \n",
+ " Dogs | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 784 | \n",
+ " Vegetarianism | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 807 | \n",
+ " Musician | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 1397 | \n",
+ " Franchising | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 798 | \n",
+ " Hollywood | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 811 | \n",
+ " Carnival | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ " 913 | \n",
+ " Pakistan | \n",
+ " 1000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " interest_name mau_audience\n",
+ "2013 Tamil cinema 270000\n",
+ "972 Hacker (computer security) 250000\n",
+ "1585 Export 250000\n",
+ "1640 Call centre 250000\n",
+ "1737 China Central Television 240000\n",
+ "2083 Departments of France 240000\n",
+ "1289 Man (Middle-earth) 240000\n",
+ "2092 Storey 230000\n",
+ "1351 Bangkok 230000\n",
+ "1731 Zara (retailer) 220000\n",
+ "1822 Hard drives 210000\n",
+ "1659 Sale, Greater Manchester 210000\n",
+ "1296 Lenovo 210000\n",
+ "1688 Qatar 200000\n",
+ "2093 Ultras 180000\n",
+ "1745 4G 180000\n",
+ "924 Huawei 170000\n",
+ "2077 Reseller 170000\n",
+ "1808 Delhi 150000\n",
+ "1951 Turkish language 150000\n",
+ "1660 IPhone 5 150000\n",
+ "1656 Middle Eastern cuisine 150000\n",
+ "1055 Million 140000\n",
+ "1937 Multi-core processor 140000\n",
+ "460 URL shortening 130000\n",
+ "1704 Bangladesh 120000\n",
+ "791 truecaller 120000\n",
+ "2035 Chinese New Year 120000\n",
+ "2072 Prophets and messengers in Islam 120000\n",
+ "2028 Nescafe 120000\n",
+ "1190 Istanbul 120000\n",
+ "1793 Cairo 120000\n",
+ "1987 Holi 120000\n",
+ "2008 Indonesian language 120000\n",
+ "1403 Hijab 100000\n",
+ "1761 Jakarta 100000\n",
+ "1468 Arab world 93000\n",
+ "1722 Condominio 87000\n",
+ "617 Academia 85000\n",
+ "1980 African Union 84000\n",
+ "357 Government 83000\n",
+ "987 Cod 83000\n",
+ "1387 Sari 82000\n",
+ "1593 Wire transfer 81000\n",
+ "1705 Limited company 80000\n",
+ "1008 Indian Premier League 79000\n",
+ "2086 Urdu 74000\n",
+ "2097 Nokia 72000\n",
+ "552 Facebook for Every Phone 71000\n",
+ "1478 Heel (shoe) 55000\n",
+ "1297 Samsung Galaxy S III 52000\n",
+ "1698 My Talking Tom 41000\n",
+ "1644 Tamil language 37000\n",
+ "2041 Salman Khan 34000\n",
+ "1943 Legal personality 32000\n",
+ "950 Synthpop 17000\n",
+ "2053 Indo pop 16000\n",
+ "1911 Telugu language 14000\n",
+ "1861 Virat Kohli 13000\n",
+ "1558 Narendra Modi 11000\n",
+ "1915 BlackBerry Messenger 9300\n",
+ "1368 Vodafone 9000\n",
+ "1451 Flipkart 4700\n",
+ "1550 Supporters of FC Barcelona 4400\n",
+ "2003 Indian pop 4000\n",
+ "1948 CCTV News 3600\n",
+ "1527 British rock 3500\n",
+ "859 UC Browser 2900\n",
+ "1520 Value-added tax 2400\n",
+ "1729 Oppo Electronics 2100\n",
+ "810 Gender 1000\n",
+ "795 Leaf 1000\n",
+ "65 Books 1000\n",
+ "1847 Motor vehicle 1000\n",
+ "801 Card games 1000\n",
+ "1851 Mining 1000\n",
+ "789 People's Liberation Army Navy 1000\n",
+ "458 LG Optimus L4 II 1000\n",
+ "1854 Sydney 1000\n",
+ "1842 Songwriter 1000\n",
+ "1841 Gucci 1000\n",
+ "75 Woman 1000\n",
+ "79 Newspapers 1000\n",
+ "2055 India News 1000\n",
+ "87 WhatsApp 1000\n",
+ "1850 Venezuela 1000\n",
+ "792 Skiing 1000\n",
+ "91 Nature 1000\n",
+ "803 ITunes Store 1000\n",
+ "806 Source code 1000\n",
+ "903 Cinema of India 1000\n",
+ "908 Mumbai 1000\n",
+ "1629 Types of business entity 1000\n",
+ "174 Dogs 1000\n",
+ "784 Vegetarianism 1000\n",
+ "807 Musician 1000\n",
+ "1397 Franchising 1000\n",
+ "798 Hollywood 1000\n",
+ "811 Carnival 1000\n",
+ "913 Pakistan 1000"
+ ]
+ },
+ "execution_count": 123,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "expat_interests.loc[:, ['interest_name', audience_var]].tail(n=100)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
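+ "Looks like we'll need to filter out the max_pop and min_pop values: many unrelated interests are pinned at exactly the maximum (38,000,000) or the minimum (1,000) `mau_audience`, and they crowd the head and tail of the sorted list."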
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " interest_name | \n",
+ " mau_audience | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Entertainment | \n",
+ " 34000000 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Hobbies and activities | \n",
+ " 33000000 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Business and industry | \n",
+ " 32000000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Shopping and fashion | \n",
+ " 32000000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Technology | \n",
+ " 32000000 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Music | \n",
+ " 31000000 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Sports and outdoors | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Food and drink | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Facebook | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Sports | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " Entre Rios Province | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Family and relationships | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Social network | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Consumer electronics | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Food | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Instant messaging | \n",
+ " 28000000 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Facebook Messenger | \n",
+ " 28000000 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Family | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Shopping | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " Reading | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Games | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Arts and music | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Love | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " Business | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Movies | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " Travel | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Televisions | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Education | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " Time | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Fitness and wellness | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Vehicles | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " TV | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Beauty | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " Automobiles | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Video games | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Clothing | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " Life | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Friendship | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " Finance | \n",
+ " 22000000 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " Instagram | \n",
+ " 22000000 | \n",
+ "
\n",
+ " \n",
+ " 94 | \n",
+ " Product (business) | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " Sales | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " United States | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Online shopping | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " Live events | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Computers | \n",
+ " 20000000 | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " Pets | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 54 | \n",
+ " Politics and social issues | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " Design | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " World | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " Beverages | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " Online | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " Photograph | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " Child | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 53 | \n",
+ " Fashion accessories | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " Association football (Soccer) | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " Photography | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 154 | \n",
+ " Victory | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Facebook for Android | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " Dance | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " Price | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 130 | \n",
+ " Sales promotion | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 151 | \n",
+ " IPhone | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Home | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " Motherhood | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 92 | \n",
+ " Home and garden | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 165 | \n",
+ " Twitter | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 69 | \n",
+ " Mobile app | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 66 | \n",
+ " Video | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 63 | \n",
+ " Human | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 90 | \n",
+ " Restaurants | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " Cosmetics | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " Cooking | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 81 | \n",
+ " Personal finance | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " Image | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 67 | \n",
+ " Current events | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 55 | \n",
+ " Happiness | \n",
+ " 14000000 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Mobile phones | \n",
+ " 14000000 | \n",
+ "
\n",
+ " \n",
+ " 412 | \n",
+ " Mexico | \n",
+ " 14000000 | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " Shoes | \n",
+ " 14000000 | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " Pop music | \n",
+ " 14000000 | \n",
+ "
\n",
+ " \n",
+ " 354 | \n",
+ " Viral video | \n",
+ " 14000000 | \n",
+ "
\n",
+ " \n",
+ " 82 | \n",
+ " Brand | \n",
+ " 14000000 | \n",
+ "
\n",
+ " \n",
+ " 89 | \n",
+ " Gratitude | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " Coupons | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 135 | \n",
+ " Rings of Saturn | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " Free software | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 119 | \n",
+ " Outdoor recreation | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " Rock music | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 93 | \n",
+ " House | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 148 | \n",
+ " Facebook for Iphone | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 86 | \n",
+ " People | \n",
+ " 13000000 | \n",
+ "
\n",
+ " \n",
+ " 158 | \n",
+ " Physical exercise | \n",
+ " 12000000 | \n",
+ "
\n",
+ " \n",
+ " 201 | \n",
+ " Freight transport | \n",
+ " 12000000 | \n",
+ "
\n",
+ " \n",
+ " 84 | \n",
+ " Website | \n",
+ " 12000000 | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " Country | \n",
+ " 12000000 | \n",
+ "
\n",
+ " \n",
+ " 133 | \n",
+ " Alcoholic beverages | \n",
+ " 12000000 | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " Music videos | \n",
+ " 12000000 | \n",
+ "
\n",
+ " \n",
+ " 410 | \n",
+ " Wish | \n",
+ " 12000000 | \n",
+ "
\n",
+ " \n",
+ " 116 | \n",
+ " Learning | \n",
+ " 12000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " interest_name mau_audience\n",
+ "0 Entertainment 34000000\n",
+ "5 Hobbies and activities 33000000\n",
+ "10 Business and industry 32000000\n",
+ "3 Shopping and fashion 32000000\n",
+ "4 Technology 32000000\n",
+ "16 Music 31000000\n",
+ "8 Sports and outdoors 30000000\n",
+ "15 Food and drink 30000000\n",
+ "1 Facebook 30000000\n",
+ "9 Sports 30000000\n",
+ "37 Entre Rios Province 30000000\n",
+ "13 Family and relationships 30000000\n",
+ "2 Social network 29000000\n",
+ "11 Consumer electronics 29000000\n",
+ "18 Food 29000000\n",
+ "6 Instant messaging 28000000\n",
+ "7 Facebook Messenger 28000000\n",
+ "21 Family 27000000\n",
+ "12 Shopping 27000000\n",
+ "22 Reading 27000000\n",
+ "17 Games 27000000\n",
+ "28 Arts and music 26000000\n",
+ "23 Love 26000000\n",
+ "41 Business 26000000\n",
+ "19 Movies 26000000\n",
+ "20 Travel 26000000\n",
+ "24 Televisions 26000000\n",
+ "30 Education 24000000\n",
+ "40 Time 24000000\n",
+ "36 Fitness and wellness 24000000\n",
+ "34 Vehicles 24000000\n",
+ "45 TV 24000000\n",
+ "31 Beauty 24000000\n",
+ "39 Automobiles 23000000\n",
+ "26 Video games 23000000\n",
+ "29 Clothing 23000000\n",
+ "38 Life 23000000\n",
+ "32 Friendship 23000000\n",
+ "43 Finance 22000000\n",
+ "35 Instagram 22000000\n",
+ "94 Product (business) 21000000\n",
+ "51 Sales 21000000\n",
+ "97 United States 21000000\n",
+ "33 Online shopping 21000000\n",
+ "49 Live events 21000000\n",
+ "25 Computers 20000000\n",
+ "61 Pets 19000000\n",
+ "54 Politics and social issues 19000000\n",
+ "46 Design 19000000\n",
+ "44 World 19000000\n",
+ "47 Beverages 19000000\n",
+ "52 Online 18000000\n",
+ "68 Photograph 18000000\n",
+ "60 Child 18000000\n",
+ "53 Fashion accessories 18000000\n",
+ "42 Association football (Soccer) 18000000\n",
+ "50 Photography 17000000\n",
+ "154 Victory 17000000\n",
+ "14 Facebook for Android 17000000\n",
+ "77 Dance 17000000\n",
+ "64 Price 17000000\n",
+ "130 Sales promotion 16000000\n",
+ "151 IPhone 16000000\n",
+ "99 Home 16000000\n",
+ "95 Motherhood 16000000\n",
+ "92 Home and garden 16000000\n",
+ "165 Twitter 16000000\n",
+ "69 Mobile app 16000000\n",
+ "66 Video 16000000\n",
+ "63 Human 16000000\n",
+ "90 Restaurants 15000000\n",
+ "56 Cosmetics 15000000\n",
+ "88 Cooking 15000000\n",
+ "81 Personal finance 15000000\n",
+ "78 Image 15000000\n",
+ "67 Current events 15000000\n",
+ "55 Happiness 14000000\n",
+ "27 Mobile phones 14000000\n",
+ "412 Mexico 14000000\n",
+ "59 Shoes 14000000\n",
+ "70 Pop music 14000000\n",
+ "354 Viral video 14000000\n",
+ "82 Brand 14000000\n",
+ "89 Gratitude 13000000\n",
+ "103 Coupons 13000000\n",
+ "135 Rings of Saturn 13000000\n",
+ "62 Free software 13000000\n",
+ "119 Outdoor recreation 13000000\n",
+ "73 Rock music 13000000\n",
+ "93 House 13000000\n",
+ "148 Facebook for Iphone 13000000\n",
+ "86 People 13000000\n",
+ "158 Physical exercise 12000000\n",
+ "201 Freight transport 12000000\n",
+ "84 Website 12000000\n",
+ "80 Country 12000000\n",
+ "133 Alcoholic beverages 12000000\n",
+ "58 Music videos 12000000\n",
+ "410 Wish 12000000\n",
+ "116 Learning 12000000"
+ ]
+ },
+ "execution_count": 124,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.set_option('display.max_rows', 100)\n",
+ "max_expat_audience = expat_interests.loc[:, audience_var].max()\n",
+ "min_expat_audience = expat_interests.loc[:, audience_var].min()\n",
+ "expat_interests_clean = expat_interests[(expat_interests.loc[:, audience_var] < max_expat_audience) &\n",
+ " (expat_interests.loc[:, audience_var] > min_expat_audience)]\n",
+ "expat_interests_clean.loc[:, ['interest_name', audience_var]].head(n=100)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "These all look pretty reasonable! Who doesn't like `Coupons`?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's match these interests against those of native US Americans, normalize each group's audience counts to account for population size (dividing by that group's maximum audience), and then compare the resulting distributions with overlapping histograms."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "original data has 4834 rows\n",
+ "clean data has 4834 rows\n"
+ ]
+ }
+ ],
+ "source": [
+ "US_MX_interests = pd.read_csv('../../data/query_results/US_MX_native_interests_top_3000_interest_new_tmp.tsv', sep='\\t', index_col=False)\n",
+ "US_MX_interests = clean_interest_data(US_MX_interests)\n",
+ "US_interests = US_MX_interests[US_MX_interests.loc[:, 'location'] == 'US']\n",
+ "MX_interests = US_MX_interests[US_MX_interests.loc[:, 'location'] == 'MX']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get rid of the max and min audience values (strict < max and > min filters below)\n",
+ "max_US_audience = US_interests.loc[:, audience_var].max()\n",
+ "max_MX_audience = MX_interests.loc[:, audience_var].max()\n",
+ "min_US_audience = US_interests.loc[:, audience_var].min()\n",
+ "min_MX_audience = MX_interests.loc[:, audience_var].min()\n",
+ "US_interests = US_interests[(US_interests.loc[:, audience_var] < max_US_audience) &\n",
+ " (US_interests.loc[:, audience_var] > min_US_audience)]\n",
+ "MX_interests = MX_interests[(MX_interests.loc[:, audience_var] < max_MX_audience) &\n",
+ " (MX_interests.loc[:, audience_var] > min_MX_audience)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " interest_name | \n",
+ " mau_audience | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " Sports and outdoors | \n",
+ " 103000000 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Automobiles | \n",
+ " 77000000 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Politics and social issues | \n",
+ " 70000000 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Live events | \n",
+ " 68000000 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " Home and garden | \n",
+ " 57000000 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Li Ke | \n",
+ " 48000000 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Homo sapiens | \n",
+ " 45000000 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Imagem | \n",
+ " 41000000 | \n",
+ "
\n",
+ " \n",
+ " 1368 | \n",
+ " Walmart | \n",
+ " 39000000 | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " Pinterest | \n",
+ " 37000000 | \n",
+ "
\n",
+ " \n",
+ " 904 | \n",
+ " Hu Ge | \n",
+ " 35000000 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " Real estate | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 684 | \n",
+ " People (magazine) | \n",
+ " 30000000 | \n",
+ "
\n",
+ " \n",
+ " 604 | \n",
+ " U.S. state | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " Women's clothing | \n",
+ " 29000000 | \n",
+ "
\n",
+ " \n",
+ " 126 | \n",
+ " Meme | \n",
+ " 28000000 | \n",
+ "
\n",
+ " \n",
+ " 1314 | \n",
+ " Republican Party (United States) | \n",
+ " 28000000 | \n",
+ "
\n",
+ " \n",
+ " 1374 | \n",
+ " Mother's Day | \n",
+ " 28000000 | \n",
+ "
\n",
+ " \n",
+ " 3362 | \n",
+ " Pandora Radio | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 560 | \n",
+ " Donald Trump | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 498 | \n",
+ " Hollywood | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Software | \n",
+ " 27000000 | \n",
+ "
\n",
+ " \n",
+ " 548 | \n",
+ " Symptom | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " Netflix | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 1302 | \n",
+ " Popular music | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " Wealth | \n",
+ " 26000000 | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " Streaming media | \n",
+ " 25000000 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Spotify | \n",
+ " 25000000 | \n",
+ "
\n",
+ " \n",
+ " 630 | \n",
+ " Patient | \n",
+ " 25000000 | \n",
+ "
\n",
+ " \n",
+ " 894 | \n",
+ " Democratic Party (United States) | \n",
+ " 25000000 | \n",
+ "
\n",
+ " \n",
+ " 82 | \n",
+ " Exhibition game | \n",
+ " 25000000 | \n",
+ "
\n",
+ " \n",
+ " 688 | \n",
+ " BuzzFeed | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 474 | \n",
+ " TV game shows | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 1404 | \n",
+ " Florida | \n",
+ " 24000000 | \n",
+ "
\n",
+ " \n",
+ " 1634 | \n",
+ " OMG (song) | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 862 | \n",
+ " National Football League | \n",
+ " 23000000 | \n",
+ "
\n",
+ " \n",
+ " 208 | \n",
+ " Virus | \n",
+ " 22000000 | \n",
+ "
\n",
+ " \n",
+ " 146 | \n",
+ " Trucks | \n",
+ " 22000000 | \n",
+ "
\n",
+ " \n",
+ " 1494 | \n",
+ " Real estate broker | \n",
+ " 22000000 | \n",
+ "
\n",
+ " \n",
+ " 158 | \n",
+ " High school | \n",
+ " 22000000 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " Vacations | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 1364 | \n",
+ " Texas | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 106 | \n",
+ " Military | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 218 | \n",
+ " North America | \n",
+ " 21000000 | \n",
+ "
\n",
+ " \n",
+ " 1058 | \n",
+ " College football | \n",
+ " 20000000 | \n",
+ "
\n",
+ " \n",
+ " 462 | \n",
+ " Grandparent | \n",
+ " 20000000 | \n",
+ "
\n",
+ " \n",
+ " 3478 | \n",
+ " The Weather Channel | \n",
+ " 20000000 | \n",
+ "
\n",
+ " \n",
+ " 2386 | \n",
+ " Variety show | \n",
+ " 20000000 | \n",
+ "
\n",
+ " \n",
+ " 290 | \n",
+ " President of the United States | \n",
+ " 20000000 | \n",
+ "
\n",
+ " \n",
+ " 782 | \n",
+ " Tasty | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 152 | \n",
+ " Hunting | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Barbecue | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 1502 | \n",
+ " National Football League on television | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 312 | \n",
+ " Home (2009 film) | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 316 | \n",
+ " Truth | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 1628 | \n",
+ " County (United States) | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 276 | \n",
+ " Job | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 348 | \n",
+ " Fishing | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 1962 | \n",
+ " Limited liability company | \n",
+ " 19000000 | \n",
+ "
\n",
+ " \n",
+ " 370 | \n",
+ " Beyin | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 528 | \n",
+ " New York City | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 588 | \n",
+ " Nursing | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 762 | \n",
+ " California | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 124 | \n",
+ " Health care | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 230 | \n",
+ " Cancer awareness | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 92 | \n",
+ " Dinner | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " Renting | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 2056 | \n",
+ " County seat | \n",
+ " 18000000 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " Short Message Service | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 3148 | \n",
+ " NBC | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " Music download | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 1338 | \n",
+ " Quiz | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 278 | \n",
+ " Farm | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 758 | \n",
+ " Swimsuit | \n",
+ " 17000000 | \n",
+ "
\n",
+ " \n",
+ " 2404 | \n",
+ " Groupon | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 532 | \n",
+ " National Basketball Association | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 3076 | \n",
+ " American folk music | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 222 | \n",
+ " Acting | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 1136 | \n",
+ " Boyfriend | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 1142 | \n",
+ " Food Network | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 3280 | \n",
+ " Genius | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 710 | \n",
+ " Internet meme | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 380 | \n",
+ " Camping | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 2650 | \n",
+ " Rugby league | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 994 | \n",
+ " Window | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 280 | \n",
+ " Bathing | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 972 | \n",
+ " Character (arts) | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 2756 | \n",
+ " America (band) | \n",
+ " 16000000 | \n",
+ "
\n",
+ " \n",
+ " 1198 | \n",
+ " Popular culture | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 734 | \n",
+ " Grilling | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 324 | \n",
+ " Day school | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 1694 | \n",
+ " Try | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 472 | \n",
+ " Lawyer | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 302 | \n",
+ " Sense | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 314 | \n",
+ " Performing arts | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 86 | \n",
+ " Phil Spector | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 4592 | \n",
+ " Old age | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Men's clothing | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " Chef | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ " 3842 | \n",
+ " Medical sign | \n",
+ " 15000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " interest_name mau_audience\n",
+ "2 Sports and outdoors 103000000\n",
+ "12 Automobiles 77000000\n",
+ "16 Politics and social issues 70000000\n",
+ "14 Live events 68000000\n",
+ "20 Home and garden 57000000\n",
+ "24 Li Ke 48000000\n",
+ "28 Homo sapiens 45000000\n",
+ "26 Imagem 41000000\n",
+ "1368 Walmart 39000000\n",
+ "58 Pinterest 37000000\n",
+ "904 Hu Ge 35000000\n",
+ "38 Real estate 30000000\n",
+ "684 People (magazine) 30000000\n",
+ "604 U.S. state 29000000\n",
+ "22 Women's clothing 29000000\n",
+ "126 Meme 28000000\n",
+ "1314 Republican Party (United States) 28000000\n",
+ "1374 Mother's Day 28000000\n",
+ "3362 Pandora Radio 27000000\n",
+ "560 Donald Trump 27000000\n",
+ "498 Hollywood 27000000\n",
+ "18 Software 27000000\n",
+ "548 Symptom 26000000\n",
+ "98 Netflix 26000000\n",
+ "1302 Popular music 26000000\n",
+ "62 Wealth 26000000\n",
+ "52 Streaming media 25000000\n",
+ "36 Spotify 25000000\n",
+ "630 Patient 25000000\n",
+ "894 Democratic Party (United States) 25000000\n",
+ "82 Exhibition game 25000000\n",
+ "688 BuzzFeed 24000000\n",
+ "474 TV game shows 24000000\n",
+ "1404 Florida 24000000\n",
+ "1634 OMG (song) 23000000\n",
+ "862 National Football League 23000000\n",
+ "208 Virus 22000000\n",
+ "146 Trucks 22000000\n",
+ "1494 Real estate broker 22000000\n",
+ "158 High school 22000000\n",
+ "48 Vacations 21000000\n",
+ "1364 Texas 21000000\n",
+ "106 Military 21000000\n",
+ "218 North America 21000000\n",
+ "1058 College football 20000000\n",
+ "462 Grandparent 20000000\n",
+ "3478 The Weather Channel 20000000\n",
+ "2386 Variety show 20000000\n",
+ "290 President of the United States 20000000\n",
+ "782 Tasty 19000000\n",
+ "152 Hunting 19000000\n",
+ "102 Barbecue 19000000\n",
+ "1502 National Football League on television 19000000\n",
+ "312 Home (2009 film) 19000000\n",
+ "316 Truth 19000000\n",
+ "1628 County (United States) 19000000\n",
+ "276 Job 19000000\n",
+ "348 Fishing 19000000\n",
+ "1962 Limited liability company 19000000\n",
+ "370 Beyin 18000000\n",
+ "528 New York City 18000000\n",
+ "588 Nursing 18000000\n",
+ "762 California 18000000\n",
+ "124 Health care 18000000\n",
+ "230 Cancer awareness 18000000\n",
+ "92 Dinner 18000000\n",
+ "68 Renting 18000000\n",
+ "2056 County seat 18000000\n",
+ "50 Short Message Service 17000000\n",
+ "3148 NBC 17000000\n",
+ "56 Music download 17000000\n",
+ "1338 Quiz 17000000\n",
+ "278 Farm 17000000\n",
+ "758 Swimsuit 17000000\n",
+ "2404 Groupon 16000000\n",
+ "532 National Basketball Association 16000000\n",
+ "3076 American folk music 16000000\n",
+ "222 Acting 16000000\n",
+ "1136 Boyfriend 16000000\n",
+ "1142 Food Network 16000000\n",
+ "3280 Genius 16000000\n",
+ "710 Internet meme 16000000\n",
+ "380 Camping 16000000\n",
+ "2650 Rugby league 16000000\n",
+ "994 Window 16000000\n",
+ "280 Bathing 16000000\n",
+ "972 Character (arts) 16000000\n",
+ "2756 America (band) 16000000\n",
+ "1198 Popular culture 15000000\n",
+ "734 Grilling 15000000\n",
+ "324 Day school 15000000\n",
+ "1694 Try 15000000\n",
+ "472 Lawyer 15000000\n",
+ "302 Sense 15000000\n",
+ "314 Performing arts 15000000\n",
+ "86 Phil Spector 15000000\n",
+ "4592 Old age 15000000\n",
+ "30 Men's clothing 15000000\n",
+ "70 Chef 15000000\n",
+ "3842 Medical sign 15000000"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "US_interests.sort_values(audience_var, inplace=True, ascending=False)\n",
+ "US_interests.loc[:, ['interest_name', audience_var]].head(n=100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " interest_name | \n",
+ " mau_audience | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2798 | \n",
+ " African Union | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 3682 | \n",
+ " Space age pop | \n",
+ " 120000 | \n",
+ "
\n",
+ " \n",
+ " 4248 | \n",
+ " Worldbeat | \n",
+ " 110000 | \n",
+ "
\n",
+ " \n",
+ " 2070 | \n",
+ " Franco De Vita | \n",
+ " 110000 | \n",
+ "
\n",
+ " \n",
+ " 4788 | \n",
+ " Public sector | \n",
+ " 110000 | \n",
+ "
\n",
+ " \n",
+ " 1444 | \n",
+ " Samsung Galaxy S III | \n",
+ " 110000 | \n",
+ "
\n",
+ " \n",
+ " 3586 | \n",
+ " Handball | \n",
+ " 110000 | \n",
+ "
\n",
+ " \n",
+ " 3724 | \n",
+ " International News Service v. Associated Press | \n",
+ " 110000 | \n",
+ "
\n",
+ " \n",
+ " 1798 | \n",
+ " Heel (shoe) | \n",
+ " 110000 | \n",
+ "
\n",
+ " \n",
+ " 3600 | \n",
+ " Ronaldinho | \n",
+ " 100000 | \n",
+ "
\n",
+ " \n",
+ " 4776 | \n",
+ " Ottoman Empire | \n",
+ " 100000 | \n",
+ "
\n",
+ " \n",
+ " 1778 | \n",
+ " Arab world | \n",
+ " 100000 | \n",
+ "
\n",
+ " \n",
+ " 2724 | \n",
+ " Legal personality | \n",
+ " 98000 | \n",
+ "
\n",
+ " \n",
+ " 4448 | \n",
+ " Comune | \n",
+ " 93000 | \n",
+ "
\n",
+ " \n",
+ " 3864 | \n",
+ " Member states of the Arab League | \n",
+ " 92000 | \n",
+ "
\n",
+ " \n",
+ " 3348 | \n",
+ " Tunisia | \n",
+ " 92000 | \n",
+ "
\n",
+ " \n",
+ " 3672 | \n",
+ " Jangle pop | \n",
+ " 91000 | \n",
+ "
\n",
+ " \n",
+ " 2242 | \n",
+ " Spain national under-21 football team | \n",
+ " 88000 | \n",
+ "
\n",
+ " \n",
+ " 1590 | \n",
+ " F.C. Porto | \n",
+ " 85000 | \n",
+ "
\n",
+ " \n",
+ " 4470 | \n",
+ " Juventus F.C. | \n",
+ " 85000 | \n",
+ "
\n",
+ " \n",
+ " 3680 | \n",
+ " Malaysian pop | \n",
+ " 85000 | \n",
+ "
\n",
+ " \n",
+ " 3178 | \n",
+ " Argentina national football team | \n",
+ " 85000 | \n",
+ "
\n",
+ " \n",
+ " 3480 | \n",
+ " Deepika Padukone | \n",
+ " 81000 | \n",
+ "
\n",
+ " \n",
+ " 3196 | \n",
+ " Kuwait | \n",
+ " 79000 | \n",
+ "
\n",
+ " \n",
+ " 3522 | \n",
+ " Sheikh | \n",
+ " 78000 | \n",
+ "
\n",
+ " \n",
+ " 3518 | \n",
+ " States and union territories of India | \n",
+ " 77000 | \n",
+ "
\n",
+ " \n",
+ " 1432 | \n",
+ " El Clasico | \n",
+ " 76000 | \n",
+ "
\n",
+ " \n",
+ " 4270 | \n",
+ " Sport Club Internacional | \n",
+ " 75000 | \n",
+ "
\n",
+ " \n",
+ " 4478 | \n",
+ " Ali | \n",
+ " 73000 | \n",
+ "
\n",
+ " \n",
+ " 3788 | \n",
+ " Renault | \n",
+ " 70000 | \n",
+ "
\n",
+ " \n",
+ " 4010 | \n",
+ " Audi A7 | \n",
+ " 69000 | \n",
+ "
\n",
+ " \n",
+ " 3198 | \n",
+ " Thai baht | \n",
+ " 68000 | \n",
+ "
\n",
+ " \n",
+ " 4122 | \n",
+ " A.C. Milan | \n",
+ " 66000 | \n",
+ "
\n",
+ " \n",
+ " 3008 | \n",
+ " Urdu | \n",
+ " 66000 | \n",
+ "
\n",
+ " \n",
+ " 2234 | \n",
+ " My Talking Tom | \n",
+ " 66000 | \n",
+ "
\n",
+ " \n",
+ " 3086 | \n",
+ " Shah Rukh Khan | \n",
+ " 65000 | \n",
+ "
\n",
+ " \n",
+ " 4476 | \n",
+ " Quezon City | \n",
+ " 65000 | \n",
+ "
\n",
+ " \n",
+ " 3896 | \n",
+ " Cable | \n",
+ " 65000 | \n",
+ "
\n",
+ " \n",
+ " 3954 | \n",
+ " Vivo (telecommunications) | \n",
+ " 61000 | \n",
+ "
\n",
+ " \n",
+ " 1452 | \n",
+ " Peso | \n",
+ " 58000 | \n",
+ "
\n",
+ " \n",
+ " 2892 | \n",
+ " Nescafe | \n",
+ " 58000 | \n",
+ "
\n",
+ " \n",
+ " 4386 | \n",
+ " 100 metres | \n",
+ " 57000 | \n",
+ "
\n",
+ " \n",
+ " 3534 | \n",
+ " SMS (hydrology software) | \n",
+ " 56000 | \n",
+ "
\n",
+ " \n",
+ " 3326 | \n",
+ " Algeria | \n",
+ " 53000 | \n",
+ "
\n",
+ " \n",
+ " 3378 | \n",
+ " Atletico Madrid | \n",
+ " 53000 | \n",
+ "
\n",
+ " \n",
+ " 4338 | \n",
+ " Xiaomi | \n",
+ " 51000 | \n",
+ "
\n",
+ " \n",
+ " 3866 | \n",
+ " Rede Globo | \n",
+ " 49000 | \n",
+ "
\n",
+ " \n",
+ " 3578 | \n",
+ " Russian pop | \n",
+ " 48000 | \n",
+ "
\n",
+ " \n",
+ " 3890 | \n",
+ " Frases | \n",
+ " 47000 | \n",
+ "
\n",
+ " \n",
+ " 4352 | \n",
+ " Shraddha Kapoor | \n",
+ " 47000 | \n",
+ "
\n",
+ " \n",
+ " 3560 | \n",
+ " Languages of India | \n",
+ " 43000 | \n",
+ "
\n",
+ " \n",
+ " 4148 | \n",
+ " New Delhi | \n",
+ " 40000 | \n",
+ "
\n",
+ " \n",
+ " 4576 | \n",
+ " Alia Bhatt | \n",
+ " 38000 | \n",
+ "
\n",
+ " \n",
+ " 4578 | \n",
+ " Urdu poetry | \n",
+ " 37000 | \n",
+ "
\n",
+ " \n",
+ " 3044 | \n",
+ " Puma SE | \n",
+ " 36000 | \n",
+ "
\n",
+ " \n",
+ " 2918 | \n",
+ " Salman Khan | \n",
+ " 33000 | \n",
+ "
\n",
+ " \n",
+ " 3746 | \n",
+ " Chord names and symbols (popular music) | \n",
+ " 32000 | \n",
+ "
\n",
+ " \n",
+ " 4052 | \n",
+ " Bengali language | \n",
+ " 32000 | \n",
+ "
\n",
+ " \n",
+ " 4332 | \n",
+ " 200 metres | \n",
+ " 29000 | \n",
+ "
\n",
+ " \n",
+ " 4792 | \n",
+ " God in Islam | \n",
+ " 29000 | \n",
+ "
\n",
+ " \n",
+ " 4714 | \n",
+ " Passion (Christianity) | \n",
+ " 28000 | \n",
+ "
\n",
+ " \n",
+ " 2126 | \n",
+ " Tamil language | \n",
+ " 28000 | \n",
+ "
\n",
+ " \n",
+ " 1954 | \n",
+ " Narendra Modi | \n",
+ " 28000 | \n",
+ "
\n",
+ " \n",
+ " 1744 | \n",
+ " Flipkart | \n",
+ " 28000 | \n",
+ "
\n",
+ " \n",
+ " 3914 | \n",
+ " Orange S.A. | \n",
+ " 27000 | \n",
+ "
\n",
+ " \n",
+ " 3292 | \n",
+ " Maharashtra | \n",
+ " 26000 | \n",
+ "
\n",
+ " \n",
+ " 3356 | \n",
+ " MercadoLibre.com | \n",
+ " 22000 | \n",
+ "
\n",
+ " \n",
+ " 3792 | \n",
+ " Cifras | \n",
+ " 21000 | \n",
+ "
\n",
+ " \n",
+ " 4710 | \n",
+ " Peugeot | \n",
+ " 21000 | \n",
+ "
\n",
+ " \n",
+ " 4050 | \n",
+ " India national cricket team | \n",
+ " 19000 | \n",
+ "
\n",
+ " \n",
+ " 4250 | \n",
+ " Indian Army | \n",
+ " 19000 | \n",
+ "
\n",
+ " \n",
+ " 4590 | \n",
+ " Arab League | \n",
+ " 18000 | \n",
+ "
\n",
+ " \n",
+ " 2560 | \n",
+ " Virat Kohli | \n",
+ " 17000 | \n",
+ "
\n",
+ " \n",
+ " 2660 | \n",
+ " Telugu language | \n",
+ " 17000 | \n",
+ "
\n",
+ " \n",
+ " 3462 | \n",
+ " Paytm | \n",
+ " 15000 | \n",
+ "
\n",
+ " \n",
+ " 3670 | \n",
+ " Sunshine pop | \n",
+ " 15000 | \n",
+ "
\n",
+ " \n",
+ " 4518 | \n",
+ " Egyptian Arabic | \n",
+ " 14000 | \n",
+ "
\n",
+ " \n",
+ " 2942 | \n",
+ " Indo pop | \n",
+ " 13000 | \n",
+ "
\n",
+ " \n",
+ " 4684 | \n",
+ " Musica sertaneja | \n",
+ " 12000 | \n",
+ "
\n",
+ " \n",
+ " 1584 | \n",
+ " Vodafone | \n",
+ " 12000 | \n",
+ "
\n",
+ " \n",
+ " 3524 | \n",
+ " Bikin | \n",
+ " 9900 | \n",
+ "
\n",
+ " \n",
+ " 2668 | \n",
+ " BlackBerry Messenger | \n",
+ " 9200 | \n",
+ "
\n",
+ " \n",
+ " 3346 | \n",
+ " Carrefour | \n",
+ " 9100 | \n",
+ "
\n",
+ " \n",
+ " 2296 | \n",
+ " Oppo Electronics | \n",
+ " 8900 | \n",
+ "
\n",
+ " \n",
+ " 4460 | \n",
+ " Bandung | \n",
+ " 8800 | \n",
+ "
\n",
+ " \n",
+ " 3662 | \n",
+ " Operatic pop | \n",
+ " 8300 | \n",
+ "
\n",
+ " \n",
+ " 3276 | \n",
+ " Mahendra Singh Dhoni | \n",
+ " 7200 | \n",
+ "
\n",
+ " \n",
+ " 4172 | \n",
+ " BBC News Online | \n",
+ " 7100 | \n",
+ "
\n",
+ " \n",
+ " 3038 | \n",
+ " Grand Prix of Portland | \n",
+ " 6900 | \n",
+ "
\n",
+ " \n",
+ " 1938 | \n",
+ " Supporters of FC Barcelona | \n",
+ " 6700 | \n",
+ "
\n",
+ " \n",
+ " 2734 | \n",
+ " CCTV News | \n",
+ " 6300 | \n",
+ "
\n",
+ " \n",
+ " 3556 | \n",
+ " Wonky pop | \n",
+ " 6100 | \n",
+ "
\n",
+ " \n",
+ " 608 | \n",
+ " UC Browser | \n",
+ " 4300 | \n",
+ "
\n",
+ " \n",
+ " 4382 | \n",
+ " Dari (Persian dialect) | \n",
+ " 4200 | \n",
+ "
\n",
+ " \n",
+ " 3690 | \n",
+ " Sophisti-pop | \n",
+ " 4000 | \n",
+ "
\n",
+ " \n",
+ " 1882 | \n",
+ " Value-added tax | \n",
+ " 3900 | \n",
+ "
\n",
+ " \n",
+ " 2842 | \n",
+ " Indian pop | \n",
+ " 3500 | \n",
+ "
\n",
+ " \n",
+ " 2096 | \n",
+ " Types of business entity | \n",
+ " 2600 | \n",
+ "
\n",
+ " \n",
+ " 2946 | \n",
+ " India News | \n",
+ " 2100 | \n",
+ "
\n",
+ " \n",
+ " 3602 | \n",
+ " V-pop | \n",
+ " 1900 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " interest_name mau_audience\n",
+ "2798 African Union 120000\n",
+ "3682 Space age pop 120000\n",
+ "4248 Worldbeat 110000\n",
+ "2070 Franco De Vita 110000\n",
+ "4788 Public sector 110000\n",
+ "1444 Samsung Galaxy S III 110000\n",
+ "3586 Handball 110000\n",
+ "3724 International News Service v. Associated Press 110000\n",
+ "1798 Heel (shoe) 110000\n",
+ "3600 Ronaldinho 100000\n",
+ "4776 Ottoman Empire 100000\n",
+ "1778 Arab world 100000\n",
+ "2724 Legal personality 98000\n",
+ "4448 Comune 93000\n",
+ "3864 Member states of the Arab League 92000\n",
+ "3348 Tunisia 92000\n",
+ "3672 Jangle pop 91000\n",
+ "2242 Spain national under-21 football team 88000\n",
+ "1590 F.C. Porto 85000\n",
+ "4470 Juventus F.C. 85000\n",
+ "3680 Malaysian pop 85000\n",
+ "3178 Argentina national football team 85000\n",
+ "3480 Deepika Padukone 81000\n",
+ "3196 Kuwait 79000\n",
+ "3522 Sheikh 78000\n",
+ "3518 States and union territories of India 77000\n",
+ "1432 El Clasico 76000\n",
+ "4270 Sport Club Internacional 75000\n",
+ "4478 Ali 73000\n",
+ "3788 Renault 70000\n",
+ "4010 Audi A7 69000\n",
+ "3198 Thai baht 68000\n",
+ "4122 A.C. Milan 66000\n",
+ "3008 Urdu 66000\n",
+ "2234 My Talking Tom 66000\n",
+ "3086 Shah Rukh Khan 65000\n",
+ "4476 Quezon City 65000\n",
+ "3896 Cable 65000\n",
+ "3954 Vivo (telecommunications) 61000\n",
+ "1452 Peso 58000\n",
+ "2892 Nescafe 58000\n",
+ "4386 100 metres 57000\n",
+ "3534 SMS (hydrology software) 56000\n",
+ "3326 Algeria 53000\n",
+ "3378 Atletico Madrid 53000\n",
+ "4338 Xiaomi 51000\n",
+ "3866 Rede Globo 49000\n",
+ "3578 Russian pop 48000\n",
+ "3890 Frases 47000\n",
+ "4352 Shraddha Kapoor 47000\n",
+ "3560 Languages of India 43000\n",
+ "4148 New Delhi 40000\n",
+ "4576 Alia Bhatt 38000\n",
+ "4578 Urdu poetry 37000\n",
+ "3044 Puma SE 36000\n",
+ "2918 Salman Khan 33000\n",
+ "3746 Chord names and symbols (popular music) 32000\n",
+ "4052 Bengali language 32000\n",
+ "4332 200 metres 29000\n",
+ "4792 God in Islam 29000\n",
+ "4714 Passion (Christianity) 28000\n",
+ "2126 Tamil language 28000\n",
+ "1954 Narendra Modi 28000\n",
+ "1744 Flipkart 28000\n",
+ "3914 Orange S.A. 27000\n",
+ "3292 Maharashtra 26000\n",
+ "3356 MercadoLibre.com 22000\n",
+ "3792 Cifras 21000\n",
+ "4710 Peugeot 21000\n",
+ "4050 India national cricket team 19000\n",
+ "4250 Indian Army 19000\n",
+ "4590 Arab League 18000\n",
+ "2560 Virat Kohli 17000\n",
+ "2660 Telugu language 17000\n",
+ "3462 Paytm 15000\n",
+ "3670 Sunshine pop 15000\n",
+ "4518 Egyptian Arabic 14000\n",
+ "2942 Indo pop 13000\n",
+ "4684 Musica sertaneja 12000\n",
+ "1584 Vodafone 12000\n",
+ "3524 Bikin 9900\n",
+ "2668 BlackBerry Messenger 9200\n",
+ "3346 Carrefour 9100\n",
+ "2296 Oppo Electronics 8900\n",
+ "4460 Bandung 8800\n",
+ "3662 Operatic pop 8300\n",
+ "3276 Mahendra Singh Dhoni 7200\n",
+ "4172 BBC News Online 7100\n",
+ "3038 Grand Prix of Portland 6900\n",
+ "1938 Supporters of FC Barcelona 6700\n",
+ "2734 CCTV News 6300\n",
+ "3556 Wonky pop 6100\n",
+ "608 UC Browser 4300\n",
+ "4382 Dari (Persian dialect) 4200\n",
+ "3690 Sophisti-pop 4000\n",
+ "1882 Value-added tax 3900\n",
+ "2842 Indian pop 3500\n",
+ "2096 Types of business entity 2600\n",
+ "2946 India News 2100\n",
+ "3602 V-pop 1900"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "US_interests.loc[:, ['interest_name', audience_var]].tail(n=100)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "No more `Coupons`. Let's see how different these distributions are."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 125,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "interest_vars = ['interest_name', audience_var]\n",
+ "expat_interest_normed = expat_interests_clean.loc[:, interest_vars]\n",
+ "US_interest_normed = US_interests.loc[:, interest_vars]\n",
+ "MX_interest_normed = MX_interests.loc[:, interest_vars]\n",
+ "expat_interest_normed.loc[:, audience_var] = expat_interest_normed.loc[:, audience_var] / max_expat_audience\n",
+ "US_interest_normed.loc[:, audience_var] = US_interest_normed.loc[:, audience_var] / max_US_audience\n",
+ "MX_interest_normed.loc[:, audience_var] = MX_interest_normed.loc[:, audience_var] / max_MX_audience"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1447 shared interests\n"
+ ]
+ }
+ ],
+ "source": [
+ "shared_interests = list(set(expat_interest_normed.loc[:, 'interest_name'].unique()) & set(US_interest_normed.loc[:, 'interest_name'].unique()))\n",
+ "print('%d shared interests'%(len(shared_interests)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 127,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " interest_name | \n",
+ " mau_audience_expat | \n",
+ " mau_audience_US | \n",
+ " mau_audience_MX | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1090 | \n",
+ " 1,000,000,000 | \n",
+ " 0.024211 | \n",
+ " 0.028105 | \n",
+ " 0.006000 | \n",
+ "
\n",
+ " \n",
+ " 1234 | \n",
+ " 1080p | \n",
+ " 0.017105 | \n",
+ " 0.010458 | \n",
+ " 0.011273 | \n",
+ "
\n",
+ " \n",
+ " 1340 | \n",
+ " 20th Century Fox | \n",
+ " 0.010000 | \n",
+ " 0.005686 | \n",
+ " 0.076364 | \n",
+ "
\n",
+ " \n",
+ " 461 | \n",
+ " 3D computer graphics | \n",
+ " 0.057895 | \n",
+ " 0.037255 | \n",
+ " 0.021818 | \n",
+ "
\n",
+ " \n",
+ " 1390 | \n",
+ " 4G | \n",
+ " 0.004737 | \n",
+ " 0.003791 | \n",
+ " 0.004909 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " interest_name mau_audience_expat mau_audience_US \\\n",
+ "1090 1,000,000,000 0.024211 0.028105 \n",
+ "1234 1080p 0.017105 0.010458 \n",
+ "1340 20th Century Fox 0.010000 0.005686 \n",
+ "461 3D computer graphics 0.057895 0.037255 \n",
+ "1390 4G 0.004737 0.003791 \n",
+ "\n",
+ " mau_audience_MX \n",
+ "1090 0.006000 \n",
+ "1234 0.011273 \n",
+ "1340 0.076364 \n",
+ "461 0.021818 \n",
+ "1390 0.004909 "
+ ]
+ },
+ "execution_count": 127,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "combined_interests = pd.merge(expat_interest_normed.rename(columns={audience_var:'%s_expat'%(audience_var)}), \n",
+ " US_interest_normed.rename(columns={audience_var:'%s_US'%(audience_var)}), on='interest_name')\n",
+ "combined_interests = pd.merge(combined_interests, \n",
+ " MX_interest_normed.rename(columns={audience_var:'%s_MX'%(audience_var)}), on='interest_name')\n",
+ "combined_interests.sort_values('interest_name', inplace=True, ascending=True)\n",
+ "combined_interests.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 139,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzsnXl8VNX5/z93ZpJMdkgI67AIUUyQxZqUKrZqNzRqtNXyBau1WkUt37Zf+631141S2n6hXy1VS/1arNa2FOJWK6JEI7IoAmFCCJB9mSyTdTJZZ5+59/z+mH3mztyZJDMJ8LxfL15k7j333Odu5znPc57zHI4xxkAQBEEQAGSTLQBBEAQxdSClQBAEQXggpUAQBEF4IKVAEARBeCClQBAEQXggpUAQBEF4IKVAEARBeCClQBAEQXggpUAQBEF4UEy2ANEyY8YMLFq0aLLFIAiCuKBobW1Ff3+/ZLkLTiksWrQIarV6ssUgCIK4oCgoKIioHLmPCIIgCA+kFAiCIAgPMVUKpaWlWLp0KXJzc7F9+/ag/e3t7bjppptw9dVXY8WKFXjvvfdiKQ5BEAQhQczGFHiex6ZNm1BWVgaVSoXCwkIUFxcjPz/fU+Y3v/kN1q1bh8ceeww1NTUoKipCa2trrEQiCOISwW63Q6vVwmKxTLYocUepVEKlUiEhIWFMx8dMKZSXlyM3NxeLFy8GAKxfvx5vv/22n1LgOA4jIyMAgOHhYcydOzdW4hAEcQmh1WqRnp6ORYsWgeO4yRYnbjDGoNfrodVqcdlll42pjpi5jzo7OzF//nzPb5VKhc7OTr8yW7Zswe7du6FSqVBUVIQ//vGPsRKHIIhLCIvFguzs7EtKIQDOjnZ2dva4LKSYKQWxBd0CH9DevXvx7W9/G1qtFu+99x7uu+8+CIIQdNyuXbtQUFCAgoIC6HS6WIlMEMRFxKWmENyM97pjphRUKhU6Ojo8v7VabZB76KWXXsK6desAANdeey0sFovo5IqNGzdCrVZDrVYjJycnViITUaDXMxgM0a/kqtcLMBppBViCmKrETCkUFhaisbERGo0GNpsNJSUlKC4u9iuzYMECHDx4EABQW1sLi8VCjf4FQnk5j48/5sdwnICjR6M/jiAIL6+88gq6urpiUnfMlIJCocDOnTuxdu1a5OXlYd26dVi2bBk2b96Mffv2AQB+//vf48UXX8TKlSuxYcMGvPLKK5esyUcQBBEpsVQKMU1zUVRUhKKiIr9tW7du9fydn5+PY8eOxVIEgiCISWH37t147rnnYLPZsHr1avz0pz/Fl7/8ZRw/fhxZWVm44YYb8Itf/AJXXHEFbr75ZqxevRqVlZW44oor8Pe//x0pKSnYunUr3nnnHZjNZlx33XX485//jDfffBNqtRrf/OY3kZycjOPHjyM5OXnC5L7gch8RBEFEQ20tD1fk+4SRkQHk5cnDnLMWr776Ko4dO4aEhAR897vfxZEjR/Dkk0/i0UcfxerVq5Gfn4+vfvWraG1tRX19PV566SWsWbMGDz74IJ5//nn86Ec/wn/+539i8+bNAID77rsP+/fvx913342dO3fi6aefjjifUTRQmguCIIgJ5uDBg6ioqEBhYSFWrVqFgwcPoqWlBQ899BBGR0fxwgsv4Omnn/aUnz9/PtasWQMAuPfee/HJJ58AAA4dOoTVq1dj+fLl+Oijj1BdXR1z2clSIAjioiZcjz5WMMZw//33Y9u2bX7bTSYTtFotAMBgMCA9PR1AcBgpx3GwWCz47ne/C7Vajfnz52PLli1xmaFNlgJBEMQE86UvfQlvvPEG+vr6AAADAwNoa2vDk08+iW9+85vYunUrHn74YU/59vZ2HD9+HIBz/tb111/vUQAzZsyAwWDAG2+84Smfnp6O0dHRmMhOSoEgCGKCyc/Px29+8xt89atfxYoVK/CVr3wFra2tOHXqlEcxJCYm4q9/
/SsAIC8vD3/729+wYsUKDAwM4LHHHsO0adPw8MMPY/ny5bjzzjtRWFjoqf/b3/42Hn30UaxatQpms3lCZeeY2NTjKUxBQQEtsjMFOHDAAQC45ZboPJBjPY4goqG2thZ5eXmTLUZEtLa24rbbbsP58+cnrE6x64+07SRLgSAIgvBASoEgCGISWbRo0YRaCeOFlAJBEAThgZQCQRAE4YGUAkEQBOGBlAJBEAThgZQCQRDEBNPa2oqrrrrKb9uWLVvw9NNP48SJE1i9ejVWrVqFvLw8bNmyZXKEDAEFixMEQcSR+++/H6+99hpWrlwJnudRX18/2SL5QZYCQRBEHOnr68OcOXMAAHK5HPn5+ZMskT9kKRAEcVHD19YiFrmz5WOcMf34449j6dKluPHGG3HzzTfj/vvvh1KpnFj5xkFMLYXS0lIsXboUubm52L59e9D+xx9/HKtWrcKqVatwxRVXYNq0abEUhyAIIi6EWkGS4zhs3rwZarUaX/3qV7Fnzx7cfPPNcZYuPDGzFHiex6ZNm1BWVgaVSoXCwkIUFxf7mUp/+MMfPH//8Y9/RGVlZazEIQjiEmWsPfrxkJ2djcHBQb9tAwMDuOyyywAAS5YswWOPPYaHH34YOTk50Ov1yM7OjrucYsTMUigvL0dubi4WL16MxMRErF+/Hm+//XbI8nv37sWGDRtiJQ5BEETcSEtLw5w5c3Dw4EEAToVQWlqK66+/Hu+++y7ceUgbGxshl8unlJckZpZCZ2cn5s+f7/mtUqlw8uRJ0bJtbW3QaDT44he/GCtxiAkmvf4kHKmZAK6SLEsQlyJ///vfsWnTJvz3f/83AOCXv/wllixZgp/97Gd4/PHHkZKSAoVCgX/+85+Qy+O/EFAoYqYUxDJyh/KzlZSU4O677w55Y3bt2oVdu3YBAHQ63cQJSYyZBMMgEgyDiFYpyGxmMJkCFONAXOzk5+fj0KFDQdtLSkomQZrIiZn7SKVSoaOjw/Nbq9Vi7ty5omVLSkrCuo42btwItVoNtVqNnJycCZeViB/Tzh1BZvXHky0GQRAhiJlSKCwsRGNjIzQaDWw2G0pKSlBcXBxUrr6+HoODg7j22mtjJQoxxZA5bJMtAkEQIYiZUlAoFNi5cyfWrl2LvLw8rFu3DsuWLcPmzZuxb98+T7m9e/di/fr1IV1LBEEQRPyIqWO3qKgIRUVFftu2bt3q93uq5f0gCIK4lJG0FDQaTUTbCIIgiAsfSaVw1113BW27++67YyIMQRAEMbmEdB/V1dWhuroaw8PD+Ne//uXZPjIyAovFEhfhCIIgLlQ4jsO9996Lf/zjHwAAh8OBOXPmYPXq1di/fz927NiB6upqvPTSSwCAf/7zn9izZw/efffdyRQ7tFKor6/H/v37MTQ0hHfeecezPT09HS+++GJchCMIgrhQSU1Nxfnz52E2m5GcnIyysjLMmzfPs//73/8+CgoKcOzYMSxbtgw///nPPTOgJ5OQSuGOO+7AHXfcgePHj1O4KEEQxBi45ZZb8O677+Luu+/2pPL5+GPnPB2FQoHnn38e3/3ud/HZz34WDz74IBYvXjzJEkcQffTWW29h2bJlSE5Oxs0334yqqio888wzuPfee+MhH3GR0Z3RhxTb1EkTTFz81PK1GMHEps7OQAby5NKJ9tavX4+tW7fitttuw9mzZ/Hggw96lAIAXHfddcjLy8OHH36I2traCZVxrEgONH/wwQfIyMjA/v37oVKp0NDQgKeeeioeshEXIa3ZHaiZ0zjZYhBEXFixYgVaW1uxd+/eoPB8ADAYDFCr1bDb7VMmhY+kpWC32wEA7733HjZs2ICsrKyYC0UQBDFRRNKjjyXFxcX40Y9+hMOHD0Ov1/vt++Uvf4l7770Xs2bNwuOPP47XX399kqT0IqkUbr/9dlx55ZVITk7G888/D51ON6VWCSIIgpjKPPjgg8jMzMTy5ctx+PBhz/Zz587h3XffxZkzZ5CYmIiXX34ZZWVl
+MpXvjJ5wiIC99H27dtx/PhxqNVqJCQkICUlJey6CMSlAc8JECBMthgEMeVRqVT4wQ9+4LeNMYbHHnsMf/jDH6BUKiGTyfD888/jBz/4AWy2yc0NJqkUTCYT/vSnP+Gxxx4DAHR1dUGtVsdcMGJqU76oEufm1U22GAQxZTEYDEHbbrzxRuzfvx8cx+GTTz7BLbfc4tlXUFCAmpoaJCYmxlPMICSVwgMPPIDExER8+umnAJxa7+c//3nMBSOmPqZE82SLQBDEBCOpFJqbm/HjH/8YCQkJAIDk5GTRBXQIgiCICx9JpZCYmAiz2exJbd3c3IykpKSYC0YQBDEeLtXO63ivWzL66Fe/+hVuvvlmdHR04Jvf/CaOHTuGV155ZVwnJQiCiCVKpRJ6vR7Z2dmX1FotjDHo9fpxRYiGVQqMMVx55ZX417/+hRMnToAxhmeffRYzZswY8wkJgiBijUqlglarnTITwuKJUqmESqUa8/FhlQLHcbjzzjtRUVGBW2+9NerKS0tL8YMf/AA8z+Ohhx7C//t//y+ozGuvvYYtW7aA4zisXLkSe/bsifo8BEEQviQkJOCyyy6bbDEuSCTdR5/73Odw6tQpFBYWRlUxz/PYtGkTysrKoFKpUFhYiOLiYuTn53vKNDY2Ytu2bTh27BimT5+Ovr6+6K+AIAiCmDAkB5oPHTqEz33uc1iyZAlWrFiB5cuXY8WKFZIVl5eXIzc3F4sXL0ZiYiLWr18fNOntxRdfxKZNmzB9+nQAwMyZM8d4GQRBEMREIGkpHDhwYEwVd3Z2Yv78+Z7fKpUKJ0+e9CvT0NAAAFizZg14nseWLVtw8803j+l8BEEQxPiRtBQWLlyIjo4OfPTRR1i4cCFSUlIgCNLpDcTCogKjABwOBxobG3H48GHs3bsXDz30EIaGhoKO27VrFwoKClBQUHBJDhwRBEHEC0ml8Ktf/Qq/+93vsG3bNgDOrKmRrKWgUqnQ0dHh+a3VajF37tygMnfccYdnUGjp0qVobAxOq7xx40ao1Wqo1Wrk5ORInpsgCIIYG5JK4a233sK+ffuQmpoKAJg7dy5GR0clKy4sLERjYyM0Gg1sNhtKSkpQXFzsV+bOO+/EoUOHAAD9/f1oaGiYEisPEQRBXKpENKOZ4ziP68doNEZUsUKhwM6dO7F27Vrk5eVh3bp1WLZsGTZv3ox9+/YBANauXYvs7Gzk5+fjpptuwlNPPYXs7OxxXA5BEAQxHiQHmtetW4dHHnkEQ0NDePHFF/Hyyy/joYceiqjyoqKioNWGtm7d6vmb4zjs2LEDO3bsiFJsgiAIIhZIKoUf/ehHKCsrQ0ZGBurr67F169ZJXwSCIAiCiA2SSuHJJ5/E7373Oz9F4N5GEARBXFxIjimUlZUFbRvr3AWCIAhiahPSUvi///s/PP/882hpafGbwTw6Ooo1a9bERTiCIAgivoRUCvfccw9uueUW/OQnP8H27ds929PT05GVlRUX4QiCIIj4ElIpZGZmIjMzE3v37gXP8+jt7YXD4YDBYIDBYMCCBQviKSdBEAQRByQHmnfu3IktW7Zg1qxZkMmcQxAcx+Hs2bMxF44gCIKIL5JK4ZlnnkF9fT1NKiMIgrgEkIw+mj9/PjIzM+MhC0EQBDHJSFoKixcvxo033ohbb70VSUlJnu0//OEPYyoYQRAEEX8klcKCBQuwYMEC2Gw22Gy2eMhEEARBTBKSSuGXv/xlPOQgCIIgpgAhlcJ//dd/4ZlnnsHtt98etDgOAE+mU4IgCOLiIaRSuO+++wA4E+IRBEEQlwYhlcI111wDALjhhhviJgxBEAQxuUiGpBIEQRCXDjFVCqWlpVi6dClyc3P98ie5eeWVV5CTk4NVq1Zh1apV+Mtf/hJLcQiCIAgJJKOPxgrP89i0aRPKysqgUqlQWFiI4uJi5Ofn+5X7j//4D+zcuTNWYhAEQRBRIKkUGhoa8NRTT6GtrQ0Oh8Oz/aOPPgp7XHl5OXJzc7F4
8WIAwPr16/H2228HKQWCIAhi6iCpFL7xjW/g0UcfxcMPPwy5XB5xxZ2dnZg/f77nt0qlwsmTJ4PKvfnmmzh69CiuuOIK/OEPf/A7hiAIgogvkkpBoVDgsccei7pixljQtsD5Drfffjs2bNiApKQkvPDCC7j//vtFLZBdu3Zh165dAACdThe1LARBEERkhBxoHhgYwMDAAG6//XY8//zz6O7u9mwbGBiQrFilUqGjo8PzW6vVYu7cuX5lsrOzPfmUHn74YVRUVIjWtXHjRqjVaqjVauTk5ER0YQRBEET0hJ2nwHGcp8f/1FNPefZxHIeWlpawFRcWFqKxsREajQbz5s1DSUkJ9uzZ41emu7sbc+bMAeCcIZ2XlzfmCyEIgiDGT0iloNFoAAAWiwVKpdJvn8Vika5YocDOnTuxdu1a8DyPBx98EMuWLcPmzZtRUFCA4uJiPPfcc9i3bx8UCgWysrLwyiuvjO9qCIIgiHEhOaZw3XXX4fTp05LbxCgqKkJRUZHftq1bt3r+3rZtG7Zt2xaprARBEESMCakUenp60NnZCbPZjMrKSo8baWRkBCaTKW4CEgRBEPEjpFJ4//338corr0Cr1fotqJOeno7/+Z//iYtwBEEQRHwJqRTuv/9+3H///XjzzTdx1113xVMmgiAIYpIIqRR2796Ne++9F62trdixY0fQflqOkyAI4uIjpFIwGo0AAIPBEDdhCIIgiMklpFJ45JFHAABPPvlkUEgqQRAEcXEiGZJ61VVXYdasWfj85z+PL3zhC1izZg0yMzPjIRtBEAQRZyTXU2hqasLevXuxfPly7N+/HytXrsSqVaviIRtBEAQRZyQtBa1Wi2PHjuHjjz9GVVUVli1bhuuvvz4eshEEQRBxRlIpLFiwAIWFhfjpT3+KF154IR4yEQRBEJOEpPuosrIS3/rWt7Bnzx5ce+21+Na3voWXXnopHrIRBEEQcUbSUli5ciWWLFmCJUuW4OOPP8bu3btx9OhRfOc734mHfARBEEQckVQKBQUFsFqtuO6663D99dfj6NGjWLhwYTxkIwiCIOKMpFI4cOAALWxDXFBYrQwKBSCXc9KFCYLwQ3JMgRQCcaHx0Uc8Pv2Un2wxwqIX9BhhI5MtBkEEIakUCOJCZKpnZykXynGMPzbZYhBEEDFVCqWlpVi6dClyc3Oxffv2kOXeeOMNcBwHtVodS3EIgiAICSTHFADg008/RWtrKxwOh2fbt771rbDH8DyPTZs2oaysDCqVCoWFhSguLkZ+fr5fudHRUTz33HNYvXr1GMQnLiTcCzURBDF1kVQK9913H5qbm7Fq1SrI5XIAAMdxkkqhvLwcubm5WLx4MQBg/fr1ePvtt4OUwi9+8Qv8+Mc/xtNPPz3WayAIgiAmCEmloFarUVNTA46LLpKjs7MT8+fP9/xWqVQ4efKkX5nKykp0dHTgtttuI6VAEAQxBZAcU7jqqqvQ09MTdcVirgJfxSIIAh5//HH8/ve/l6xr165dKCgoQEFBAXQ6XdSyEARBEJEhaSn09/cjPz8fn/3sZ5GUlOTZvm/fvrDHqVQqdHR0eH5rtVrMnTvX83t0dBTnz5/HjTfeCADo6elBcXEx9u3bh4KCAr+6Nm7ciI0bNwJA0D6CIAhi4pBUClu2bBlTxYWFhWhsbIRGo8G8efNQUlKCPXv2ePZnZmaiv7/f8/vGG2/E008/TY0+QRDEJCLpPrrhhhtw5ZVXYnR0FKOjo8jLy8MNN9wgWbFCocDOnTuxdu1a5OXlYd26dVi2bBk2b94saWUQBEEQk4OkpfDaa6/hiSeewI033gjGGL73ve/hqaeewt133y1ZeVFREYqKivy2bd26VbTs4cOHI5OYIAiCiBmSSuG3v/0tTp06hZkzZwIAdDodvvzlL0ekFAhiUmAMiDJajiAIJ5JKQRAEj0IAgOzsbAiCEFOhCGI8ZJ1+H7bMmQA+O9miEMQFh6RSuPnmm7F27Vps2LABAPDqq68G
uYQIIhLiOaE5cbgvficjiIsISaXw1FNP4c0338SxY8fAGMPGjRvxta99LR6yEQQRYzqEDnSxLqyWU5oZwklEuY/uuusu3HXXXbGWhSCIOHNeOD/ZIhBTjJAhqddffz0AID09HRkZGZ5/7t8EES2UEI8gpj4hLYVPPvkEgHPmMUEQBHFpIDl57b777otoG0EQBHHhI6kUqqur/X47HA5UVFTETCCCIAhi8gipFLZt24b09HScPXvWbzxh1qxZuOOOO+IpI0EQBBEnQiqFn/zkJxgdHcUTTzyBkZERjIyMYHR0FHq9Htu2bYunjARBEESckAxJ3bZtGwYHB9HY2AiLxeLZ/oUvfCGmghEEQRDxR1Ip/OUvf8Gzzz4LrVaLVatW4cSJE7j22mvx0UcfxUM+giAIIo5IDjQ/++yzOHXqFBYuXIhDhw6hsrISOTk58ZCNIAiCiDOSSkGpVEKpVAIArFYrrrzyStTX18dcMOLigwk0eY0gpjqS7iOVSoWhoSHceeed+MpXvoLp06f7LatJEJcibGQEUCrBJSZOtijEBUZjo4DhYYaCAvlkiyKKpKXw1ltvYdq0adiyZQt+/etf4zvf+Q7+/e9/R1R5aWkpli5ditzcXGzfvj1o/wsvvIDly5dj1apVuP7661FTUxP9FRDEJMAfOwbeNeufIKKhqUmATjd1reaQlsLAwEDQtuXLlwMADAYDsrKywlbM8zw2bdqEsrIyqFQqFBYWori4GPn5+Z4y99xzDx599FEAwL59+/DDH/4QpaWlY7oQgog7VutkS0AQE05IpXDNNdeA4zjRJGYcx6GlpSVsxeXl5cjNzcXixYsBAOvXr8fbb7/tpxR8E+sZjUZwtFrWRQ0lxCOIqU9IpaDRaMZVcWdnJ+bPn+/5rVKpcPLkyaByf/rTn7Bjxw7YbDYKcyUIgphkJAeajx49KrpdavJaKAsjkE2bNmHTpk3Ys2cPfvOb3+Bvf/tbUJldu3Zh165dAJxrRBMEQRCxIaKV19xYLBaUl5fjmmuukezVq1QqdHR0eH5rtdqwUUvr16/HY489Jrpv48aN2LhxIwCgoKBASmSCAAAwQQAn84+lMJkYEhKAhITxuSqtVgaFIsJVqoiYYTIxJCYCCgW5nicKyXf6nXfe8fvd0dGBH//4x5IVFxYWorGxERqNBvPmzUNJSQn27NnjV6axsRGXX345AODdd9/1/E0QseLIER4pKcANN4yvOT9/XoBSCay8bYIEI8bEkSM80tKAz3+e1PNEEfWdVKlUOH9eegk/hUKBnTt3Yu3ateB5Hg8++CCWLVuGzZs3o6CgAMXFxdi5cyc+/PBDJCQkYPr06aKuI+LiYaqMM5tME1OPTyowYhIxGCZbgosLSaXwve99zzMWIAgCzpw5g5UrV0ZUeVFREYqKivy2bd261fP3s88+G42sBEEQRIyRVAq+PnyFQoENGzZgzZo1MRWKuEiJk6nAwMCBfMwEMRYklcL9998Pm82Guro6cByHpUuXxkMu4iKEIT5K4cRlpzHdlInPxuVsBHFxIakU3nvvPTzyyCNYsmQJGGPQaDT485//jFtuuSUe8hHEmBhMGRa1TDJqPoUtazaAK+IvFEFcAEgqhR/+8Ic4dOgQcnNzAQDNzc249dZbSSkQUxKpWdMK8wgUnSMgpUAQ4kgmxJs5c6ZHIQDA4sWLMXPmzJgKRRATgUCpugkiaiQthWXLlqGoqAjr1q0Dx3F4/fXXUVhYiH/9618AgK9//esxF5IgxsRUiYG9AGCMXZC5xwYTepDMpwGYNtmiRIzCMAiZ1QRg4WSLIoqkUrBYLJg1axaOHDkCAMjJycHAwADeeecdcBxHSoEgiEmjJf0MAOBruHBmEWbUu3PAXaBK4a9//Ws85CAIYhxcqD19YuohOaag1Wrxta99DTNnzsSsWbNw1113QavVxkM24iIjHstx+nqMLhXvUUeHgNJSHhbL+C+YmUxgEzXlmxgTBw448Mknjkk7
v6RSeOCBB1BcXIyuri50dnbi9ttvxwMPPBAP2YgpygWzLsKFIuc46epyXqfROPbrdc8h4Y8cAe9yFY8HQWAXznsyBRkdnbxzSyoFnU6HBx54AAqFAgqFAt/+9rcpfTVxwXJyUSVas8jSjTXvv8/j3DlhssUgxoCkUpgxYwZ2794NnufB8zx2796N7OzseMhGEOOCsWCrRuAEdGf2TpJElxadnZNjKZCVMj4klcLLL7+M1157DbNnz8bs2bPxxhtv4OWXX46HbMRFRjzSXPg1BiYj+NJSCF1dMT/vxYLRyMblhpoKvP8+j08/5SdbjAsWyeijBQsWYN++ffGQhSAmFDYyCg4A6+4GwizwNJUQBIZTpwRcfrkMWVnxjyaqq3O6fFbH/cwTy8jI+I6329m4F2K6UJG0FFpaWnD77bcjJycHM2fOxB133IGWlpZ4yEZMUaayaR4P0cZz/WYzQ19faF+7yQQMDDCcO0c93bFilA/DIjOO+fj2dgEffsjDYJi673kskVQK99xzD9atW4fu7m50dXXhG9/4BjZs2BAP2Qhi3NjtU+vD/vhjHhUVU28ANl4ZbONBXeZxVE/7eMzH63Tjj+a6kJFUCowx3HfffZ7oo3vvvTfiSTKlpaVYunQpcnNzsX379qD9O3bsQH5+PlasWIEvfelLaGtri/4KiAuH+HTjPX8aRhnOnhXQ3TN1Pm4+QgOA5qERk4WkUrjpppuwfft2tLa2oq2tDf/7v/+LW2+9FQMDAxgYGAh5HM/z2LRpEw4cOICamhrs3bsXNTU1fmWuvvpqqNVqnD17FnfffXdEaz8TRKSYzE5lMDQ0sUphKrvPiPFzqT9eyYHmV199FQDw5z//2W/7yy+/DI7jQo4vlJeXIzc3F4sXLwYArF+/Hm+//Tby8/M9ZW666SbP35/73Oewe/fu6K+AIEIRq4/7Um81pgiknGODpFLQaDRjqrizsxPz58/3/FapVDh58mTI8i+99BKt0UCMG792Ygo2GrqkDuiSOpAvXfSChRrryOB5hiNHeKxYIcOMGZJOm7gRM0nEXoxQYxG7d++GWq3GE088Ibp/165dKCgoQEFBAc2mngKM9ZuPd1vhOd8EO+jHcx3tqdUwK0YmteFkw8NwTmCSAAAgAElEQVRwlJaCWSzebRNsVmWe/xhJfe0TWudYaW8XRPNCCQJDc7MwaetuGAyA1QrU10+twIOYKQWVSoWOjg7Pb61Wi7kiseIffvghfvvb32Lfvn1ISkoSrWvjxo1Qq9VQq9XIycmJlchxwWplKC11YHDw0utNxT3CJaDhnUo92PGI0t8v4IMPHHA4Aq8vsuOFtjaAMbD+fsmyw8PO9zXaZHtyqxGpHTXSBWOMzcZQXS3g1KngEf7mZoaGBgEdHWN7GEYjA3M4wMzm8YrpB2e3gnPYJ7TOaAipFI4dOwYAsFqtY6q4sLAQjY2N0Gg0sNlsKCkpQXFxsV+ZyspKPPLII9i3b98ls5rbwAADY0Br69TqHUTDVGpcg5jKsk0QDQ0CeN7Z04w17e0CGPOGaUZCvB5BJOdxlxFrxnjeudMxhoSkvb0Cjh7loTtwAvzhw9FXgNDyTz97CNOrDo6pzokgpFL4/ve/DwC49tprx1SxQqHAzp07sXbtWuTl5WHdunVYtmwZNm/e7Jkh/cQTT8BgMOAb3/gGVq1aFaQ0CGI8xKxxGkfFMqsJiiFd2CqU3c3grKF7n4HHChBglA9HL0wE1zER95AxBr6hAcw+eb1fMcbjVXRnMTX3DonuH2ChIzOnOiEHmhMSEvDAAw+gs7PToyB8ee655yQrLyoqQlFRkd+2rVu3ev7+8MMPo5H1gkcQnObqBc8U7o2Lieb+9ifKdzyey0/pqHX9NUu8bqMRKV2NUFh6Yb/x82htZcjN5cLODWpPqkdjZisK8HnMQKakDDzP0NMlYN6yyC5kKKEXJxQNULEbx7SQD9PpwJqbIZhMkK9aFfXxsSbU8xyr0tAKWpwT
zmGlbCXmyi6M9Cq+hFQK+/fvx4cffoiPPvoI11xzTTxlumhpb2eY6uuXOD75BJxSCXlBwYTWa7UyHD4c39QNwhRWXiFxycwJPOrqBGi1DGlpMsyZE7qFMsqciX5ssEV0itZWhsFuhoQ+BrjarHDjPa2p56HiHLDDjkQkRnghPrifQ6Qz9yKuliG16TTs0ybP9Sz2ipmYCUYjgyl9bB/7iHIUckE+TsnGTkilMGPGDKxfvx55eXlYuXJlPGWaFOzMDgUUMV3S8IJoo0ZHwWKwwsdETyALic9NjtnANmPoS9Mj1ZY8niokt7vb0KCyggCFYRjt7VmYNi36xkNwGasCk37XmcAgN48CiPxa4zmmwIEhcWhsqdBlxhFw9kREc22BmExAX58AX3ugTyegrk2AahFDrrhB6EFhGATS/K276jkNAIAvjlmq8SEZfZSdnX3RL8dpZmZ8yH+INnbxp9mY6Kgnh4NhdDTSOidZK05Qa8UY0JzTirPzaqULT4AogWWVrTXIqD+J7ubxjTTX1wkwmcILouhqRYq2Htzo8NiV7BTtDSWePobM6rHlSHL3HUdHERS9ZHLlTJL0CphNyKg/CWXr5Edp+ULLcQIYZs5BOj3Tx/Q8k53PRqux4lzJGfRoI3MzRIL6oB4Vr469cYwl8W6LtFoBer0Au52htpYf0xhGJDKbbb2on9UMxjufoyzCF0vQ6SAETEaVsuBsVj0ccgcQQYikhVlQzpfDJkzugLJiqA9yg3cAOKWjFnKTeC5tGe+I2XsiWa9r4F1uHGee7wlGUin09fVd9Mtx8nDa6QrpCd7jZxJ7Tdb6ZiTpO2FpnLhJRdypE0juiSyV+lh7mgcOONDYGNkAvW+4LGNCwL4xnT5izp0TUF4uoK5OQGsrQ3d36BOGDOuNQMim6S0YSBmCWRGdpSCo1RDq6qI6pj25Q7qQWy6hCXqmRzeLz6JGod4nZX8HknuanWWsVij72pDWoA5Zj9XKcOCAI2xK80DC6mHXTrd8IyPO+uPmQh0nkkohJyfnol+O0wFnoLIcsR3ckY0OIev0+1CMxNYiiRarlaGmJvKe7Zgad7MJ088eiv44OKNfaloiy4/vn+YicN8EuY8k7pPbZx/udKFkmaKelpiOtY2VaOYpiBV2X9Nop9OqGOskNin6+pjr/wsj8jCq5TjnzJlzUS7H6bYUfJVCmaMMJxwnJvQ8suFBAEDCyNSytGpqBLS1Meh0zsVd6utjECVkGPvgdXN6JWoyPo2scLiB5ji1uA440K1sDq88JQ0F/0ZY6OqSVmr6PrCxzMRCGEU/hnsmpTSnGu5O2lRVyPGGluOEc/IP4K8UHHBgEIOTJVJc8e3Z2mzOfxPNeDuavCwyP7Vf4xaU5mJ8MkRKW0I9ulLa0celYj5U4oWiEIbr64HQUwWZ0Qju8svFy9jMkFWfhWCyQX711WMRWxRv/qgojpmws4+fSG6z+92M5v0I9T4PDjK0twvRBTRNMSNs6qTmmwLEOjfPFLTAJwxBkDCNx3jxkbp8mF4Pvrzc78sWAp5n2cGx9aKjhckE1/lD35NQvWnRy3UN8noT2AXXywkCRkcZ+NGxL0MpKo/rHnIY+/dRU82jomLirc/xzEM5csQRNOYTVXU2KzLqgj0JJ06IXKfNioza44DVErxvCkJKAfFP1MZd4HaqmPiTFsHhgq+qAtPrAVuYXF0TGJIaUbkw75W7DiYI/ukfRKdkc+jo4DEyHEbxCjx0uljk1Br/Pevpnbz3PdSzkgoX5RzhzWVZtxYKo0iKC0EAxztc53aeXN6jhcI0DHm3M8CjM7MH9TOn7jr3pBSihDGG3t4xfniyqWkqTIgFEyOtEO3gsH9559+DiUMwsInJHnfggAPlJybO4rCqz2DorTLPb7f4+uQBWDlnz9JkBvr6gJZmZy+UuR4Y55p8Jtd1IaWrEYwDzBLzDtyYEswon33Ye16pxj+Cl4Rz+UEutDEF95W5772iux3Tqz4CjKHf
GU7hH5Tifu/SNFXIqDse9nztWZ0YSJ26rumIlcKJEyfwxS9+EWvWrMG///3vWMo0afh+GD29zpjzQFo7HDh9WkBHR/SKYaLcR6bhHmj6z4z5+HG33yIVxMpS4EdH/GLOpfB9hm6Zzk+vwcf82BdyD8Q4AalKBgcEdHUJqDvS5Z8PiwkQIKB+RhNqlKcAAFabq/GXuS4ooNFVjHobmEifgz7CRsk3qpeBgZnNELriE3LqJwdjQR0EgUl/g1IKiucEz3c5IhtEr9AL+ZArEMQUxhUX4mNOGOoB4xgUQ72SD2OqOgxCDjT39PRg9uzZnt87duzAvn37wBjDddddhzvvvDMuAsYT3walU+v622eaupVZcZCVQa5cisutS+IsnZfy6t0wyC2Ym52HJE58DYq4E6M3nP/kEyQPN8OQK55/S68XkJbGiQYT+ysIBi6CRiRedHcJQAITSenMwDgAHAcL59Q+Dt7ZAIkZmtFYUlYrc86XisI/4LmHrkaQP3ECsFjAzZkTfZjqOHpFpaU8srI4rF49cWHjg8nDqJvdhFWKOQCA88kn4BDkUCGCkHuZ+E1symlFf9oAlP2APL0byL0SPHhosjswDQsnTPZYEvL1ePTRR/HrX/8aFtfg1rRp07Bnzx68+uqryMjIiJuA8cD3wzIyI+xMPNLFDDM4mxWOtnfQfG7Uk489UibKUrBxod0XzGSCo6wM5tF+WFh0A1vjadelemSyqoox1SvliSgvF/wH94aCrQpmtqJ9/2lw1si6+HxFBfha5yzthgYeXV1OZTIw4BTGd1JczR/fg0U/cbmifPWW2x3D8wGWAhdwUyJ8sc6fF8awylfAQLNF+p0SBAGa7HYMKyd2pq77/nski8RNFealHkl2PrcRbgDTK8sgM0cxSC/3KqeRJAPOOc4CAPrTfFJm8852pDOpCz0ZfdAqOyOvfxIJqRT+/e9/Y9WqVbjtttvwj3/8A8888wxkMhlMJtMF7z5ijHk+dF9aWSuO8kdxnA/tE+SG9OAEAYmDPejtdY4v2GwCmDHyF0puNoCzmsccUx4O1tsLOBw4qHsDh/jIJotxNqvkwNq4ZBL5MGv4GtTx0rNrmU8DaDSyoIZBZjXBZPAqBb7Fm8bB3Xhbh02wmHgkDYq7Pex2/3Ei1tcH1toKwLk6V1WVc5/Y6l2jwwI6K8RzgYXz03MB+8TGQgSOh52zgued55fJffcCMrslri6IcOG+gfRk6FAzpxEAPAOvk5G1NtzkNTfWvhFwAg+Fvtt/Rzhl62MpVM+tR4fQEfSee4IJXPdtVDGKw47DwbK56O4Wou5oxoKwhuTtt9+O999/H0NDQ/j617+OpUuX4vvf//4FvySm+0N3Kwb3QxMEBkFgMCJMA+/unXAcDAaG06cF1L1TB/7oUTCJkAb3O5Ywqkey+jB41+p20SAwATZZGGWSnOx8saz+jXyf0AeLTLyXl6L+yDmwFoBWK6CmJnwooS7NNfEnXK/N9fb7lmhjbdAwjXj54EMBAEeP8jh5kvfZxzDt/FGkN512LuTCM3Row8nh3xEYYkMYYkOoqhJw+rR0cjhPNRKDsu4B4GgCd9wNChOYnyKszTgO5lIKge6adM05UemipbWVR3eP/3P+1PEpqvlqT3ViCVX7+gScOSPyfgS0dmlt513n8QkXdn1r48H3OdiZHV2CiNIXefdC1udpxSOZ3BCBD86dBt1l8fXLBmGG2We39zyDgwxnzgioqZl8F2fIK9u3bx+uv/56fPGLX8RVV12FkpISvPXWW9iwYQOam5sjqry0tBRLly5Fbm4utm/fHrT/6NGj+MxnPgOFQoE33nhj7FcRJVar82EELgRVWSng7Fmph+KdzeNObezQuUxGEdPayqzQCBrXEQFEsLgCz/w/umYmfu/NzIwDjgPoZjqcOSMELZ9YIVSgIqNS8ny+nDvnnOkcjqacVowm+UdpCE1N/uvWCgKGh8fWAEQS1pkwqkdnp/OjMvusJSzlbz/O
H8dx/rhHGUxUuv++Ptf5A9w8RmOYnraPUvDA8+CGmsHcc0Amyv8YUI1eD4815GYYw2hn7fC4jwJPzRgqKgS/WH9mMkLo7Q35xHzHTw4d4vH++5Hf8ER9pzPNtK8IPvfqnHAOVUIVRllkrrwE33TbMq8S7+8XJHVCZ6cQ5NZkTLqzMDDAYLOJl3FbCBO83POYCKkUfv7zn+P999/Hm2++iSeffBLTpk3Djh07sHXrVvzsZz+TrJjneWzatAkHDhxATU0N9u7di5oa/xSxCxYswCuvvIJ77rln/FcSBYHfmO/DlGwYPD0nLuxsT8YY7HaGGqEGdUIdBthAxH04nnf2etuENnzAfwAz874pVmYVNTHdmV5bHU4TeGQkuIwjzFhEOCor/W9K4MvPywTPB8oMBgiNjeArK+FwOHuDTBAQwRrxAJxrAp875z1fuM6kb0+zv99l7fmI2qGNrNEJbPA6OnicPs0HJeHjAmMXo+ToUR4sxPRV3yrdfyr6tEgc7MGosdpPAI9HhGOhdIskXKTTaD0uEC6k+6i21nmfhcozYAFp9buELljlNtc5vbhnzZ88yXuUt1bQol/od+0PsDZazyGj/mSAbN4y7vEzdx4zsTK+pDf7dpCcklktDG1tzNOB8X0v3O+yXs9w9qyApkbpd8t34p8bv2wBUzT8KKRSyMzMRElJCUpKSjBzpndlo8svvxwlJSWSFZeXlyM3NxeLFy9GYmIi1q9fj7ffftuvzKJFi7BixQrIQozkxwpueBBZFaXgXANLfE01+PPnIzvY9SBTtXVh15ytqhJwfFcFWIXz5TMxf6tgZAQwm0Ua7uPH8eY/X8Xrh3rQLTgbeBO8x/b3M2g0AsxmJtozcSu1aG6p73CIMTHYeunpkX55Pb02l8a1GB0oK+NRWSlEFbdeXe1cbcyNxafnn6o5i8yzh4PPCZ8POHAQ1lfGENsTdFpkVZSC2Z0NSl+f8zE3NYWyGgNbYv+fkTW44tIIgoDmnFZYrYDD1TA6BFdDJ2IpmJgJ9jDuRIOBob9/7C4Jqd6vg7PhYP9pZ3CGx1XjPaZKqMLpBefQn+q/ZnFaoxoZNZ9iYIB5DOxzwjmcEk6hq0vAwYN8VFlFQ93zSNpdY6I5RGFvnWVlPD75hEdbW5hZ6lG67pq4Jv8NvANpxw/4beoRetAoNEZV73gJ2XS89dZbMJlMUCgU2LNnT9QVd3Z2Yv78+Z7fKpUKnZ1TY/Rd3uvszchHnC+qMKCHSGxgEAz+DTHnmtzCca6ehM9L1d3NkDTYA8Fg9Rw7KjN4ek2CAFH/Ycc5Pbrk9WhOOoYm12Ql3xd+2PWhuF1gvjQ28tC5MjIGKgU2MgJ3Q9Sh9T+vW7fp0Iez82rRlx5ht95dt2/op8v+rTrLYJAPoUtnhRDB4Fk9Xx808MwGB9HY7JU1aaALcrvXRSelbGqyz2AgRXqOQ1KXa2zDEt52ZwyQWQzgHJbgHWLlRRoJ97MM1F3ua7ExKwZTnFaf290UTsUc4Y/AmOCjyP1ShzN8/DGPU6fEGzK52QDGGGRWE3gh/OQ+3uG8HoE5B+a17c53s1fZiqHEXv8FqkRCf0eV/uN0iSP9UJhHPCK7LWsA6O63wiAfQk2lCWeOhX5+ganRAZF7HoFW6E/th00unVvLaAR63bOzIzHRRDOzev/WyP3H1GR2CwIXPawUKtEkBCiPGBN2Oc7vfe97Y65YzJ871vS7u3btwq5duwBgQtZyYA5XY6twXr7JxGASBCAluOzAAENWlo/cvpfl6pYbjU7LIDFVwNUBa+g1NAiY/gXn3+UpFdAtaMe1mtBrXvf1AbgMSG2vRmuiEtkh0nnr9QxDwwJmT3OLxTAyAiTqARWcvkmDgQHTAEHfD6GpEbJ+HoAqpIvM5BpgNyWEDzsMaow57zaHusI1nsGhPvMEEvlkrBWuC1sfALQw97T/XOc5TCZnTLzrhiuGdQACAhwkPjq73Ib6We1hywOIeKY5Y8D06k/Aczyw
KKJDwtUWXDnEv5vgbZH1SGtrw1sICaN6cEMZSOnQoE3eBOCnIud2/q/VMvBLGNpaBQwMMAxyAhA4q1cAhoYE0TxYHAuMt/I/x7lzAs6aBVx9tQznlafQkTmC+QflsAo8TFffKn5c2KsLuAAJeI4PWVY0AIEJ6EvTI8eQ5SOPfzl3dSEtx8DPaIq4k2Lmt1GpVOjo8C7QodVqMXfu3DBHhGbjxo1Qq9VQq9UTE/lkc/YKuASnUmjVMHR1+T8QxbAOyq5mVL4XsCCN74Oz2wHegQRXDhS3XxsAlN2RDcYfOeIY8wpdvpE4YvK5Y9KtBjM0Gj5sWiAA4Lq9/uCj/3fGO/giJYvP2103MILXE9SwOXqRMNgLm9wMwRG+HkdfP+xaZ+RIwrAOSb2tQVEAMrtX+Hq+HgccB8TdR2E+rCS9+IIxNpkdPek6yQaEMWeSu8ABZD8EAcmdTYA9OMQ3SdcRFIrqW7fzj+B9nGtlLrGmJZzIvu80z3gcv6wCxy+rQFuK9z5wrpeC40P1lH3H2xj0A97tcsMQlH1trl8MfToB/f0MTU3RjdgzBvQ0DiOxX4uqKgEdg87rFVzzhY4dk67Pk2LDz1r3DhrzvPNbc7ukBL90if69/3LVcQwlj4BrqAUzGHD8vX4kdzVCZjEgo+YYlN3N0CV3ozmnFT0ZfT61+D8Mk1HAoUOOkKlHIk72KAjgz5/3SYgYW2KmFAoLC9HY2AiNRgObzYaSkhIUFxfH6nRR4YnmcPlYxD5ypa4dCtMQ0loDwv4YYFPYPfW4P4pAUrqC/YBi74DJFD4ISRCYX0/Df0AyuEKZyDkGBhh43plDJ6QgALhWryJL0muRONQjWq69XaQ3KzB0C92oznSaxCNKA5L0Ws++cNT84wRajjnLpjdVIFXrdCPpdD4frk8IoNuq8L2MsaQ/dlOf3QjNjHYYWfhoMAE8Tl5WiVMLq4L2uceHEkZ0SBzuhVLX4ZG9VWjFIBtEanu19wDmbZzsMoePPz5YgboHJ4PtBeZscHwu2re3KTAGO+ds9K3wKtX2lAlYeY8xZNafgNlwHmAMg3rBo8cHB3zmfPjIMxIQgdYxrQtWmQmMARl1J5A41AvBIYBjzmctcM56xKbzWK0MHe3hAyfOnBE894ZjDCYT0NwsgDGGk5dVojuz17UvuDffldkLmIzgT59GWt1JJHc3I72pEgrzKFK6Gj2p3G1yV/I7kXd81OAMSBzwWVPL15KNdKyNjQyDdXRAqK6WLjwBxEwpKBQK7Ny5E2vXrkVeXh7WrVuHZcuWYfPmzZ71GU6dOgWVSoXXX38djzzyCJYtWxYrcfzxvKgCRph31mVqyxmktIkPONtsDIcP8bCYGWxyGwaThwG73b/xd9XLV/qHfvoOlgJAY44GNrm3J2k2O5fr0+mCGwSeDzOIBq/LwWwRPFtDlZNCbB2F1JYqKM6/gVreOcN3cJAFReUwOBumM8IZ9I4GVyJmCflGUImF4Q0MCGhv98bsM5+v6fx55yxjv4/KvZ/zypY4KK7U3BiNDJWVPCyusN9wqa4BgA8RvdXVxXD4MI+silKfqBavbLVCLU7wzjTL7jkMKS7F1zhLA/XCKs8r6ed6EXG3MsZg8bH4Gur5kI2LJrEWZ6cfAh8YkRMC0SVPfeoWe4+MSSakNZ+G6dARtCudvWbf++hrPfO8M+TzwAEHzAkWaKd3oznNeb98U5C433eBC/08zpwRoGkJ/7yStXVeF44rLE2hCN1xkNtMSGuqgMxixHDyCAxys19hudVnXMS9BgkX+v4EuoMSB3sCLGDnfqtuaq3RHNNFiYuKilBUVOS3bevWrZ6/CwsLodWKzwaNFQ69DoPoQwIS0KRowig/ArPLh84JvOflCWRggMHhAEwWIAHOjwEhZjGzHm9jxMBQf6Ifc5d66+1PGwDjGK7oWwwAUKudb1hdnYDEwLoiaM9rawVUdvOQJZiRpBsCMAe+jVLgTFK51QSjkWFwkEGl8vYLdDoG
ZPkUZAxJg904fVk9HCwTecgT9SgxLlhO3+bMPfnK9wM6e1bA1VeHzmNTUSEgEz4fmo+lYLU6B/JZRvDNSUgIPT4QaBH29QkQBJ+eaOB9Mo2ATwmf0mU0yYD6rDNY5BoLAZwNv8I0DCNG/cKJR5IMQQ1d4EA4c2fmDDH+Zrc7n589wXk9oWaiW60MQ4m94OxWcKZ++D9Yb8RNv4553rmmJiGglG9DFxD+GnCv+pIHYXG1bd3JzUh1bQ8cv6qrtiFhyNt1Fjj/eQEJwzooHDLYEwBBlg7wwedijOFT68eYZ1F6tnEcBzD/dz25txVgKrTMaIfSnoT0wR4o5s30pA3xvUpfOFcnwayw+s8tgXPs4fzcBsjkVzmPdHdaEA7v+axGH40u0qeZClwyqbMZY/jA8QFeLv0L1AlnMZJkwIhsGEYjg13u35MKjOz56KPgnlZfuh683Yb+1EF0Z/SG7Y0rezUwNvib7GKlRRtc5oz1PuA4gBGbCb29fnsBAN06GwYTe5HSUQOZPbjbHShbkr4Tx47xOHfOeUKeE4JCBgOpqvLGkwdfa3AGS7+9ItFHvteqndYdtD+oDpGvxrdX7ZDbMZAyJDo/Qwzf8RhPz5T5X0dmrXcJ0KEh8RBgTXYHjAmjMMuDo3fUdedwyHEI1dU8Wlt5VM+tDylPKV8KAzOAaxUJP3Q1PBwA3ubwU26p7dUBL5PLWuUBu8yK1LbzyGgoD6oyVOrmhpktaAiR6z+wc+E7adEi9x2w8uk9u/4cduUZUjafQ3pzpd+99H0XkvRaJA10Qdnf7lGgyl6N33lHrBbw+ip02z7wbHPPa/Ctdyh5BObRPvSm69CWpUV6yxn0f1qLPl2Apgr17rJgK384eRSmRBP0Sc5ISs+zYGEscp/6XdlTXBfO0J86CIfM4SwSQg7eDlRU8BG/2+PlklEKdtjBg4cJzp6VTWGHwMRnL3a2+fe+rFZAsDmQ1ngKMpvTqrAqrGhIqUfjzBa0ZmvRldmLxGGn+SwWrWDqOwuINI6akb+gs+dPsDX8A1bmPxKs7GpEv07whPupR/xDNhN1TiurMeUMhhLd/lEONrkdCtMIIDhzqYhdo7sHxxhDe1YnGmdqMKwMPRvU4YjcDQV4mwW5Ycjj3hhRioc9dkwPTk8gdyevc+kChclrYqc1VSC1pQqCIIB3NRxVac2on9UMRwShhYDT8gsK2/Vxz1fNq0XVvFoMD5YhQa9FW5sA+WhwqC7jAIVxCNPOBafm5gQHBMHpV9brg3Yjkfdfs7GbdXuGP+VWo+ea3fddm9wFG+99Rzx9eE+75L0AMyLN7+3/TPWpg5602kJXF6w6pyUjtxhhrSxHT4bTCk5o/gjnQyg5X4PMLZsp0YTEoV5YB/3lSu5sCApEcLg6aYYkI/Qpg55vzrdOmc3s9z6ernDW0arxcdvNbsSnMv88Zsr+DtTWBEdciL7bLLS+cHsUwn0TCuNwyH2u6tE4swXNM9rBHDzSm/yTRnZ2CjAaGQyu9qSzi5TChOMfKsdCjuYb2vzDXjmbBdD3I8E07OdXtHDeXnl7Vif4IaffXSwUsDdlABDJwmhz9MKQOATNjHY0Jp/xe8kUphH0aL0KSifz71GPjjobIitncgaSu2jKaQUAyKxGDA8DdWKZMQUBnMOOoSHApnCeg/exmGxyW5BPtN2hRUNvJ7gA3zvjQn89yT3NaGSNMCss0PtmkBSCk3+549QBQNnj31tVmPw/ME5wQGAM5Ysq0ZijgUXhmg8ihEvsF+Am4AAw5unlN9rboLM6G0FTogmmRBN6WDnM+sPgOCBVE5wmhHECZHYr5A47+tL04plBeYeoGeg7VnT2LA+D0T8NgjsiqGWGM+NoS5rGmY8oAKvJqwjdlo7vWgNtWZ14r0x8DgYXJimjvd2rrBOHelFaX4XWbC0Gk4dxbla4lCni74JD5vC8U+4edoJxCMwg3llomdGOhlnBVgvHQTQN+ugoQ0urvxVgsARfXxcC4v5D
vLtWKyc25cIpg3uJVE/kEnBK7X+u4WTndTlCrC/e3u6KDlRYYe7U+0XYAU4Xa11d/HMhxXRMYXwDMOkAACAASURBVKrh6xpgHMNojQbDtjnBMwECXBXTzx2GoLo66FWXGfwbqrPzapHfE+otYt40l+7fAdiYLajnobAY4F4FvLeXwXf1BHc4XKJOi7SBVs92h2uGq8xhR2en4DyXq1qr3AYOQM/Iv7CkNREnFMWwKPwbUl26Ht2ZvVjR6YB78obcMIj3O87CXFmBlf1fAGZ4yzOEj6RoYi0Ymt2GFJu3Z5zSVo2mjOWAT2zB2bMCvuL62yK3IJnjPffD3YD6zrh2T17qTxtAkiMJDgXAWJhGzkfpJfVrYcgcQZrG6645qe3EqeEh3BRwHM/x6OxwIJkFjzcJnLeRa3Yp47nDzkU4lH1tYGwm0jRV4JPTQ8rluf4qHvN7GTAveJ/AOe+EnbcjsNH1NCYcwNy9bsaBs7nGDdIGgKSzoudMELF+AGcD29XmP8bgvn9iA8ADg16ZEkIM8NtlDiS6vsGmHG/U3mh/dKGWAi8EhdDaZFZnNGBXA3w/6N4ue9A43cigf7oMudUoGu/b08NgZky055ww6jT9dOmu/3UCmN7/uZjQg+TaQ+haqkGiyBw8t4LkwKGh3oGQbwhjTutfFrtMxr5cMkqBgcFq9v3tHCe21LWASwz4yBzBmt0u2IN8wnYb5xx19uHMKSumi5w/kpB/nhdgSQhwIfVq4HDMgMXiTI0xUySePGFkEL5HBUYr+ZnZC1whtgLQnJOOaZzTR+qLW6mYE6yeQfjknhbo0q5BGgBNmn8vcUQ5itPcaf9z+ii9liYHMmWCN3soABlvg8EANL3rDflN7mwAcAUAoGruGaQZZcjVLfTsdy+K4qa723svPHWHTXPh03BpT0OYNhTQIWCi4Y8AkNzVKNoYhp2zAG8nVG6WSNTGmKSLzmJmsNt58DJ38q4AS0vmAO/2rTMgRdvg2ZfSooYpsHUMwDfhXEODgDT4jz1E6j5MMIRY1Y1z1nL8Mq+bhAFoagzTIAJBPXlB5Pu0yo2A0Yik/iG/hbESh3qDyiYYh+AI+G5DzT4Pmrgc4h50dzNYUuqBADchsw9BbvF6CDgf2X3vd7gFoNraGPoWnIUyZxquwZqQ5SaKS8Z9xMA8ibsAAJxzhSuxyUQJw31B2wa4YKew2Mzg6VUHPX+bFRZPTwIAmFjMZ4CMVaqaoO1NTd7FUdJaRHp8AS+UX+QPAxKGxHuDdjmP6szQy1RyDGic6R3kk7m06kjA2IMuXY82Y+iZ5u4XngvMvywI0Fd6e4z+DSfzU1YMDEMp/pZZa53XVeOuWQjTSPvuq1hwDkweaCOGcYP1topudzcSvI/C6MoMbojE4PhgDRRKyVgSLLBagbpah0dpB9Kc0wrB4Y2G4QRvOd97aRgVP4dvwjnOHnqmozUh8h6rbziywAmQWf1dqJYEC07NPBSUadeXQDemNkSiQyYwzzNOTg7eXzWvFt0Z4s8m1H3vTGnEictOwy5z3vfuzOC2wVkBg2koeB0WDhzkFu+1JXd7OzUK16RXDoBZ6A/KO5ZVUYqkHu/3Z5i4tZzCcklZCkGhbWHioIOO5yPIsxKwz1chGI1AUqK3ERtIGfLrMQHOgWWx1904ZIPCYoCyVwOZfabbm+TBYYPHZO5PG4DS7grVYwJqG9SYM9QNiERW8jKHn0IZTfL/YAMtjpSOYIXlprt+CNOmifcxlL0aMCTAEJD/RtnTAgb/ZU0FCBhJNnji48/Oq/Xs68nwVzwJHWpn9C0QNOFJDEEWOBbiL6/cbABTJAJICijnlIsPYyloRQbLgdADlQCQ1lwJi6vHysE5Q1ifKp7rpzVbCyDbr0Kx988zphBmrMDBBxm4zmN8xoqUIZQgEDpySYzeHoY0t2xgos+HExzozgwd/TZgr0D7oi5kGafhM3YGTYu4UmjRCFjpuidiCSFNiSa0Zke+wHbtbK9rsSWn
DcZEE6yK0ArR7VLyZTREcEUgTbJ3gHlAYdtK/zoNA+CVzgBfhWkIjLExpwuKlEtLKfjQmqUFL4t8Ov5wRR2Q6b8t1AO3yxxoz+oMSizn63cVg7eJ90LS2rwuFveAqpuOjuCPzOJy+Sj72tCNNuTI88TllIsPgLoJVBLhSBroQu+R0Au62+V22AMigxSm4aBw4JOXOV1TkTyaphzvDGx3xErXtNAT1gIbpMCBvaSBLiQO9sImD55EWbmg2m/CoRvPqlpK8XsliHQmPMf6hpa2nUfaoB7a6aGtjIRRPRRs2DMVrTU7OMGk2yrjJaxSMdz3HoDofB33PbbJQyucQOQ+ETjn5tVh3tDsoDLOrABiTlcnToXo7PB8WGpCesNJIKCa5M5G2FMzIHDOJk2WlAQYpZNcuukM894AwfNJAgnl/tGlhQ/1BgC7z8uuyQ5OxaLs927ju7qhmDe2dEGRckm5j3x7WZEqhKp5NdBkt2NIiHwwTL2wKupMo5ES6EIxGBikps749rZ9ETghZC4eIHwDK4bRrxMWmf/5jGrsU/etCpsnJsDdYIdTZIHPXKxnxzEeFQuCXXRiCgHwNuyhLJTqc6EbUGuAgu/NkH5nknTe+S6BY0G+dLIwk0IjmBEps4XOGBsodziSdP6NXG+I7yLSYMuBzr9CFpilFoDcPAJlvxZmdxrsadMilhGAJwx3rPilMIkSi08CSt81nnkuuI3S9U7QSlBhuKQshVCzlcNhSjTDlBj5ckiRpOCdSITRYch4HiGSqUoi5teeCEyJkSnRaKw1MaLJddQxXXqSXLSEc1c5C0Tuogy0pMSQGtgGgAq+ArWm1pD7Ay2kWCLjI7NYjEmRuXX0aYOYMzIz5H73eI5MPoWmCI+R2jnBExnlCbHvx19SlsJ4tHmkRJK/fyI533skKqUViDsHz0QTK0tpPETTw/VlKHnsuWnCWWJjwSGhRM0JFnQNNKK7NbRSllsjdwtGS4LYYIUPjhCup0ifjcAJnueRmuq/z5HqtQ58JzteqIhZvfFQCpeUpRAPNDMmIANlFPRx4pN7liyRY2hQQL8+Ptc9WWRkyjAyHHlvXC6Pfi3m4fEohSgshUjgw6yyBrjccR8ExSLEjXjk8GnPco6lJCdzcDiY6PpYGUnWwOjQSWHi253Y3+BLxlLgO8Rz6cebif5o7ObgFm7OHOdjnTZdJhqaF465c8cuoCN1GhISACbz72skSsTHj4cZOdH5zRRx7gYpe4LX1Qjs4UZDuOiXqYBCAcwbmo006zguMlIYMGuWTxPmM9grlwHz5k1+8xZp9FGkCIFh3TFg8u9anOjqjK+vHwAyRcJAJ1opcCKLufgO27kVRCDp0hNso4cBNlkKkpX+vaPMzNi9yNH6jiMdg8jMlC4TCYH+++xsTtLFMhnMnctNSCOalSXD/MG5Yfuzs0dmIr/78nGfi5MBCUpvp8AvZcfMmRP2rSmV0mXiBZtgy1OMS0YpQBH+UgMffEqK+PZo4EROKfaihmokFi4M3Qt2N1qcT+oFt8zuDlO+cT5kIZaaTFs0CwuvTAvazs+eL1I6MuRWIwQG8A5vy5uSAmTOjr7XOHOmuNy+VkdODhdR671Ir/L8HalSSFA4zy9j0p9IasAyrsu6rwhZlgnAdBaHXrQLd7z+nNmhW8hZozlISZEh9bMrxt2QKpXOCnz94WkBr1muaQ4WKjOg4EObbYHHBDJjBofM3JmQL13q2Sa3OQerFy2SQTZv3oQphenTYtOpSUl2vudz5kjXn5PjWlkugvXOx8sloxSS0sQbWHebkp0t82tf0tI45M5W+punIvhOirWnZ3v+nj2bQ1aWLOjl5kRm1aSncUi3BH8F8jCujhwRt4n7I3BPIs2yh1kLIDUFissXB22WpYT3N82ezTkbejEriLcDHIeZPiumyuUI0oSBt4ADh4UL/TcmJYl/KH5VZWRCtnChaDlfZo14BZqeE1k3PSWVw8qkWbjcGhxXH4gyw9nIJ/KJ
WMrNxGfmhzbDGBgKk+ZhTYq/8k1NjTqKEoC05anIdL5X4SyqOUMzscQ8B/+/vTMNjuK69vi/e3aNpNk0WkfSaBatowUJIWQkjBSzGZ6wjdiC45dAjCuxy8/Ji53KF0IoY3CCK44rScUvAQeqMDhQcUGMwZSx4/AQq1mMwTEgJNCGJKTRoG3WPu/DoB4NkjDwEBLm/qpUpe6+3fPv033vuUvfc5MVFphMPHSDpgwkJ/OiI5ZIAHNqKJ+MVGG6edLjzc/SkS6H1SpBYUPuiJoSH82GLib0rJKT+bDWrVbLg5PJh9SoOA6QSrl72ySPC8XNSE6+N0VmVBRgTlCgjLcO+66nlZvDHPnA7bhw68ir94JRdQp79+5FRkYGbDYb1q1bN+S4x+PBokWLYLPZUFJSgvqwYOP3Fplq+BJWo+GRmspDpQp/MHZXCir9ORgSCeEmBEnwpUxM5KEblKkjI3kYdGZYjBHiixTpiwh2dt6EoSwHOj5UGGu1QKHeAP6mhdEHa5nROQGWNF5s8hMvAVTBwimQZIakqAiqiGDuTk7mEVsRmimZ05yBEn8JJDIJ4uLC73uKKgvx8RzM5uC1k+LCNURkmpGYKIExVoLYWE4syOQBOXR9GoDjoEoyiC90gleHKX3hszTVasDRnAGTiUdsLIfMBBX0XER4Ps7MRvQtut8SXHGYIpmL2bLHhya6CX7Qay6T8WGF2Ui11XJ6BGXqFKSnhZomtljVsOMB0TpzUJNMjek6MzTRIZupbxrE0NrjEGnORr48Ho7mUC1XpeQQExM675veu9B5of8TEoZ2TWk9eiQ7E1GpexpyORCVGA29LvyZK/0K2IU05EpyEWtIgsEgQWpq8PkrM9LEhZh0ainUEjmMRgnMlelIyo/DSJicCeL/gysBSmXo3iTEj7iqYKXmP1AmnQttnwZxkXIoFNyQPAq1GtPk3wnb9R19MqSCBJXSSky8PnIX1R0V7hFqUfvgAvzmvHO7JCXxMMbLwOXnITagH3b2tTwxFupUo2irgTQ+Gv1u8FFzCoFAAM8//zz27NmDc+fOYevWrTh3LjxMwoYNG6DT6XDx4kX85Cc/wc9//vPRkgNeHd4XN5BXuRwHEjOKAYR/Up4sj4KqrALfMT8N28x0JD6aGXa+Jlj+iTVmpU6FCH+oYE9xG1Gqn4mpLgdkMkAqAaapU8DdlNtL6iZgdtSTeNRaCADQWw0wGHg4JAmAwRCWVqoMFTA8eDx2vQDlPcHZyjzPQWsOOoFMTRpKJaVQlE/DY70lUCg4aAc1gaM9kdDL9SiXPYppSEexNy2oOYUHr4pAZCSP+OgUVBb9J57wFaLsShGyW+woSFOjnCvB4+ZnMSf7v/BEYAKme4K1PVOvEZmtNkSoOfBmM6Im58GYpESGVoMoIULshpHLgccj8zAhOQrz+oqQI4/FZJ8NMpKKBZxUkMIo0SE23YDERA4qFaDpD1YTBVWw5qvv00AG2YhT/nk+6HyMPUEbEhe0e3JcJmS+UEHvlfPD1tAVhnhw4KBSharNUwPpUMhDvzcQTiRFEiwA4/tvXrcM0CjDncJ02zOItueD5znIhVAJbvHZwKWkiE4vYphmotXKw1x00zf6g24/OacQ0TnmsMNlyIKpKwERhkgUPvYIEialQZ8eajklOxORnyeBtLIyqCNjBvi8PCRI9DDIlSiNfgyTEh9HWhqPmBgOZa5spOTNhsKohSTOiOGwWjkUyROR0pkEvVSJsswZ4jGTSQLuhmfNzhq5+FFySmiilMhqtaGkK5j3IiM5KJVASr4BvMOBR3QzECFRo0odcq6ZvAkz0lcEz/eEt74rFKGW8Wzb04jp0SNxmBnWN5OttMPQq0OUTg7e4UBylgZmM48ni390y/P0Om7YbjuVigNvjAHkMkg5Kcq7gzPoB1pkkR411FCDNxhEZzBwFf8ozSsazKh9i3H06FHYbDZYLMEHsXjxYuzcuRPZ2dlimp07d2LVqlUAgOrqarzwwguj
FtvDEz38NW1kQ5YQg+aeDnwUCAarmqGwIUYWAU6thjzCBElAiygA6U9NwPm/B0MBVMsKQAaCRBGB/ZYjkKbbYTzIoQ2NkMmAhPKnwPNGCAgW2OY0Ccyd0eACwcKp2FuICZYY8Ale8BwPnT0V5YhBa24ShCYOqi45ylMW4mLN7wAAjkgDSlQ2bOgNBS2TkxTKsnJg91mA5zDLvhBfGmtQoCuF9MZ0f3neBKDlzJDPbpSREsilGkROnIX4aZFw9bwPCAKMkWZE5BYjSzUZGk4DP84iL48Hp4oH59KBF9TgE2+Ehs7OR0xTK4o/5WFLkUEdz8ERPQkeaTaOcEegm5QJ/rALfGIihBvr6T5T9SLUvYTAkeB95JpnA2o10k59jGsJ15HXmgGDOgEq7WTUapPgxb+R0S8FxXPYZ72IbokWbceCX3So1CNXp1dkz4UkKhon3g1Gth0Ye7GYinDuVBMQCM6j4ANexFYUwOD1o3Z3cH3uKHckZKY0SKxWxClkKO1qQZxPgwhSADo94OyAyZmAzhtxipJ1Biz0T4QhKxacWgFqakKeUYsv2ruQqU2HV3EG/W7ClIoFiOBCAxCT85Q4cWMCqyQhBVojh2R7K9QtCuga4tEoXEejrkWcIJclyUKDzQB8HgyHEhvLwdehQB/ciIoCHombi2J3A+pdp7FbE4yrlZTIIU7GQ67iMUn7HbjgQktaC3ou74DPB8yOTApbwjRJakKCJBECdxTockICDSjQB4svDkkeA2QkRZ6qCLLAV6hX1Q+xe1G3DdpUHbQuF5KRBElpNTitFgmuU7iu7MY0Zzokj04CfD6oDh4E5x46m9mSECwd9XEKSNJ5RDkyEK1KRef1KyhK4uFJteC60g8dF3TYSYooJGo42CgWUEWBv1GZUsh45NSl41xWKxILEpDszQBqgp9wRySm4YnkbAj9Hmx0B2fup6byuHxZAMcFW/6qlDhMNVYhKsKIrGwfeEcOjinPozNPAT2nh4KPCdMdE8OFrUmtT40Odm9dDc3ZUSiA2fn/jVpZA+L4OHApVxF16RKSknhES+TQfhZ0ECXSqdgTtQcROiVcbW4oMrKAq+fgv0Vo+HvFqDmFpqYmJCeH+k1NJhOOHDkyYhqpVAqNRoOOjg7ExIQb+15glttQVfIfOLv7C/AywjWHC84GF9LkyZCo5EiUmpAV6cXxviuIVoZqcDJOhgw+A9GIRpu8Ddd0pyAQIbK0Ilgd5TiUtcSgXxmHi/JgSyjKlggjf6MWxfPI7DXB+Mhc8N2EqQ0RON51FJPmT4M0KlIc+eQTElAY/2O0UAuupF2GMmUCVBIlUrQx6BWceFRqgaRwIvgDp0H9oYlJCkUweFuWKR9RfBRK9TPD7puPiYFJV4kWasFCWR68AYKpSAKJOljL5W5Uk6WRGgQQgIyToTIidA3JtGmQcByoqQnC+fNhzSk+JQXypCSU9Hdj4GNxTh0FNaeHg3fgS/mXiHn0CfCSaPAHeAicgEiFLize3MCYgNGnweNdEyF9YrZ4LAMZQEYG/B01gMsFqd0OHYDUVDNyvtRBkxmsocsCMvgkPiiVwVXO8vpTocwvAgBYLeeh1EfgslEH77UuxKqSEa/V4npbMKPG6bXIURagVdGKntRL6Kr1w9GSAblSCk4hhwTApClLEDh4EACQHlWAw9gPQ7QEMn0U6lz9UGoM0MbWgU+MBWcygeLiUBE5FTlf1yA2bSJS5Y9AoAB0kaGYNZLJkyGRy6H77Dh6eoBEqxEJEis8eTlQpfnR39AOw5FaRPVLcDYiOPfFVlyNNAogvtuFYylfIDqah73JjsO+K5ipCy7CIE1OgqWiBzhxI9iiSgWJhANksmDlAzroJDr4J06Hy++E/OjQyhLP8eAKC0FtbeBUKsBkQrazCEiIAt1YDzZLkgU72dEYcRTxCg2UualQXHfDeEUDPi0NwqlT4CdMEN+vaaWPQ52ggdoQAU4mA1Qq8DYbtKdPo/OmiQaPZ/9n8F0y
mRDd3Q3eZkOMXI4YFA7RCgDyklLMPSOHrM8HThfqT5PJOEwtNqGg/DGcFE4iWuIAanaKx9WzKgAi5K/vRZ/cjaT4PlzGVciUKqgnZ4GTSqGWGMFxEkjLygAAJSiBj3yQQDKk8jpnyrPYtPN/Qs84IwN6To+Lx/aK+0yaHEiUKqTfCBGPDA3IbkfZJy5okhz4KsUHT1tw3EABBYzZcXjCowWvLsNm+dcQJPdh0R0aJf72t7/R8uXLxe3NmzfTCy+8EJYmOzubGhoaxG2LxULXrl0bcq23336bioqKqKioiFJSUv5fugLd3RTo6SGvp4/arn4Vdszn81BT0wkS+vtJ8PmGPd973UluZ+uwxzz9bjrw6X7yeUPnCm43Cdevh7YFgYTu7jvSLPj94v/OgJM6Ah1h1+3z95IgCN98ne5uEq5dI2EYG/sEH3kF78jnBgIUqKsjIRAY9nigoYECly+T4HaL+wZfr7GhkU58fjx0va4uElyu0LbPN6LNBZ+PhN5ecgtu6hF6hhzvbe+k7qvt5PRco+7uNhI6OkLnulwkeDzk8ndRrzdoL6+7j65cOEE1//sxBQbdj+DxUKC9nQSnc6iG3t7gnyDQ6ZPHyO/zUb+3lxo6vhI13g1+v4/qvZdGfH6e63105uP/pVNfhGwXaGsjf+c18vt9FPB4yH116PM8fOwA1V+uI8Hvp0Bb24i/L3R3U6Cp6a60ExF5nG3k7+sdet0R3pOb6ehupSPHPqK6r89QZ+tFarl+6a50CIJAgfp6EnpDWoSeniHP5cKF83Tqi8/D9vVd7aTOM5eJiOjwkc/oWkcbuQU3tQfav/F3+9o76ODHH5PX5yEiopaGL+mr08fo3Okz1CP0kF/wU23deTp55FP65J8fUm/v0Pd3yH0Egu+CX/CLeSggBKg10Ep9Qt83ahqJoqKi20rHEd1J9Jjb59ChQ1i1ahU++ugjAMDatWsBAL/4xS/ENDNnzsSqVatQWloKv9+P+Ph4tLe337L7aOLEiTh+/PhoSGYwGIxvLbdbdo7aQHNxcTEuXLiAuro6eL1ebNu2DVVVVWFpqqqqsGnTJgDAjh07UFlZOeqxwhkMBoMxMqM2piCVSvH73/8eM2fORCAQwLJly5CTk4OVK1di4sSJqKqqwvLly/G9730PNpsNer0e27ZtGy05DAaDwbgNRq37aLRg3UcMBoNx54x59xGDwWAwHjyYU2AwGAyGCHMKDAaDwRBhToHBYDAYIswpMBgMBkPkgfv6KCYmBmaz+a7ObW9vh9E4fBCv8caDopXpvPc8KFqZznvPaGqtr6/HtWvfvHb6A+cU/j88SJ+zPihamc57z4Oilem894wHraz7iMFgMBgizCkwGAwGQ0SyamBBg4eEoqKisZZw2zwoWpnOe8+DopXpvPeMtdaHakyBwWAwGLeGdR8xGAwGQ+ShcQp79+5FRkYGbDYb1q1bN6ZaGhoaUFFRgaysLOTk5OB3vwsuudnZ2Ynp06fDbrdj+vTpcDqdAAAiwosvvgibzYa8vDycOHHivuoNBAKYMGEC5s6dCwCoq6tDSUkJ7HY7Fi1aBK/XCwDweDxYtGgRbDYbSkpKUF9ff191dnV1obq6GpmZmcjKysKhQ4fGpU1/+9vfIicnBw6HA0uWLIHb7R43Nl22bBliY2PhcDjEfXdjw02bNsFut8Nut4vh8Udb58svv4zMzEzk5eXhySefRFdXl3hs7dq1sNlsyMjIENd4AUa/XBhO5wDr168Hx3HiZ6Jjac8w7noZnwcIv99PFouFamtryePxUF5eHp09e3bM9DQ3N9PnnwdXf7p+/TrZ7XY6e/Ysvfzyy7R27VoiIlq7di298sorRES0e/dumjVrFgmCQIcOHaJJkybdV71vvPEGLVmyhObMmUNERAsWLKCtW7cSEdFzzz1Hf/zjH4mI6A9/+AM999xzRES0detWWrhw4X3V+cwzz9Cf//xnIiLyeDzkdDrHnU0bGxvJbDZTX19w
Ba0FCxbQO++8M25s+tlnn9Hnn39OOTk54r47tWFHRwelpaVRR0cHdXZ2UlpaGnV2do66zo8++oh8N1Zae+WVV0SdZ8+epby8PHK73XTp0iWyWCzk9/vvS7kwnE4ioitXrtCMGTMoJSWF2tuDK7yNpT0H81A4hZqaGpoxY4a4/dprr9Frr702horCqaqqon379lF6ejo1NzcTUdBxpKenExHRihUr6N133xXTD0432jQ0NFBlZSXt37+f5syZQ4IgkMFgEDPfYNvOmDGDampqiIjI5/ORwWC4rWVC7wUul4vMZvOQ3xtvNm1sbCSTyUQdHR3k8/lozpw5tHfv3nFl07q6urBC7E5t+O6779KKFSvE/TenGy2dg/n73/9O3/3ud4loaH4fsOn9KheG0zl//nw6deoUpaamik5hrO05wEPRfdTU1ITk5GRx22QyoampaQwVhaivr8fJkydRUlKC1tZWJCQEF6NPSEhAW1sbgLHV/9JLL+HXv/41eD74qnR0dECr1UIqlQ7RMlinVCqFRqNBR0fHfdF56dIlGI1G/OAHP8CECRPwwx/+EL29vePOpklJSfjZz36GlJQUJCQkQKPRoKioaFzadIA7teF4yG8bN27E7Nmzx6XOXbt2ISkpCfn5+WH7x4vOh8Ip0DAfWI2HZT97enowf/58vPnmm4iOjh4x3Vjp/+CDDxAbGxv2idyttIylnf1+P06cOIEf/ehHOHnyJNRq9S37iMdKq9PpxM6dO1FXV4fm5mb09vZiz549I2oZr+8uMLK2sda8Zs0aSKVSLF26FMD40tnX14c1a9Zg9erVQ46NF50PhVMwmUxoaGgQtxsbG5GYmDiGigCfz4f58+dj6dKleOqppwAAcXFxaGlpAQC0tLQgNjYWwNjpP3jwIHbt2gWz2YzFixfjk08+wUsvvYSuri74/f4hWgbr9Pv9cLlc0Ov1o65z4LdNJhNKSkoAANXV1Thx4sS4s+nHH3+MtLQ0GI1GyGQyPPXUU6ip3bLu0AAABjdJREFUqRmXNh3gTm04lvlt06ZN+OCDD7Blyxax4BxPOmtra1FXV4f8/HyYzWY0NjaisLAQV69eHTc6HwqnUFxcjAsXLqCurg5erxfbtm1DVVXVmOkhIixfvhxZWVn46U9/Ku6vqqoSvyzYtGkT5s2bJ+7fvHkziAiHDx+GRqMRm/Ojydq1a9HY2Ij6+nps27YNlZWV2LJlCyoqKrBjx45hdQ7o37FjByorK+9bDTE+Ph7Jycn4+uuvAQD79+9Hdnb2uLNpSkoKDh8+jL6+PhCRqHM82nSAO7XhzJkzsW/fPjidTjidTuzbtw8zZ84cdZ179+7F66+/jl27diEiIiJM/7Zt2+DxeFBXV4cLFy5g0qRJY1Iu5Obmoq2tDfX19aivr4fJZMKJEycQHx8/fuw5aqMV44zdu3eT3W4ni8VCr7766phqOXDgAAGg3Nxcys/Pp/z8fNq9ezddu3aNKisryWazUWVlJXV0dBARkSAI9OMf/5gsFgs5HA46duzYfdf86aefil8f1dbWUnFxMVmtVqquria3201ERP39/VRdXU1Wq5WKi4uptrb2vmo8efIkFRUVUW5uLs2bN486OzvHpU1XrlxJGRkZlJOTQ08//TS53e5xY9PFixdTfHw8SaVSSkpKor/85S93ZcMNGzaQ1Wolq9VKGzduvC86rVYrmUwmMU8NfLVFRPTqq6+SxWKh9PR0+vDDD8X9o10uDKdzMIMHmsfSnoNhM5oZDAaDIfJQdB8xGAwG4/ZgToHBYDAYIswpMBgMBkOEOQUGg8FgiDCnwGAwGAwR5hQY3woiIyO/Mc2bb76Jvr6+UdVx6tQpfPjhh8MeO378OF588cU7ut6qVauwfv36eyGNwbgtmFNgPDTcjVMIBAJ3lP5WTmHixIl466237uh6DMb9hjkFxreKf/7zn5g2bZq4rsLSpUtBRHjrrbfQ3NyMiooKVFRUAAD27duH0tJSFBYW
YsGCBejp6QEAmM1mrF69GmVlZdi+fTtqa2sxa9YsFBUVoby8HP/+978BANu3b4fD4UB+fj6mTp0Kr9eLlStX4r333kNBQQHee++9IdoG1qRYtWoVli1bhmnTpsFisYQ5izVr1iAjIwOPPfaYOEMbwIg65s2bh82bNwMA3n77bTHmD4NxV4zq1DgG4z6hVquJKDjzOjo6mhoaGigQCNDkyZPpwIEDRBQ+e7S9vZ3Ky8upp6eHiIjWrVtHv/rVr8R0r7/+unjtyspKOn/+PBERHT58mCoqKoiIyOFwUGNjIxEROZ1OIiJ655136Pnnnx9W4+BZ4b/85S+ptLSU3G43tbe3k16vJ6/XS8ePHyeHw0G9vb3kcrnIarXSb37zm1vquHr1KlmtVvrXv/5FdrtdnHHMYNwN0rF2SgzGvWbSpEkwmUwAgIKCAtTX16OsrCwszeHDh3Hu3DlMmTIFAOD1elFaWioeX7RoEYBgJNuamhosWLBAPObxeAAAU6ZMwfe//30sXLhQDGp4J8yZMwcKhQIKhQKxsbFobW3FgQMH8OSTT4qxewZi8dxKR1xcHFavXo2Kigq8//779z1gHuPbBXMKjG8dCoVC/F8ikYjRRwdDRJg+fTq2bt067DXUajUAQBAEaLVanDp1akiaP/3pTzhy5Ah2796NgoKCYdPcjc7hAt7dSgcAnDlzBgaDAc3NzXekgcG4GTamwHhoiIqKQnd3NwBg8uTJOHjwIC5evAggGOf+/PnzQ86Jjo5GWloatm/fDiDoTE6fPg0g2MdfUlKC1atXIyYmBg0NDWG/cTdMnToV77//Pvr7+9Hd3Y1//OMf36jj6NGj2LNnD06ePIn169ejrq7urn+fwWBOgfHQsGLFCsyePRsVFRUwGo3461//iiVLliAvLw+TJ08WB25vZsuWLdiwYQPy8/ORk5ODnTt3AgguFJ+bmwuHw4GpU6ciPz8fFRUVOHfu3LADzbdDYWEhFi1ahIKCAsyfPx/l5eW31OHxePDss89i48aNSExMxBtvvIFly5YNuzALg3E7sCipDAaDwRBhLQUGg8FgiDCnwGAwGAwR5hQYDAaDIcKcAoPBYDBEmFNgMBgMhghzCgwGg8EQYU6BwWAwGCLMKTAYDAZD5P8Ajz0aRJ7kc0gAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "ax = plt.subplot(111)\n",
+ "x = pd.np.arange(combined_interests.shape[0])\n",
+ "ax.plot(x, combined_interests.loc[:, '%s_expat'%(audience_var)], label='expat', color=[0.05,0.05,0.95,0.3])\n",
+ "ax.plot(x, combined_interests.loc[:, '%s_US'%(audience_var)], label='US', color=[0.95,0.05,0.05,0.3])\n",
+ "ax.plot(x, combined_interests.loc[:, '%s_MX'%(audience_var)], label='MX', color=[0.05,0.95,0.05,0.3])\n",
+ "ax.set_xlabel('Interest index')\n",
+ "ax.set_ylabel('% of population with interest')\n",
+ "ax.legend(loc='upper right')\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The three curves overlap closely on the whole, although the expat values look spikier (consistently higher than the other two groups at the maxima)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import seaborn as sns\n",
+ "matplotlib.rcParams['lines.markeredgewidth'] = 0\n",
+ "pal = sns.color_palette('Blues')\n",
+ "g = sns.pairplot(combined_interests.iloc[:, 1:], \n",
+ " markers='o', palette=pal, \n",
+ " diag_kind='kde', kind='reg')\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Rough confirmation that the Ex-pats behave more like US than like MX."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 141,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "US R=9.437E-01 (p=0.000E+00)\n",
+ "MX R=7.355E-01 (p=3.327E-245)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from scipy.stats import pearsonr\n",
+ "countries = ['US', 'MX']\n",
+ "for c in countries:\n",
+ " corr, pval = pearsonr(combined_interests.loc[:, '%s_%s'%(audience_var, c)],\n",
+ " combined_interests.loc[:, '%s_expat'%(audience_var)])\n",
+ " print('%s R=%.3E (p=%.3E)'%(c, corr, pval))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have established a correlation between expat interests and US interests, we should define the assimilation metric and compute it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# stolen from display_results.py (https://drive.google.com/file/d/1QzX4Re77H7PJrXk84qGHOB5RZ7FKjVzn)\n",
+ "\n",
+ "def global_score(target_file, dest_file, home_file, score_type_, nb_int):\n",
+ "    \"\"\"\n",
+ "    Compute per-interest assimilation scores for a target population that comes from some home population and\n",
+ "    is trying to assimilate to a certain dest population\n",
+ "    :param target_file: File containing interests audiences for the target population\n",
+ "    :param dest_file: File containing interests audiences for the dest population\n",
+ "    :param home_file: File containing interests audiences for the home population\n",
+ "    :param score_type_: '-' to compute the score using subtraction; any other value uses division\n",
+ "    :param nb_int: Number of interests to sample; random.sample raises ValueError if fewer valid interests remain\n",
+ "    :return: scores: the per-interest assimilation scores for the most dest-specific interests\n",
+ "             nb_target: the first audience value of the target file (used here as the target population size)\n",
+ "    \"\"\"\n",
+ "    target_data = pd.read_csv(target_file, index_col=0)\n",
+ "    dest_data = pd.read_csv(dest_file, index_col=0)\n",
+ "    home_data = pd.read_csv(home_file, index_col=0)\n",
+ "\n",
+ "    # Remove hand-picked interests (only the first 3000 rows are kept)\n",
+ "    target_audience = target_data['audience'][0:3000]\n",
+ "    dest_audience = dest_data['audience'][0:3000]\n",
+ "    home_audience = home_data['audience'][0:3000]\n",
+ "\n",
+ "    # The first audience value of each file is the reference used to spot erroneous rows below\n",
+ "    nb_target = target_audience[0]\n",
+ "    nb_dest = dest_audience[0]\n",
+ "    nb_home = home_audience[0]\n",
+ "\n",
+ "    # Remove erroneous audiences: an audience equal to its reference value is treated as an error,\n",
+ "    # and a row is kept only when it is valid in ALL three populations (the masks are AND-ed;\n",
+ "    # OR-ing them kept rows that were still erroneous in one or two of the populations)\n",
+ "    target_valid = (target_audience != nb_target)\n",
+ "    dest_valid = (dest_audience != nb_dest)\n",
+ "    home_valid = (home_audience != nb_home)\n",
+ "    valid = target_valid & dest_valid & home_valid\n",
+ "    target_audience = target_audience[valid]\n",
+ "    dest_audience = dest_audience[valid]\n",
+ "    home_audience = home_audience[valid]\n",
+ "\n",
+ "    # Select a certain number of interests (the fixed seed keeps the sample reproducible)\n",
+ "    random.seed(0)\n",
+ "    int_ind = random.sample(list(dest_audience.index), nb_int)\n",
+ "    int_ind = np.sort(int_ind)\n",
+ "\n",
+ "    target_audience = target_audience[int_ind]\n",
+ "    dest_audience = dest_audience[int_ind]\n",
+ "    home_audience = home_audience[int_ind]\n",
+ "\n",
+ "    # Compute activity level (summed audience over the sampled interests)\n",
+ "    total_nb_interested_target = target_audience.sum(0)\n",
+ "    total_nb_interested_dest = dest_audience.sum(0)\n",
+ "    total_nb_interested_home = home_audience.sum(0)\n",
+ "\n",
+ "    # Compute interest ratios (share of each interest in the population's summed audience)\n",
+ "    target_ir = target_audience.values / float(total_nb_interested_target)\n",
+ "    dest_ir = dest_audience.values / float(total_nb_interested_dest)\n",
+ "    home_ir = home_audience.values / float(total_nb_interested_home)\n",
+ "\n",
+ "    # Keep only 'dest' interests (ratio strictly higher in dest than in home)\n",
+ "    dest_indexes = dest_ir > home_ir\n",
+ "    g_dest_ir = dest_ir[dest_indexes]\n",
+ "    g_home_ir = home_ir[dest_indexes]\n",
+ "    g_target_ir = target_ir[dest_indexes]\n",
+ "\n",
+ "    # Keep only 'very dest' interests (dest/home gap above the TOP_PERC percentile)\n",
+ "    if score_type_ == '-':\n",
+ "        dest_home_perc = np.percentile(g_dest_ir - g_home_ir, TOP_PERC)\n",
+ "        very_dest_indexes = (g_dest_ir - g_home_ir) > dest_home_perc\n",
+ "    else:\n",
+ "        dest_home_perc = np.percentile(g_dest_ir / g_home_ir, TOP_PERC)\n",
+ "        very_dest_indexes = ((g_dest_ir / g_home_ir) > dest_home_perc)\n",
+ "\n",
+ "    vg_dest_ir = g_dest_ir[very_dest_indexes]\n",
+ "    vg_target_ir = g_target_ir[very_dest_indexes]\n",
+ "\n",
+ "    # Compute scores (target ratio compared against dest ratio, by difference or by quotient)\n",
+ "    if score_type_ == '-':\n",
+ "        scores = vg_target_ir - vg_dest_ir\n",
+ "    else:\n",
+ "        scores = vg_target_ir / vg_dest_ir\n",
+ "\n",
+ "    return scores, nb_target"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def interest_ratio():\n",
+ " pass"
+ ]
}
],
"metadata": {
diff --git a/src/data_processing/get_top_k_interest_query.py b/src/data_processing/get_top_k_interest_query.py
index 7ea7dd6..2676232 100644
--- a/src/data_processing/get_top_k_interest_query.py
+++ b/src/data_processing/get_top_k_interest_query.py
@@ -11,8 +11,8 @@
def main():
parser = ArgumentParser()
# parser.add_argument('--interest_count_file', default='data/all_FB_interests_2016/all_FB_interests_2016.csv')
- parser.add_argument('--interest_sorted_file', default='data/top_interests_complete.json')
- parser.add_argument('--query_file', default='data/queries/US_MX_native_interests.json')
+ parser.add_argument('--interest_sorted_file', default='data/top_interests_complete_clean.json')
+ parser.add_argument('--query_file', default='data/queries/hispanic_MX_expats.json')
parser.add_argument('--top_k', default=3000)
args = parser.parse_args()
# interest_count_file = args.interest_count_file
@@ -38,7 +38,6 @@ def main():
## write
out_file = query_file.replace('.json', '_top_%d_interest.json'%(top_k))
- print(out_file)
json.dump(query, open(out_file, 'w'), indent=4, encoding='latin1')
if __name__ == '__main__':
diff --git a/src/data_processing/mine_facebook_audience.py b/src/data_processing/mine_facebook_audience.py
index 909312a..373ab1f 100644
--- a/src/data_processing/mine_facebook_audience.py
+++ b/src/data_processing/mine_facebook_audience.py
@@ -17,15 +17,18 @@ def main():
# parser.add_argument('--query_file', default='data/hispanic_expat_lang_age.json')
# parser.add_argument('--query_file', default='data/hispanic_lang_age.json')
# parser.add_argument('--query_file', default='data/US_MX_native_interests.json')
- parser.add_argument('--query_file', default='data/queries/US_MX_native_interests_top_3000_interest_new.json')
+# parser.add_argument('--query_file', default='data/queries/US_MX_native_interests_top_3000_interest_new.json')
+ parser.add_argument('--query_file', default='data/queries/hispanic_MX_expats_top_3000_interest.json')
+ parser.add_argument('--interest_file', default='data/top_interests_complete_names.csv')
parser.add_argument('--out_dir', default='data/query_results/')
+ parser.add_argument('--response_file', default=None)
args = parser.parse_args()
query_file = args.query_file
out_dir = args.out_dir
+ response_file = args.response_file
## TEST: try multiple queries at once
extra_auth_files = ['data/facebook_auth_ingmar.csv']
-# extra_auth_files = ['data/facebook_auth.csv',]*2
## temporary: remove interest IDs that we've already queried
# response_file = 'dataframe_collecting_1527334686.csv'
@@ -39,9 +42,7 @@ def main():
# print(tmp_query_file)
# json.dump(leftover_query, open(tmp_query_file, 'w'), indent=4)
- query_and_write(query_file, out_dir, extra_auth_files=extra_auth_files)
-# query_and_write(query_file, out_dir, extra_auth_files=extra_auth_files)
-# query_and_write(query_file, out_dir, extra_auth_files=extra_auth_files, response_file=response_file)
+ query_and_write(query_file, out_dir, extra_auth_files=extra_auth_files, response_file=response_file)
## TODO: periodically copy response to server
## so we can tell when something goes
## wrong even if we're not on the same machine