Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-------------start-------------\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CITY</th>\n",
" <th>INSTITUTION_FULL</th>\n",
" <th>ADDRESS_FULL</th>\n",
" <th>d_state</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A CORUNA</td>\n",
" <td>UNIV A CORUNA, GRP INGN FLUIDOS</td>\n",
" <td>UNIV A CORUNA, GRP INGN FLUIDOS, A CORUNA, MEXICO</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AC</td>\n",
" <td>PREVENCASA</td>\n",
" <td>PREVENCASA, AC, TIJUANA, MEXICO</td>\n",
" <td>BAJA CALIFORNIA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AC</td>\n",
" <td>CTR DIVERSIDAD &amp; DERECHOS SEXUALES</td>\n",
" <td>CTR DIVERSIDAD &amp; DERECHOS SEXUALES, AC, MEXICO</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AC</td>\n",
" <td>FEDERAC HEMOFILIA REPUBL MEXICANA</td>\n",
" <td>FEDERAC HEMOFILIA REPUBL MEXICANA, AC, MEXICO</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AC</td>\n",
" <td>INST ECOL, DIV POSGRAD</td>\n",
" <td>INST ECOL, DIV POSGRAD, AC, MEXICO</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>AC</td>\n",
" <td>INST ECOL, RED BIOL EVOLUT</td>\n",
" <td>INST ECOL, RED BIOL EVOLUT, AC, MEXICO</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>AC DELEG BENITO JUAREZ</td>\n",
" <td>UNIV VALLE MEXICO, INST INVEST &amp; INNOVAC FARMA...</td>\n",
" <td>UNIV VALLE MEXICO, INST INVEST &amp; INNOVAC FARMA...</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>AC DELEG BENITO JUAREZ</td>\n",
" <td>UNIV VALLE MEXICO, INST INVEST &amp; INNOVAC FARMA...</td>\n",
" <td>UNIV VALLE MEXICO, INST INVEST &amp; INNOVAC FARMA...</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>AC GUADALAJARA</td>\n",
" <td>CTR INVEST &amp; ASISTENCIA TECNOL &amp; DISENO ESTADO JA</td>\n",
" <td>CTR INVEST &amp; ASISTENCIA TECNOL &amp; DISENO ESTADO...</td>\n",
" <td>JALISCO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>AC HERMOSILLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO, AC HERMOSIL...</td>\n",
" <td>SONORA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>AC HERMOSILLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO, AC HERMOSIL...</td>\n",
" <td>SONORA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>AC HERMOSILLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO, AC HERMOSIL...</td>\n",
" <td>SONORA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>AC HERMOSILLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO, AC HERMOSIL...</td>\n",
" <td>SONORA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>AC HERMOSILLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO, AC HERMOSIL...</td>\n",
" <td>SONORA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>AC HERMOSILLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO, AC HERMOSIL...</td>\n",
" <td>SONORA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>AC HERMOSILLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO</td>\n",
" <td>CTR INVEST ALIMENTAC &amp; DESARROLLO, AC HERMOSIL...</td>\n",
" <td>SONORA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>AC MEXICO</td>\n",
" <td>INST ECOL</td>\n",
" <td>INST ECOL, AC MEXICO, MEXICO</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>AC MEXICO</td>\n",
" <td>MEXICAN HLTH FDN</td>\n",
" <td>MEXICAN HLTH FDN, AC MEXICO, MEXICO</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>AC MEXICO</td>\n",
" <td>TOMATELO PECHO</td>\n",
" <td>TOMATELO PECHO, AC MEXICO, MEXICO</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>ACAPULCO</td>\n",
" <td>ACAPULCO ONCOL GRP, DEPT ONCOL &amp; CLIN RES</td>\n",
" <td>ACAPULCO ONCOL GRP, DEPT ONCOL &amp; CLIN RES, ACA...</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>ACAPULCO</td>\n",
" <td>INST TECNOL ACAPULCO, COMP LAB</td>\n",
" <td>INST TECNOL ACAPULCO, COMP LAB, AV INST TECNOL...</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>ACAPULCO</td>\n",
" <td>INST TECNOL ACAPULCO, COMP LAB</td>\n",
" <td>INST TECNOL ACAPULCO, COMP LAB, AV INST TECNOL...</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>ACAPULCO</td>\n",
" <td>STATE LAB PUBL HLTH</td>\n",
" <td>STATE LAB PUBL HLTH, ACAPULCO, MEXICO</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>ACAPULCO</td>\n",
" <td>PROJECTO COMUNITARIA DERMATOL</td>\n",
" <td>PROJECTO COMUNITARIA DERMATOL, ACAPULCO, MEXICO</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>ACAPULCO</td>\n",
" <td>CTR INVEST CLIN PACIFICO</td>\n",
" <td>CTR INVEST CLIN PACIFICO, ACAPULCO, MEXICO</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>ACAPULCO</td>\n",
" <td>UNIV NACL AUTONOMA MEXICO, INST MATEMAT</td>\n",
" <td>UNIV NACL AUTONOMA MEXICO, INST MATEMAT, ACAPU...</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>ACAPULCO</td>\n",
" <td>UNIV NACL AUTONOMA MEXICO, INST MATEMAT</td>\n",
" <td>UNIV NACL AUTONOMA MEXICO, INST MATEMAT, ACAPU...</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>ACAPULCO</td>\n",
" <td>UAG, FAC MATEMAT</td>\n",
" <td>UAG, FAC MATEMAT, ACAPULCO, MEXICO</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>ACAPULCO</td>\n",
" <td>UAG, FAC MATEMAT</td>\n",
" <td>UAG, FAC MATEMAT, ACAPULCO, MEXICO</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>ACAPULCO</td>\n",
" <td>HOSP GEN ACAPULCO, AREA QUIRURG, SERV ENFERMERIA</td>\n",
" <td>HOSP GEN ACAPULCO, SERV ENFERMERIA, AREA QUIRU...</td>\n",
" <td>GUERRERO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1470</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>FEDERAC MEXICANA ASOCIAC PRIVADAS FEMAP</td>\n",
" <td>FEDERAC MEXICANA ASOCIAC PRIVADAS FEMAP, CIUDA...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1471</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>SALUD &amp; DESAROLLO COMUNITARIO CIUDAD JUAREZ AC SA</td>\n",
" <td>SALUD &amp; DESAROLLO COMUNITARIO CIUDAD JUAREZ AC...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1472</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>FEDERAC MEXICANA ASOCIAC PRIVADAS</td>\n",
" <td>FEDERAC MEXICANA ASOCIAC PRIVADAS, CIUDAD JUAR...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1473</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>SALUD &amp; DESARROLLO COMUNITARIO CIUDAD JUAREZ AC</td>\n",
" <td>SALUD &amp; DESARROLLO COMUNITARIO CIUDAD JUAREZ A...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1474</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, FAC MED</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, FAC MED, CIUDAD J...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1475</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>FED MEXICANA ASOCIAC PRIVADAS</td>\n",
" <td>FED MEXICANA ASOCIAC PRIVADAS, CIUDAD JUAREZ, ...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1476</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>SALUD &amp; DESAROLLO COMUNITARIO CIDUDAD JUAREZ AC</td>\n",
" <td>SALUD &amp; DESAROLLO COMUNITARIO CIDUDAD JUAREZ A...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1477</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>AUTONOMOUS UNIV CIUDAD JUAREZ, DEPT BASIC SCI,...</td>\n",
" <td>AUTONOMOUS UNIV CIUDAD JUAREZ, INST BIOMED SCI...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1478</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>AUTONOMOUS UNIV CIUDAD JUAREZ, DEPT BASIC SCI,...</td>\n",
" <td>AUTONOMOUS UNIV CIUDAD JUAREZ, INST BIOMED SCI...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1479</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>PROGRAMA COMPANEROS</td>\n",
" <td>PROGRAMA COMPANEROS, CIUDAD JUAREZ, MEXICO</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1480</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>PROGRAMA COMPANEROS</td>\n",
" <td>PROGRAMA COMPANEROS, CIUDAD JUAREZ, MEXICO</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1481</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN &amp; MFG</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN &amp; MFG, C...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN &amp; MFG</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN &amp; MFG, C...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1483</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1484</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN &amp; MFG</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN &amp; MFG, C...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1485</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>ESCUELA SUPER PSICOL, HUMAN RESOURCE DEV &amp; BUS...</td>\n",
" <td>ESCUELA SUPER PSICOL, HUMAN RESOURCE DEV &amp; BUS...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1486</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1487</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, INST INGN TECHNOL</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, INST INGN TECHNOL...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1488</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>AUTONOMOUS UNIV CIUDAD JUAREZ</td>\n",
" <td>AUTONOMOUS UNIV CIUDAD JUAREZ, CIUDAD JUAREZ, ...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1489</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>MED UNIT HLTH</td>\n",
" <td>MED UNIT HLTH, CIUDAD JUAREZ, MEXICO</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1490</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>MED UNIT COMMUNITY DEV CIUDAD JUAREZ</td>\n",
" <td>MED UNIT COMMUNITY DEV CIUDAD JUAREZ, CIUDAD J...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1491</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, DEPT ELECT COMP</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, DEPT ELECT COMP, ...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1492</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1493</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1494</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1495</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, FAC MED</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, FAC MED, CIUDAD J...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1496</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>SALUD &amp; DESAROLLO COMUNITARIO CIUDAD JUAREZ AC</td>\n",
" <td>SALUD &amp; DESAROLLO COMUNITARIO CIUDAD JUAREZ AC...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1497</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA</td>\n",
" <td>UNIV AUTONOMA, CIUDAD JUAREZ, MEXICO</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1498</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, INST CIENCIAS BIO...</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, LAB CIENCIAS AMBI...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1499</th>\n",
" <td>CIUDAD JUAREZ</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, PROGRAMA MED VET ...</td>\n",
" <td>UNIV AUTONOMA CIUDAD JUAREZ, PROGRAMA MED VET ...</td>\n",
" <td>CHIHUAHUA</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1500 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" CITY \\\n",
"0 A CORUNA \n",
"1 AC \n",
"2 AC \n",
"3 AC \n",
"4 AC \n",
"5 AC \n",
"6 AC DELEG BENITO JUAREZ \n",
"7 AC DELEG BENITO JUAREZ \n",
"8 AC GUADALAJARA \n",
"9 AC HERMOSILLO \n",
"10 AC HERMOSILLO \n",
"11 AC HERMOSILLO \n",
"12 AC HERMOSILLO \n",
"13 AC HERMOSILLO \n",
"14 AC HERMOSILLO \n",
"15 AC HERMOSILLO \n",
"16 AC MEXICO \n",
"17 AC MEXICO \n",
"18 AC MEXICO \n",
"19 ACAPULCO \n",
"20 ACAPULCO \n",
"21 ACAPULCO \n",
"22 ACAPULCO \n",
"23 ACAPULCO \n",
"24 ACAPULCO \n",
"25 ACAPULCO \n",
"26 ACAPULCO \n",
"27 ACAPULCO \n",
"28 ACAPULCO \n",
"29 ACAPULCO \n",
"... ... \n",
"1470 CIUDAD JUAREZ \n",
"1471 CIUDAD JUAREZ \n",
"1472 CIUDAD JUAREZ \n",
"1473 CIUDAD JUAREZ \n",
"1474 CIUDAD JUAREZ \n",
"1475 CIUDAD JUAREZ \n",
"1476 CIUDAD JUAREZ \n",
"1477 CIUDAD JUAREZ \n",
"1478 CIUDAD JUAREZ \n",
"1479 CIUDAD JUAREZ \n",
"1480 CIUDAD JUAREZ \n",
"1481 CIUDAD JUAREZ \n",
"1482 CIUDAD JUAREZ \n",
"1483 CIUDAD JUAREZ \n",
"1484 CIUDAD JUAREZ \n",
"1485 CIUDAD JUAREZ \n",
"1486 CIUDAD JUAREZ \n",
"1487 CIUDAD JUAREZ \n",
"1488 CIUDAD JUAREZ \n",
"1489 CIUDAD JUAREZ \n",
"1490 CIUDAD JUAREZ \n",
"1491 CIUDAD JUAREZ \n",
"1492 CIUDAD JUAREZ \n",
"1493 CIUDAD JUAREZ \n",
"1494 CIUDAD JUAREZ \n",
"1495 CIUDAD JUAREZ \n",
"1496 CIUDAD JUAREZ \n",
"1497 CIUDAD JUAREZ \n",
"1498 CIUDAD JUAREZ \n",
"1499 CIUDAD JUAREZ \n",
"\n",
" INSTITUTION_FULL \\\n",
"0 UNIV A CORUNA, GRP INGN FLUIDOS \n",
"1 PREVENCASA \n",
"2 CTR DIVERSIDAD & DERECHOS SEXUALES \n",
"3 FEDERAC HEMOFILIA REPUBL MEXICANA \n",
"4 INST ECOL, DIV POSGRAD \n",
"5 INST ECOL, RED BIOL EVOLUT \n",
"6 UNIV VALLE MEXICO, INST INVEST & INNOVAC FARMA... \n",
"7 UNIV VALLE MEXICO, INST INVEST & INNOVAC FARMA... \n",
"8 CTR INVEST & ASISTENCIA TECNOL & DISENO ESTADO JA \n",
"9 CTR INVEST ALIMENTAC & DESARROLLO \n",
"10 CTR INVEST ALIMENTAC & DESARROLLO \n",
"11 CTR INVEST ALIMENTAC & DESARROLLO \n",
"12 CTR INVEST ALIMENTAC & DESARROLLO \n",
"13 CTR INVEST ALIMENTAC & DESARROLLO \n",
"14 CTR INVEST ALIMENTAC & DESARROLLO \n",
"15 CTR INVEST ALIMENTAC & DESARROLLO \n",
"16 INST ECOL \n",
"17 MEXICAN HLTH FDN \n",
"18 TOMATELO PECHO \n",
"19 ACAPULCO ONCOL GRP, DEPT ONCOL & CLIN RES \n",
"20 INST TECNOL ACAPULCO, COMP LAB \n",
"21 INST TECNOL ACAPULCO, COMP LAB \n",
"22 STATE LAB PUBL HLTH \n",
"23 PROJECTO COMUNITARIA DERMATOL \n",
"24 CTR INVEST CLIN PACIFICO \n",
"25 UNIV NACL AUTONOMA MEXICO, INST MATEMAT \n",
"26 UNIV NACL AUTONOMA MEXICO, INST MATEMAT \n",
"27 UAG, FAC MATEMAT \n",
"28 UAG, FAC MATEMAT \n",
"29 HOSP GEN ACAPULCO, AREA QUIRURG, SERV ENFERMERIA \n",
"... ... \n",
"1470 FEDERAC MEXICANA ASOCIAC PRIVADAS FEMAP \n",
"1471 SALUD & DESAROLLO COMUNITARIO CIUDAD JUAREZ AC SA \n",
"1472 FEDERAC MEXICANA ASOCIAC PRIVADAS \n",
"1473 SALUD & DESARROLLO COMUNITARIO CIUDAD JUAREZ AC \n",
"1474 UNIV AUTONOMA CIUDAD JUAREZ, FAC MED \n",
"1475 FED MEXICANA ASOCIAC PRIVADAS \n",
"1476 SALUD & DESAROLLO COMUNITARIO CIDUDAD JUAREZ AC \n",
"1477 AUTONOMOUS UNIV CIUDAD JUAREZ, DEPT BASIC SCI,... \n",
"1478 AUTONOMOUS UNIV CIUDAD JUAREZ, DEPT BASIC SCI,... \n",
"1479 PROGRAMA COMPANEROS \n",
"1480 PROGRAMA COMPANEROS \n",
"1481 UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN & MFG \n",
"1482 UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN & MFG \n",
"1483 UNIV AUTONOMA CIUDAD JUAREZ \n",
"1484 UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN & MFG \n",
"1485 ESCUELA SUPER PSICOL, HUMAN RESOURCE DEV & BUS... \n",
"1486 UNIV AUTONOMA CIUDAD JUAREZ \n",
"1487 UNIV AUTONOMA CIUDAD JUAREZ, INST INGN TECHNOL \n",
"1488 AUTONOMOUS UNIV CIUDAD JUAREZ \n",
"1489 MED UNIT HLTH \n",
"1490 MED UNIT COMMUNITY DEV CIUDAD JUAREZ \n",
"1491 UNIV AUTONOMA CIUDAD JUAREZ, DEPT ELECT COMP \n",
"1492 UNIV AUTONOMA CIUDAD JUAREZ \n",
"1493 UNIV AUTONOMA CIUDAD JUAREZ \n",
"1494 UNIV AUTONOMA CIUDAD JUAREZ \n",
"1495 UNIV AUTONOMA CIUDAD JUAREZ, FAC MED \n",
"1496 SALUD & DESAROLLO COMUNITARIO CIUDAD JUAREZ AC \n",
"1497 UNIV AUTONOMA \n",
"1498 UNIV AUTONOMA CIUDAD JUAREZ, INST CIENCIAS BIO... \n",
"1499 UNIV AUTONOMA CIUDAD JUAREZ, PROGRAMA MED VET ... \n",
"\n",
" ADDRESS_FULL d_state \n",
"0 UNIV A CORUNA, GRP INGN FLUIDOS, A CORUNA, MEXICO NaN \n",
"1 PREVENCASA, AC, TIJUANA, MEXICO BAJA CALIFORNIA \n",
"2 CTR DIVERSIDAD & DERECHOS SEXUALES, AC, MEXICO NaN \n",
"3 FEDERAC HEMOFILIA REPUBL MEXICANA, AC, MEXICO CIUDAD DE MEXICO \n",
"4 INST ECOL, DIV POSGRAD, AC, MEXICO CIUDAD DE MEXICO \n",
"5 INST ECOL, RED BIOL EVOLUT, AC, MEXICO CIUDAD DE MEXICO \n",
"6 UNIV VALLE MEXICO, INST INVEST & INNOVAC FARMA... CIUDAD DE MEXICO \n",
"7 UNIV VALLE MEXICO, INST INVEST & INNOVAC FARMA... CIUDAD DE MEXICO \n",
"8 CTR INVEST & ASISTENCIA TECNOL & DISENO ESTADO... JALISCO \n",
"9 CTR INVEST ALIMENTAC & DESARROLLO, AC HERMOSIL... SONORA \n",
"10 CTR INVEST ALIMENTAC & DESARROLLO, AC HERMOSIL... SONORA \n",
"11 CTR INVEST ALIMENTAC & DESARROLLO, AC HERMOSIL... SONORA \n",
"12 CTR INVEST ALIMENTAC & DESARROLLO, AC HERMOSIL... SONORA \n",
"13 CTR INVEST ALIMENTAC & DESARROLLO, AC HERMOSIL... SONORA \n",
"14 CTR INVEST ALIMENTAC & DESARROLLO, AC HERMOSIL... SONORA \n",
"15 CTR INVEST ALIMENTAC & DESARROLLO, AC HERMOSIL... SONORA \n",
"16 INST ECOL, AC MEXICO, MEXICO CIUDAD DE MEXICO \n",
"17 MEXICAN HLTH FDN, AC MEXICO, MEXICO CIUDAD DE MEXICO \n",
"18 TOMATELO PECHO, AC MEXICO, MEXICO CIUDAD DE MEXICO \n",
"19 ACAPULCO ONCOL GRP, DEPT ONCOL & CLIN RES, ACA... GUERRERO \n",
"20 INST TECNOL ACAPULCO, COMP LAB, AV INST TECNOL... GUERRERO \n",
"21 INST TECNOL ACAPULCO, COMP LAB, AV INST TECNOL... GUERRERO \n",
"22 STATE LAB PUBL HLTH, ACAPULCO, MEXICO GUERRERO \n",
"23 PROJECTO COMUNITARIA DERMATOL, ACAPULCO, MEXICO GUERRERO \n",
"24 CTR INVEST CLIN PACIFICO, ACAPULCO, MEXICO GUERRERO \n",
"25 UNIV NACL AUTONOMA MEXICO, INST MATEMAT, ACAPU... GUERRERO \n",
"26 UNIV NACL AUTONOMA MEXICO, INST MATEMAT, ACAPU... GUERRERO \n",
"27 UAG, FAC MATEMAT, ACAPULCO, MEXICO GUERRERO \n",
"28 UAG, FAC MATEMAT, ACAPULCO, MEXICO GUERRERO \n",
"29 HOSP GEN ACAPULCO, SERV ENFERMERIA, AREA QUIRU... GUERRERO \n",
"... ... ... \n",
"1470 FEDERAC MEXICANA ASOCIAC PRIVADAS FEMAP, CIUDA... CHIHUAHUA \n",
"1471 SALUD & DESAROLLO COMUNITARIO CIUDAD JUAREZ AC... CHIHUAHUA \n",
"1472 FEDERAC MEXICANA ASOCIAC PRIVADAS, CIUDAD JUAR... CHIHUAHUA \n",
"1473 SALUD & DESARROLLO COMUNITARIO CIUDAD JUAREZ A... CHIHUAHUA \n",
"1474 UNIV AUTONOMA CIUDAD JUAREZ, FAC MED, CIUDAD J... CHIHUAHUA \n",
"1475 FED MEXICANA ASOCIAC PRIVADAS, CIUDAD JUAREZ, ... CHIHUAHUA \n",
"1476 SALUD & DESAROLLO COMUNITARIO CIDUDAD JUAREZ A... CHIHUAHUA \n",
"1477 AUTONOMOUS UNIV CIUDAD JUAREZ, INST BIOMED SCI... CHIHUAHUA \n",
"1478 AUTONOMOUS UNIV CIUDAD JUAREZ, INST BIOMED SCI... CHIHUAHUA \n",
"1479 PROGRAMA COMPANEROS, CIUDAD JUAREZ, MEXICO CHIHUAHUA \n",
"1480 PROGRAMA COMPANEROS, CIUDAD JUAREZ, MEXICO CHIHUAHUA \n",
"1481 UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN & MFG, C... CHIHUAHUA \n",
"1482 UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN & MFG, C... CHIHUAHUA \n",
"1483 UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME... CHIHUAHUA \n",
"1484 UNIV AUTONOMA CIUDAD JUAREZ, IND ENGN & MFG, C... CHIHUAHUA \n",
"1485 ESCUELA SUPER PSICOL, HUMAN RESOURCE DEV & BUS... CHIHUAHUA \n",
"1486 UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME... CHIHUAHUA \n",
"1487 UNIV AUTONOMA CIUDAD JUAREZ, INST INGN TECHNOL... CHIHUAHUA \n",
"1488 AUTONOMOUS UNIV CIUDAD JUAREZ, CIUDAD JUAREZ, ... CHIHUAHUA \n",
"1489 MED UNIT HLTH, CIUDAD JUAREZ, MEXICO CHIHUAHUA \n",
"1490 MED UNIT COMMUNITY DEV CIUDAD JUAREZ, CIUDAD J... CHIHUAHUA \n",
"1491 UNIV AUTONOMA CIUDAD JUAREZ, DEPT ELECT COMP, ... CHIHUAHUA \n",
"1492 UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME... CHIHUAHUA \n",
"1493 UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME... CHIHUAHUA \n",
"1494 UNIV AUTONOMA CIUDAD JUAREZ, CIUDAD JUAREZ, ME... CHIHUAHUA \n",
"1495 UNIV AUTONOMA CIUDAD JUAREZ, FAC MED, CIUDAD J... CHIHUAHUA \n",
"1496 SALUD & DESAROLLO COMUNITARIO CIUDAD JUAREZ AC... CHIHUAHUA \n",
"1497 UNIV AUTONOMA, CIUDAD JUAREZ, MEXICO CHIHUAHUA \n",
"1498 UNIV AUTONOMA CIUDAD JUAREZ, LAB CIENCIAS AMBI... CHIHUAHUA \n",
"1499 UNIV AUTONOMA CIUDAD JUAREZ, PROGRAMA MED VET ... CHIHUAHUA \n",
"\n",
"[1500 rows x 4 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\t\n",
"import pandas as pd\n",
"import numpy as np\n",
"import pickle\n",
"from pathlib import Path\n",
"\n",
"\n",
"\n",
"print(\"-------------start-------------\")\n",
"# For reproducibility\n",
"np.random.seed(1237)\n",
" \n",
"# Source file directory\n",
"path_data = \"U:/data/\"\n",
"fn_train = \"train3.csv\" #\"filewos_bib_random_nastates.csv\"\n",
" \n",
"\n",
"trainingdata = pd.read_csv(path_data + fn_train, usecols=['CITY', 'INSTITUTION_FULL','ADDRESS_FULL','d_state']) #,'OBSERVATION'\n",
"#print(trainingdata.head(20))\n",
"trainingdata = trainingdata[0:1500] \n",
"trainingdata"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CITY</th>\n",
" <th>INSTITUTION_FULL</th>\n",
" <th>ADDRESS_FULL</th>\n",
" <th>d_state</th>\n",
" <th>merged</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A CORUNA</td>\n",
" <td>UNIV A CORUNA, GRP INGN FLUIDOS</td>\n",
" <td>UNIV A CORUNA, GRP INGN FLUIDOS, A CORUNA, MEXICO</td>\n",
" <td>noInput</td>\n",
" <td>A CORUNA UNIV A CORUNA, GRP INGN FLUIDOS UNIV ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AC</td>\n",
" <td>PREVENCASA</td>\n",
" <td>PREVENCASA, AC, TIJUANA, MEXICO</td>\n",
" <td>BAJA CALIFORNIA</td>\n",
" <td>AC PREVENCASA PREVENCASA, AC, TIJUANA, MEXICO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AC</td>\n",
" <td>CTR DIVERSIDAD &amp; DERECHOS SEXUALES</td>\n",
" <td>CTR DIVERSIDAD &amp; DERECHOS SEXUALES, AC, MEXICO</td>\n",
" <td>noInput</td>\n",
" <td>AC CTR DIVERSIDAD &amp; DERECHOS SEXUALES CTR DIVE...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AC</td>\n",
" <td>FEDERAC HEMOFILIA REPUBL MEXICANA</td>\n",
" <td>FEDERAC HEMOFILIA REPUBL MEXICANA, AC, MEXICO</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" <td>AC FEDERAC HEMOFILIA REPUBL MEXICANA FEDERAC H...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AC</td>\n",
" <td>INST ECOL, DIV POSGRAD</td>\n",
" <td>INST ECOL, DIV POSGRAD, AC, MEXICO</td>\n",
" <td>VERACRUZ DE IGNACIO DE LA LLAVE</td>\n",
" <td>AC INST ECOL, DIV POSGRAD INST ECOL, DIV POSGR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>AC</td>\n",
" <td>INST ECOL, RED BIOL EVOLUT</td>\n",
" <td>INST ECOL, RED BIOL EVOLUT, AC, MEXICO</td>\n",
" <td>VERACRUZ DE IGNACIO DE LA LLAVE</td>\n",
" <td>AC INST ECOL, RED BIOL EVOLUT INST ECOL, RED B...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>AC DELEG BENITO JUAREZ</td>\n",
" <td>UNIV VALLE MEXICO, INST INVEST &amp; INNOVAC FARMA...</td>\n",
" <td>UNIV VALLE MEXICO, INST INVEST &amp; INNOVAC FARMA...</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" <td>AC DELEG BENITO JUAREZ UNIV VALLE MEXICO, INST...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>AC DELEG BENITO JUAREZ</td>\n",
" <td>UNIV VALLE MEXICO, INST INVEST &amp; INNOVAC FARMA...</td>\n",
" <td>UNIV VALLE MEXICO, INST INVEST &amp; INNOVAC FARMA...</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" <td>AC DELEG BENITO JUAREZ UNIV VALLE MEXICO, INST...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>AC GUADALAJARA</td>\n",
" <td>CTR INVEST &amp; ASISTENCIA TECNOL &amp; DISENO ESTADO JA</td>\n",
" <td>CTR INVEST &amp; ASISTENCIA TECNOL &amp; DISENO ESTADO...</td>\n",
" <td>JALISCO</td>\n",
" <td>AC GUADALAJARA CTR INVEST &amp; ASISTENCIA TECNOL ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CITY INSTITUTION_FULL \\\n",
"0 A CORUNA UNIV A CORUNA, GRP INGN FLUIDOS \n",
"1 AC PREVENCASA \n",
"2 AC CTR DIVERSIDAD & DERECHOS SEXUALES \n",
"3 AC FEDERAC HEMOFILIA REPUBL MEXICANA \n",
"4 AC INST ECOL, DIV POSGRAD \n",
"5 AC INST ECOL, RED BIOL EVOLUT \n",
"6 AC DELEG BENITO JUAREZ UNIV VALLE MEXICO, INST INVEST & INNOVAC FARMA... \n",
"7 AC DELEG BENITO JUAREZ UNIV VALLE MEXICO, INST INVEST & INNOVAC FARMA... \n",
"8 AC GUADALAJARA CTR INVEST & ASISTENCIA TECNOL & DISENO ESTADO JA \n",
"\n",
" ADDRESS_FULL \\\n",
"0 UNIV A CORUNA, GRP INGN FLUIDOS, A CORUNA, MEXICO \n",
"1 PREVENCASA, AC, TIJUANA, MEXICO \n",
"2 CTR DIVERSIDAD & DERECHOS SEXUALES, AC, MEXICO \n",
"3 FEDERAC HEMOFILIA REPUBL MEXICANA, AC, MEXICO \n",
"4 INST ECOL, DIV POSGRAD, AC, MEXICO \n",
"5 INST ECOL, RED BIOL EVOLUT, AC, MEXICO \n",
"6 UNIV VALLE MEXICO, INST INVEST & INNOVAC FARMA... \n",
"7 UNIV VALLE MEXICO, INST INVEST & INNOVAC FARMA... \n",
"8 CTR INVEST & ASISTENCIA TECNOL & DISENO ESTADO... \n",
"\n",
" d_state \\\n",
"0 noInput \n",
"1 BAJA CALIFORNIA \n",
"2 noInput \n",
"3 CIUDAD DE MEXICO \n",
"4 VERACRUZ DE IGNACIO DE LA LLAVE \n",
"5 VERACRUZ DE IGNACIO DE LA LLAVE \n",
"6 CIUDAD DE MEXICO \n",
"7 CIUDAD DE MEXICO \n",
"8 JALISCO \n",
"\n",
" merged \n",
"0 A CORUNA UNIV A CORUNA, GRP INGN FLUIDOS UNIV ... \n",
"1 AC PREVENCASA PREVENCASA, AC, TIJUANA, MEXICO \n",
"2 AC CTR DIVERSIDAD & DERECHOS SEXUALES CTR DIVE... \n",
"3 AC FEDERAC HEMOFILIA REPUBL MEXICANA FEDERAC H... \n",
"4 AC INST ECOL, DIV POSGRAD INST ECOL, DIV POSGR... \n",
"5 AC INST ECOL, RED BIOL EVOLUT INST ECOL, RED B... \n",
"6 AC DELEG BENITO JUAREZ UNIV VALLE MEXICO, INST... \n",
"7 AC DELEG BENITO JUAREZ UNIV VALLE MEXICO, INST... \n",
"8 AC GUADALAJARA CTR INVEST & ASISTENCIA TECNOL ... "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"df = trainingdata.fillna('noInput')\n",
"#merge the input-columns into one big string column 'merged'\n",
"df[\"merged\"] = df[\"CITY\"].map(str) + ' ' + df[\"INSTITUTION_FULL\"] + ' ' + df[\"ADDRESS_FULL\"]\n",
"\n",
"df.head(9)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# prepare data for keras:\n",
"\n",
"# lets take 80% data as training and remaining 20% for test.\n",
"train_size = int(len(trainingdata) * .8)\n",
" \n",
"train_input = trainingdata['merged'][:train_size]\n",
"train_tags = trainingdata['d_state'][:train_size]\n",
"#train_files_names = trainingdata['filename'][:train_size]\n",
" \n",
"test_input = trainingdata['merged'][train_size:]\n",
"test_tags = trainingdata['d_state'][train_size:]\n",
"#test_files_names = dtrainingata['filename'][train_size:]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1762 CIUDAD REAL UNIVERSIDAD PANAMERICANA, MCS UNIV...\n",
"1763 NaN\n",
"1764 CIUDAD UNIV UNIV NACL AUTONOMA MEXICO, INST IN...\n",
"Name: merged, dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_input[1762:1765]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CITY</th>\n",
" <th>INSTITUTION_FULL</th>\n",
" <th>ADDRESS_FULL</th>\n",
" <th>d_state</th>\n",
" <th>merged</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1762</th>\n",
" <td>CIUDAD REAL</td>\n",
" <td>UNIVERSIDAD PANAMERICANA, MCS</td>\n",
" <td>UNIVERSIDAD PANAMERICANA, MCS, CIUDAD REAL, ME...</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" <td>CIUDAD REAL UNIVERSIDAD PANAMERICANA, MCS UNIV...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1763</th>\n",
" <td>CIUDAD SATELITE</td>\n",
" <td>NaN</td>\n",
" <td>PAFNUNCIO PADILLA 17,4TOPISO, CIUDAD SATELITE,...</td>\n",
" <td>ESTADO DE MEXICO</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1764</th>\n",
" <td>CIUDAD UNIV</td>\n",
" <td>UNIV NACL AUTONOMA MEXICO, INST INGN</td>\n",
" <td>UNIV NACL AUTONOMA MEXICO, INST INGN, CIUDAD U...</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" <td>CIUDAD UNIV UNIV NACL AUTONOMA MEXICO, INST IN...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CITY INSTITUTION_FULL \\\n",
"1762 CIUDAD REAL UNIVERSIDAD PANAMERICANA, MCS \n",
"1763 CIUDAD SATELITE NaN \n",
"1764 CIUDAD UNIV UNIV NACL AUTONOMA MEXICO, INST INGN \n",
"\n",
" ADDRESS_FULL d_state \\\n",
"1762 UNIVERSIDAD PANAMERICANA, MCS, CIUDAD REAL, ME... CIUDAD DE MEXICO \n",
"1763 PAFNUNCIO PADILLA 17,4TOPISO, CIUDAD SATELITE,... ESTADO DE MEXICO \n",
"1764 UNIV NACL AUTONOMA MEXICO, INST INGN, CIUDAD U... CIUDAD DE MEXICO \n",
"\n",
" merged \n",
"1762 CIUDAD REAL UNIVERSIDAD PANAMERICANA, MCS UNIV... \n",
"1763 NaN \n",
"1764 CIUDAD UNIV UNIV NACL AUTONOMA MEXICO, INST IN... "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[1762:1765]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CITY</th>\n",
" <th>INSTITUTION_FULL</th>\n",
" <th>ADDRESS_FULL</th>\n",
" <th>d_state</th>\n",
" <th>merged</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1762</th>\n",
" <td>CIUDAD REAL</td>\n",
" <td>UNIVERSIDAD PANAMERICANA, MCS</td>\n",
" <td>UNIVERSIDAD PANAMERICANA, MCS, CIUDAD REAL, ME...</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" <td>CIUDAD REAL UNIVERSIDAD PANAMERICANA, MCS UNIV...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1763</th>\n",
" <td>CIUDAD SATELITE</td>\n",
" <td>noInput</td>\n",
" <td>PAFNUNCIO PADILLA 17,4TOPISO, CIUDAD SATELITE,...</td>\n",
" <td>ESTADO DE MEXICO</td>\n",
" <td>noInput</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1764</th>\n",
" <td>CIUDAD UNIV</td>\n",
" <td>UNIV NACL AUTONOMA MEXICO, INST INGN</td>\n",
" <td>UNIV NACL AUTONOMA MEXICO, INST INGN, CIUDAD U...</td>\n",
" <td>CIUDAD DE MEXICO</td>\n",
" <td>CIUDAD UNIV UNIV NACL AUTONOMA MEXICO, INST IN...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CITY INSTITUTION_FULL \\\n",
"1762 CIUDAD REAL UNIVERSIDAD PANAMERICANA, MCS \n",
"1763 CIUDAD SATELITE noInput \n",
"1764 CIUDAD UNIV UNIV NACL AUTONOMA MEXICO, INST INGN \n",
"\n",
" ADDRESS_FULL d_state \\\n",
"1762 UNIVERSIDAD PANAMERICANA, MCS, CIUDAD REAL, ME... CIUDAD DE MEXICO \n",
"1763 PAFNUNCIO PADILLA 17,4TOPISO, CIUDAD SATELITE,... ESTADO DE MEXICO \n",
"1764 UNIV NACL AUTONOMA MEXICO, INST INGN, CIUDAD U... CIUDAD DE MEXICO \n",
"\n",
" merged \n",
"1762 CIUDAD REAL UNIVERSIDAD PANAMERICANA, MCS UNIV... \n",
"1763 noInput \n",
"1764 CIUDAD UNIV UNIV NACL AUTONOMA MEXICO, INST IN... "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build a copy of df in which every missing value is the placeholder string 'noInput'.\n",
"df2 = df.fillna(value='noInput')\n",
"# Show the same three rows again to check that the NaN entries were replaced.\n",
"df2.iloc[1762:1765]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'Tokenizer' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-5-8e9e2e4beeb7>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;31m# define Tokenizer with Vocab Size\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mtokenizer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTokenizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnum_words\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mvocab_size\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_on_texts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_posts\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'Tokenizer' is not defined"
]
}
],
"source": [
"# Tokenizer is imported in this cell because the notebook's keras import cell\n",
"# sits below it; running top to bottom otherwise raises\n",
"# NameError: name 'Tokenizer' is not defined.\n",
"from keras.preprocessing.text import Tokenizer\n",
"\n",
"# NOTE(review): LabelBinarizer, train_posts, test_posts, train_tags and test_tags\n",
"# are not defined in this cell; presumably they come from earlier cells -- confirm.\n",
"\n",
"# 32 states and NA\n",
"num_labels = 33\n",
"vocab_size = 5000\n",
"batch_size = 100\n",
"\n",
"# Define the Tokenizer with the vocabulary size and fit it on the training texts only.\n",
"tokenizer = Tokenizer(num_words=vocab_size)\n",
"tokenizer.fit_on_texts(train_posts)\n",
"\n",
"# Turn the train and test texts into TF-IDF matrices using the fitted tokenizer.\n",
"x_train = tokenizer.texts_to_matrix(train_posts, mode='tfidf')\n",
"x_test = tokenizer.texts_to_matrix(test_posts, mode='tfidf')\n",
"\n",
"# Fit the label encoder on the training tags, then transform both splits with it.\n",
"encoder = LabelBinarizer()\n",
"encoder.fit(train_tags)\n",
"y_train = encoder.transform(train_tags)\n",
"y_test = encoder.transform(test_tags)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'keras'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-11-2c9953c194d7>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpreprocessing\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mTokenizer\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodels\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mSequential\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mActivation\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDense\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDropout\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'keras'"
]
}
],
"source": [
"\n",
"\n",
"from keras.preprocessing.text import Tokenizer\n",
"from keras.models import Sequential\n",
"from keras.layers import Activation, Dense, Dropout\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}