diff --git a/fromNotebook.py b/fromNotebook.py
index 546d21a..9f1e071 100644
--- a/fromNotebook.py
+++ b/fromNotebook.py
@@ -12,7 +12,7 @@
 
 # Source file directory
 path_data = "U:/data/"
-fn_train = "train3.csv" #"filewos_bib_random_nastates.csv"
+fn_train = "train4-womexico.csv" #"train3.csv" #"filewos_bib_random_nastates.csv"
 
 trainingdata = pd.read_csv(path_data + fn_train,
                            usecols=['CITY', 'INSTITUTION_FULL','ADDRESS_FULL','d_state']) #,'OBSERVATION'
@@ -44,7 +44,7 @@
 
 # 32 states and NA
 num_labels = 26 # there are only 25 states with institiutions in the dataset
-vocab_size = 23000 # how many different words to keep?
+vocab_size = 16000 # how many different words to keep?
 batch_size = 150
 
 from keras.preprocessing.text import Tokenizer
@@ -94,14 +94,12 @@
 print("\n\n ---------- and now the actual keras training: ----------- \n\n")
 
 model = Sequential()
-model.add(Dense(int(512/2), input_shape=(vocab_size,)))
+model.add(Dense(int(100), input_shape=(vocab_size,)))
 model.add(Activation('relu'))
-model.add(Dropout(0.3))
-#model.add(Conv1D(256, 5, activation='relu'))
-#model.add(GlobalMaxPool1D())
-model.add(Dense(int(256/2)))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
+model.add(Dropout(0.5))
+#model.add(Dense(int(200)))
+#model.add(Activation('relu'))
+#model.add(Dropout(0.3))
 model.add(Dense(num_labels))
 model.add(Activation('softmax'))
 model.summary()
@@ -112,7 +110,7 @@
 
 history = model.fit(x_train, y_train,
                     batch_size=batch_size,
-                    epochs=15,
+                    epochs=10,
                     verbose=1,
                     validation_split=0.2)
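
For reference, here is a minimal, self-contained sketch of what the training pipeline looks like with these changes applied: a 16,000-word bag-of-words input feeding a single 100-unit hidden layer with 0.5 dropout, trained for 10 epochs. The toy texts and labels, the 'binary' tokenizer mode, and the compile() settings are assumptions, since those lines fall outside the hunks above.

# Sketch of the post-patch pipeline; toy data and compile() settings are
# assumptions, as those parts of fromNotebook.py are not shown in the diff.
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.utils import to_categorical

num_labels = 26     # 25 states with institutions in the data, plus NA
vocab_size = 16000  # vocabulary cap after this change

# Toy stand-ins for the ADDRESS_FULL strings and d_state labels.
texts = ["universidad autonoma de nuevo leon monterrey",
         "universidad de guadalajara jalisco"]
labels = [0, 1]

# Bag-of-words features: each address becomes one row of length vocab_size
# indexed over the most frequent words in the corpus.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts)
x_train = tokenizer.texts_to_matrix(texts, mode='binary')
y_train = to_categorical(labels, num_classes=num_labels)

# The simplified network from this diff: a single 100-unit hidden layer with
# dropout raised to 0.5; the former second hidden block stays commented out.
model = Sequential()
model.add(Dense(100, input_shape=(vocab_size,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_labels))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
history = model.fit(x_train, y_train,
                    batch_size=2,   # the real script uses batch_size=150
                    epochs=10,
                    verbose=1,
                    validation_split=0.2)

Taken together, the smaller hidden layer, the higher dropout rate, the shorter training run, and the tighter 16,000-word vocabulary all read as capacity reductions to curb overfitting on the smaller train4-womexico.csv set.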