Skip to content

Commit

Permalink
0.65, it doesn't get better
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom Theile committed Aug 26, 2019
1 parent b728327 commit d5acc3a
Showing 1 changed file with 21 additions and 7 deletions.
28 changes: 21 additions & 7 deletions fromNotebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


# Load the training CSV, keeping only the four columns listed in `usecols`.
trainingdata = pd.read_csv(path_data + fn_train, usecols=['CITY', 'INSTITUTION_FULL','ADDRESS_FULL','d_state']) #,'OBSERVATION'
# Keep only the leading rows of the file.
# NOTE(review): two consecutive slices appear here; in this rendered diff they
# look like the removed (1500) and added (2200) versions of the same line.
# Run in sequence, the second slice cannot bring back rows dropped by the first.
trainingdata = trainingdata[0:1500]
trainingdata = trainingdata[0:2200]
#trainingdata.head(10)


Expand All @@ -26,7 +26,8 @@

# Preview the first 9 rows and write `df` to merged.csv (still in its
# original, unshuffled row order).
print(df.head(9))
df.to_csv('merged.csv')
# DataFrame.reindex returns a new frame instead of reordering in place, so the
# result must be assigned back; otherwise the shuffle silently does nothing.
df = df.reindex(np.random.permutation(df.index)) # shuffle the rows, because I suspect some row-dependence...

df = df.reindex(np.random.permutation(df.index)) # shuffle the rows, because I suspect some row-dependence... undo for debugging

# prepare data for keras:

Expand All @@ -42,15 +43,15 @@
#test_files_names = dtrainingata['filename'][train_size:]

# 32 states and NA
# NOTE(review): the comments here disagree with each other and with the values
# (32 states + NA above, "25 states" inline, values 23 and 26) -- confirm the
# real number of distinct labels in the data.
# NOTE(review): num_labels and vocab_size are each assigned twice; in this
# rendered diff these look like the removed and added versions of the same
# lines, so only the later value would take effect if run in sequence.
num_labels = 23 # there are only 25 states with institutions in the dataset
vocab_size = 3000 # how many different words to keep?
num_labels = 26 # there are only 25 states with institutions in the dataset
vocab_size = 23000 # how many different words to keep?
batch_size = 150

from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout
from keras.layers import Activation, Dense, Dropout, Conv1D, MaxPooling1D,GlobalMaxPool1D
from keras.constraints import maxnorm
from keras.utils import plot_model
from keras.utils import plot_model, to_categorical


#print(train_input.head(10))
Expand All @@ -77,14 +78,27 @@
# Transform the test tags with `encoder` (created above this excerpt).
y_test = encoder.transform(test_tags)


# Disabled alternative: build the label arrays with to_categorical instead.
#print("train tags_ ", train_tags[0:20])
#y_train = to_categorical(train_tags, num_labels)
#y_test = to_categorical(test_tags, num_labels)

# Class names as exposed by the encoder's `classes_` attribute.
text_labels = encoder.classes_

# Debug output: show the first ten encoded training labels, the index of the
# largest entry in each of those rows, the first ten encoded test labels, and
# the label names.
print("y_train: ", y_train[0:10])
for ls in range(10):
    # The loop body must be indented; at column 0 this is an IndentationError.
    print("y_train argmax",(np.argmax(y_train[ls])))
print("y_test", y_test[0:10])
print("labels: ", text_labels)
#exit()

print("\n\n ---------- and now the actual keras training: ----------- \n\n")

# Build the network as a stack of layers (the definition continues past this
# excerpt).
model = Sequential()
# First dense layer: 256 units (512/2), with an input of length `vocab_size`.
model.add(Dense(int(512/2), input_shape=(vocab_size,)))
model.add(Activation('relu'))
# NOTE(review): two consecutive Dropout layers; in this rendered diff they look
# like the removed (0.4) and added (0.3) versions of one line -- confirm that
# only one is intended.
model.add(Dropout(0.4))
model.add(Dropout(0.3))
# Disabled experiment with a convolution + global max-pooling stage.
#model.add(Conv1D(256, 5, activation='relu'))
#model.add(GlobalMaxPool1D())
# Second dense layer: 128 units (256/2), followed by ReLU and 20% dropout.
model.add(Dense(int(256/2)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
Expand Down

0 comments on commit d5acc3a

Please sign in to comment.