Skip to content

Commit

Permalink
input was wrong
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom Theile committed Aug 26, 2019
1 parent 4e8c604 commit b516c32
Showing 1 changed file with 48 additions and 23 deletions.
71 changes: 48 additions & 23 deletions fromNotebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,20 @@
import pickle
from pathlib import Path


import matplotlib.pyplot as plt

print("-------------start-------------")

# Fix the RNG seed so runs are reproducible.
np.random.seed(1237)

# Source directory and training-data file name.
path_data = "U:/data/"
fn_train = "train3.csv"  # "filewos_bib_random_nastates.csv"

# Load only the columns used for training; 'd_state' is the label column.
trainingdata = pd.read_csv(
    path_data + fn_train,
    usecols=['CITY', 'INSTITUTION_FULL', 'ADDRESS_FULL', 'd_state'],  # ,'OBSERVATION'
)
# Limit to the first 1500 rows to keep development runs fast.
trainingdata = trainingdata[0:1500]
print(trainingdata.head(10))

# Replace missing values with a sentinel token so every cell is text
# when the rows are later fed to the tokenizer.
df = trainingdata.fillna('noInput')
Expand All @@ -41,24 +42,27 @@
#test_files_names = dtrainingata['filename'][train_size:]

# Label / vocabulary hyperparameters.
num_labels = 23     # only 23 states with institutions appear in the dataset
vocab_size = 20000  # size of the tokenizer vocabulary
batch_size = 100


from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout
from keras.constraints import maxnorm
from keras.utils import plot_model

# Debug output, disabled for normal runs:
#print(train_input.head(10))
#print(train_tags[0:20])
#train_input.to_csv('inputprocessed.csv')

# Define the Tokenizer with the chosen vocabulary size and fit it on the
# training text only, so the test split cannot influence the vocabulary.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(train_input)

# Vectorize both splits as tf-idf weighted term matrices.
# NOTE: texts_to_matrix returns a plain numpy array, so the stray
# `x_train.head(10)` call was removed — numpy arrays have no .head().
x_train = tokenizer.texts_to_matrix(train_input, mode='tfidf')
x_test = tokenizer.texts_to_matrix(test_input, mode='tfidf')

#import sci-kit...
Expand All @@ -72,32 +76,49 @@
# Encode the held-out labels with the encoder fitted on the training split above.
y_test = encoder.transform(test_tags)


#exit()

print("\n\n ---------- and now the actual keras training: ----------- \n\n")

# Feed-forward classifier: tf-idf vector -> 256 -> 128 -> num_labels softmax.
# maxnorm(4) caps each unit's incoming weight norm and dropout 0.5 fights
# overfitting on this small (1500-row) training set.
model = Sequential()
model.add(Dense(256, input_shape=(vocab_size,), kernel_constraint=maxnorm(4)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(128, kernel_constraint=maxnorm(4)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_labels))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=15,
                    verbose=1,
                    validation_split=0.2)  # hold out 20% of train for validation


# Plot training & validation accuracy values
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Evaluate on the held-out test split.
score = model.evaluate(x_test, y_test,
                       batch_size=batch_size, verbose=1)
Expand All @@ -106,12 +127,16 @@

# Map encoded label indices back to their original string names.
text_labels = encoder.classes_

# Save an architecture diagram of the network.
plot_model(model, to_file='model.png')

# Spot-check the first 50 test samples: print actual vs. predicted state.
for i in range(50):
    prediction = model.predict(np.array([x_test[i]]))
    predicted_label = text_labels[np.argmax(prediction[0])]
    #print(test_files_names.iloc[i])
    #print(f"\n {i} input-text: {train_input[i]} ")
    #print(f"x_train {i}: {x_train[i]},\n y_train i: {y_train[i]}")
    print('--Actual label:' + test_tags.iloc[i])
    print("Predicted label: " + predicted_label + "--")
0 comments on commit b516c32

Please sign in to comment.