# Provenance (from the original patch header):
#   Commit:  e2c531fc5c3c08ce3fd095c4fa5c04275168e893
#   From:    Crista Lopes
#   Date:    Tue, 26 Nov 2019 00:02:24 -0800
#   Subject: [PATCH] Learn how to normalize one character at a time
#   File:    36-dnn/normalize-char-ff.py (new file, 102 insertions)
"""Learn how to normalize one character at a time.

A single dense softmax layer is trained on randomly generated one-hot
encoded printable characters.  The target for an ASCII letter is its
lowercase form; the target for every other printable character is a space.
"""

from keras.models import Model, Sequential
from keras import layers
from keras.layers import Input, Dense
from keras.utils import plot_model

import numpy as np
import sys, os, string, random

# Vocabulary: every character of string.printable, in sorted order, with
# lookup tables in both directions (character <-> one-hot index).
characters = sorted(string.printable)
char_indices = {c: i for i, c in enumerate(characters)}
indices_char = {i: c for i, c in enumerate(characters)}

INPUT_VOCAB_SIZE = len(characters)
BATCH_SIZE = 200


def encode_one_hot(c):
    """One-hot encode the given character.

    Returns a 1-D float array of length INPUT_VOCAB_SIZE that is zero
    everywhere except at the index of ``c``.

    Raises KeyError if ``c`` is not one of ``characters``.
    """
    x = np.zeros(INPUT_VOCAB_SIZE)
    x[char_indices[c]] = 1
    return x


def decode_one_hot(x):
    """Return a character from a one-hot-encoded vector.

    The position of the largest entry is used, so this also works on a
    vector of predicted probabilities.
    """
    return indices_char[int(np.argmax(x))]


def build_model():
    """Build and compile the model: one Dense softmax layer, vocab -> vocab."""
    print('Build model...')
    model = Sequential()
    model.add(layers.Dense(INPUT_VOCAB_SIZE,
                           input_shape=(INPUT_VOCAB_SIZE, ),
                           activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


def input_generator(nsamples):
    """Yield endless random (inputs, targets) batches of ``nsamples`` rows.

    Both arrays have shape (nsamples, INPUT_VOCAB_SIZE); row n of the
    targets is the one-hot encoding of the normalized form of the
    character encoded in row n of the inputs.
    """
    def generate_char():
        # Letters normalize to lowercase; anything else becomes a space.
        input_data = random.choice(characters)
        expected = input_data.lower() if input_data in string.ascii_letters else ' '
        return input_data, expected

    while True:
        data_in = np.zeros((nsamples, INPUT_VOCAB_SIZE))
        data_out = np.zeros((nsamples, INPUT_VOCAB_SIZE))
        for n in range(nsamples):
            input_data, expected = generate_char()
            data_in[n] = encode_one_hot(input_data)
            data_out[n] = encode_one_hot(expected)

        yield data_in, data_out


model = build_model()
model.summary()
plot_model(model, to_file='normalization.png', show_shapes=True)

# Train the model each generation and show predictions against a dataset.
val_gen2 = input_generator(4)
for iteration in range(1, 500):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    input_gen = input_generator(BATCH_SIZE)
    val_gen = input_generator(BATCH_SIZE)
    model.fit_generator(input_gen,
                        epochs = 1,
                        steps_per_epoch = 20,
                        validation_data = val_gen,
                        validation_steps = 10, workers=1)
    # Select samples from the set at random so we can visualize errors.
    batch_x, batch_y = next(val_gen2)
    # Predict the whole preview batch once; the model does not change
    # between samples, so there is no need to re-run it per sample.
    preds = model.predict(batch_x)
    for expected, prediction in zip(batch_y, preds):
        correct = decode_one_hot(expected)
        guess = decode_one_hot(prediction)
        print('T', correct)
        print('G', guess)

# NOTE(review): unfinished sketch kept from the original.  It refers to
# LINE_SIZE, which is not defined anywhere in this file, and its indentation
# has been reconstructed.
#with open(sys.argv[1]) as f:
#    for line in f:
#        if line.isspace(): continue
#        onehots = encode_one_hot(line)

#        data = [[] for _ in range(LINE_SIZE)]
#        for i, c in enumerate(onehots):
#            data[i].append(c)
#        for j in range(len(onehots), LINE_SIZE):
#            data[j].append(np.zeros((INPUT_VOCAB_SIZE)))

#        inputs = [np.array(e) for e in data]

#        preds = model.predict(inputs)
#        normal = decode_one_hot(preds[0])

#        print(decode_one_hot(onehots))
#        print(normal)