"""Normalize text with a hand-wired dense layer.

Reads the text file named by the first command-line argument and, for every
line that is not entirely whitespace, prints a normalized version of it:
lower-case ASCII letters are kept, upper-case ASCII letters are lower-cased,
and every other character becomes a space.

The mapping is not learned.  It is written directly into the weight matrix of
a single Keras ``Dense`` layer that is applied to one-hot encoded characters.
"""
import os
import string
import sys

import numpy as np

# The vocabulary is every character in string.printable; a character's
# position in that string is its one-hot index.
characters = string.printable
char_indices = {c: i for i, c in enumerate(characters)}
indices_char = {i: c for i, c in enumerate(characters)}

INPUT_VOCAB_SIZE = len(characters)
LINE_SIZE = 100  # not referenced anywhere in this file


def encode_one_hot(line):
    """Return *line* as a one-hot matrix, one row per character.

    The result is a float array of shape ``(len(line), INPUT_VOCAB_SIZE)``.
    Characters that are not in the vocabulary are encoded as a space.
    """
    space_index = char_indices[' ']
    x = np.zeros((len(line), INPUT_VOCAB_SIZE))
    for row, c in enumerate(line):
        x[row, char_indices.get(c, space_index)] = 1
    return x


def decode_one_hot(x):
    """Return the string spelled by the rows of *x*.

    Each row is decoded to the vocabulary character at the position of its
    largest value, so the rows need not be exact one-hot vectors.
    """
    return ''.join(indices_char[int(np.argmax(onehot))] for onehot in x)


def normalization_layer_set_weights(n_layer):
    """Load the fixed character-normalization weights into *n_layer*.

    Calls ``n_layer.set_weights([w, b])`` where ``b`` is all zeros and ``w``
    is a square 0/1 matrix with exactly one 1 per row: ``w[i, j] == 1`` means
    "input character ``i`` becomes output character ``j``".

    Returns *n_layer* so the call can be chained.
    """
    w = np.zeros((INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE), dtype=np.float32)
    b = np.zeros((INPUT_VOCAB_SIZE), dtype=np.float32)

    space_index = char_indices[' ']
    ascii_letters = frozenset(string.ascii_letters)
    for c, i in char_indices.items():
        if c in ascii_letters:
            # Lower-case letters map to themselves, capitals to lower case.
            target = char_indices[c.lower()]
        else:
            # Every non-letter (digits, punctuation, whitespace) -> space.
            target = space_index
        w[i, target] = 1

    n_layer.set_weights([w, b])
    return n_layer


def build_model():
    """Return a Sequential model holding the single normalization layer."""
    # Imported here rather than at module level so the NumPy-only helpers
    # above can be imported without Keras being installed or loaded.
    from keras.layers import Dense
    from keras.models import Sequential

    # Normalize characters using a dense layer
    model = Sequential()
    dense_layer = Dense(INPUT_VOCAB_SIZE, input_shape=(INPUT_VOCAB_SIZE,))
    # The layer is added to the model before its weights are replaced,
    # keeping the original order of operations.
    model.add(dense_layer)
    normalization_layer_set_weights(dense_layer)
    return model


def main(argv=None):
    """Normalize the file named by ``argv[1]`` and print it line by line.

    *argv* defaults to ``sys.argv``.  Exits with a usage message when no file
    argument is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Fail before the (comparatively expensive) model construction.
        sys.exit("usage: tf-35.py <text-file>")

    model = build_model()
    with open(argv[1]) as f:
        for line in f:
            if line.isspace():
                continue
            batch = encode_one_hot(line)
            # verbose=0: predict() runs once per line, and Keras versions
            # whose default shows a progress bar would otherwise interleave
            # it with the normalized text.
            preds = model.predict(batch, verbose=0)
            normal = decode_one_hot(preds)
            print(normal)


if __name__ == "__main__":
    main()