Add a version of this program that works on single characters, using each input line as one batch. It's much simpler to explain.

2019-12-26 16:42:33 -08:00
parent 997c2827b1
commit b855384437
1 changed files with 71 additions and 0 deletions
--- a/35-dumb-filters/tf-35.py
+++ b/35-dumb-filters/tf-35.py
@@ -0,0 +1,71 @@
 from keras.models import Sequential
 from keras.layers import Dense
 from keras.utils import plot_model
 import numpy as np
 import sys, os, string
 # Vocabulary: every character of string.printable, kept in its native order.
 characters = string.printable
 # Lookup tables in both directions: character -> position and position -> character.
 char_indices = {ch: pos for pos, ch in enumerate(characters)}
 indices_char = dict(enumerate(characters))
 # Width of a one-hot vector: one slot per vocabulary character.
 INPUT_VOCAB_SIZE = len(characters)
 # NOTE(review): LINE_SIZE is not referenced anywhere in the visible code.
 LINE_SIZE = 100
 def encode_one_hot(line):
    """Encode a string as a matrix of one-hot rows, one row per character.

    Each row has INPUT_VOCAB_SIZE columns with a single 1 at the
    character's vocabulary position. Characters that are not in the
    vocabulary are encoded as a space.

    Returns a NumPy array of shape (len(line), INPUT_VOCAB_SIZE).
    """
    fallback = char_indices[' ']
    encoded = np.zeros((len(line), INPUT_VOCAB_SIZE))
    for row, ch in enumerate(line):
        # Unknown characters fall back to the slot used for a space.
        encoded[row, char_indices.get(ch, fallback)] = 1
    return encoded
 def decode_one_hot(x):
    """Turn rows of per-character scores back into a string.

    For every row of x, the position holding the largest value is looked
    up in indices_char; the resulting characters are concatenated in row
    order.
    """
    return ''.join(indices_char[np.argmax(row)] for row in x)
 def normalization_layer_set_weights(n_layer):
    """Install hand-built weights that make a layer normalize characters.

    The kernel is a square 0/1 matrix indexed as [input char, output char]:
      * lower-case letters map to themselves,
      * upper-case letters map to their lower-case counterpart,
      * every other vocabulary character maps to a space.
    The bias is all zeros.

    n_layer: object exposing set_weights(); it receives [kernel, bias].
    Returns the same n_layer so the call can be chained.
    """
    kernel = np.zeros((INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE), dtype=np.float32)
    bias = np.zeros((INPUT_VOCAB_SIZE,), dtype=np.float32)
    # Build the letter set once so each membership test is O(1).
    letters = set(string.ascii_letters)
    space_index = char_indices[' ']
    # One pass over the vocabulary gives each input character exactly one target.
    for ch, src in char_indices.items():
        if ch in letters:
            # lower() leaves lower-case letters unchanged, so they pass through.
            dst = char_indices[ch.lower()]
        else:
            dst = space_index
        kernel[src, dst] = 1
    n_layer.set_weights([kernel, bias])
    return n_layer
 def build_model():
    """Build a one-layer model whose only job is to normalize characters.

    The model is a Sequential holding a single Dense layer with
    INPUT_VOCAB_SIZE inputs and INPUT_VOCAB_SIZE units; its weights are
    set by normalization_layer_set_weights rather than learned.
    """
    normalizer = Dense(INPUT_VOCAB_SIZE, input_shape=(INPUT_VOCAB_SIZE,))
    net = Sequential()
    net.add(normalizer)
    # Weights are assigned after the layer joins the model, as in the
    # original ordering (presumably so the layer is built first -- confirm).
    normalization_layer_set_weights(normalizer)
    return net
 model = build_model()
 # Normalize the file named by the first command-line argument, line by line.
 with open(sys.argv[1]) as f:
    for line in f:
        # Whitespace-only lines are skipped; every other line is one batch
        # with one one-hot row per character.
        if not line.isspace():
            one_hot_rows = encode_one_hot(line)
            print(decode_one_hot(model.predict(one_hot_rows)))