Change the name of this style to dense, because it's focusing on the dense layer
This commit is contained in:
112
35-dense/count-words-binary-encoding-no-learning.py
Normal file
112
35-dense/count-words-binary-encoding-no-learning.py
Normal file
@@ -0,0 +1,112 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''
|
||||
# An implementation of deep learning for counting symbols
|
||||
Input: [10, 12, 10, 11, 2, 2, 2, 1, 1]
|
||||
Output: words=[2, 10, 1, 12, 11] counts=[3, 2, 2, 1, 1] (Not necessarily in this order)
|
||||
|
||||
''' # noqa
|
||||
|
||||
from __future__ import print_function
|
||||
from keras.models import Sequential, Model
|
||||
from keras import layers, metrics
|
||||
from keras import backend as K
|
||||
from keras.utils import plot_model
|
||||
from keras.utils import to_categorical
|
||||
import numpy as np
|
||||
import math
|
||||
from six.moves import range
|
||||
import string, re, collections, os, sys, operator
|
||||
|
||||
# Load the comma-separated stop-word list; `with` guarantees the handle is
# closed instead of being left to the garbage collector.
with open('../stop_words.txt') as stop_file:
    stopwords = set(stop_file.read().split(','))

# Tokenise the input text (path given as the first command-line argument)
# into lower-case alphabetic words of at least two letters.
with open(sys.argv[1]) as text_file:
    all_words = re.findall('[a-z]{2,}', text_file.read().lower())
words = [w for w in all_words if w not in stopwords]

# Vocabulary of distinct words; index 0 is always the empty string.
uniqs = [''] + list(set(words))
uniqs_indices = {w: i for i, w in enumerate(uniqs)}
indices_uniqs = dict(enumerate(uniqs))

# The text re-expressed as a sequence of vocabulary indices.
indices = [uniqs_indices[w] for w in words]

WORDS_SIZE = len(words)
VOCAB_SIZE = len(uniqs)
# Number of bits needed to write every index in 0..VOCAB_SIZE-1.
# Equal to ceil(log2(VOCAB_SIZE)) but computed with exact integer arithmetic,
# so floating-point error in math.log cannot add a spurious extra bit.
BIN_SIZE = (VOCAB_SIZE - 1).bit_length()
|
||||
|
||||
def encode_binary(W, words_size=None, bin_size=None):
    """Encode a sequence of integer indices as rows of bits.

    Row ``i`` of the result holds the binary expansion of ``W[i]``, least
    significant bit first: element ``[0, i, n, 0]`` is 1.0 when bit ``n`` of
    ``W[i]`` is set and 0.0 otherwise. Rows beyond ``len(W)`` stay zero.

    Args:
        W: iterable of integer indices.
        words_size: number of rows to allocate; defaults to the module-level
            ``WORDS_SIZE``.
        bin_size: number of bits per row; defaults to the module-level
            ``BIN_SIZE``.

    Returns:
        A float array of shape ``(1, words_size, bin_size, 1)``.

    Raises:
        IndexError: if ``W`` holds more than ``words_size`` entries.
    """
    if words_size is None:
        words_size = WORDS_SIZE
    if bin_size is None:
        bin_size = BIN_SIZE

    x = np.zeros((1, words_size, bin_size, 1))
    idx = np.asarray(list(W), dtype=np.int64)
    if idx.size > words_size:
        raise IndexError(
            'got {} indices but only {} rows are available'.format(
                idx.size, words_size))
    if idx.size:
        # Shift every index right by each bit position and mask the low bit;
        # broadcasting yields a (len(W), bin_size) table of 0/1 in one step.
        bit_positions = np.arange(bin_size, dtype=np.int64)
        x[0, :idx.size, :, 0] = (idx[:, None] >> bit_positions) & 1
    return x
|
||||
|
||||
# Report the corpus dimensions computed above.
print(f'Words size {WORDS_SIZE}, vocab size {VOCAB_SIZE}, bin size {BIN_SIZE}')
# Debugging aids, disabled by default (they dump the whole corpus).
#print(f'Words={words}')
#print(f'Uniqs={uniqs}')
#print(f'Indices={indices}')
|
||||
|
||||
def set_weights(clayer):
    """Install hand-computed weights and a zero bias on ``clayer``.

    The kernel has shape ``(1, BIN_SIZE, 1, VOCAB_SIZE)``. For vocabulary
    index ``i`` and bit position ``n``, entry ``[0, n, 0, i]`` is
    ``+1/k`` when bit ``n`` of ``i`` is set (``k`` = number of set bits of
    ``i`` within the ``BIN_SIZE`` positions) and ``-1/m`` otherwise
    (``m`` = number of clear bits). The bias is all zeros.

    Args:
        clayer: object exposing ``set_weights([kernel, bias])``.
    """
    bit_pos = np.arange(BIN_SIZE).reshape(-1, 1)
    vocab_idx = np.arange(VOCAB_SIZE).reshape(1, -1)
    # is_set[n, i] is True when bit n of vocabulary index i is 1.
    is_set = ((vocab_idx >> bit_pos) & 1).astype(bool)

    set_count = is_set.sum(axis=0)
    clear_count = BIN_SIZE - set_count
    # Clamp the divisors at 1: a zero count means that value is never selected.
    positive = 1. / np.maximum(set_count, 1)
    negative = -1. / np.maximum(clear_count, 1)

    kernel = np.zeros((1, BIN_SIZE, 1, VOCAB_SIZE), dtype=np.float32)
    kernel[0, :, 0, :] = np.where(is_set, positive, negative)
    bias = np.zeros((VOCAB_SIZE), dtype=np.float32)

    clayer.set_weights([kernel, bias])
|
||||
|
||||
def Max(x):
    """Return ``x`` with every element below 0.9 replaced by zero."""
    below_cutoff = K.less(x, 0.9)
    return K.switch(below_cutoff, K.zeros_like(x), x)
|
||||
|
||||
def sigmoid_steep(x):
    """Evaluate ``1 / (1 + b**(-x))`` element-wise with base ``b = 10**20``."""
    steep_base = K.ones_like(x) * 10 ** 20
    denominator = 1. + K.pow(steep_base, -x)
    return 1. / denominator
|
||||
|
||||
def Max2(x):
    """Scale ``x`` by ``sigmoid_steep`` of its offset from ``1 - 1/BIN_SIZE``."""
    cutoff = 1 - 1 / BIN_SIZE
    gate = sigmoid_steep(x - cutoff)
    return gate * x
|
||||
|
||||
def Reduce(x):
    """Raise ``x`` element-wise to the 15th power."""
    exponent = 15
    return K.pow(x, exponent)
|
||||
|
||||
def SumPooling2D(x):
    """Sum ``x`` along axis 1, removing that axis from the result."""
    # NOTE(review): when used after the Conv2D layer, axis 1 is presumably the
    # word-position axis - confirm against the backend's data format.
    summed = K.sum(x, axis=1)
    return summed
|
||||
|
||||
def model_convnet2D():
    """Assemble the fixed-weight counting network.

    Layers, in order: a ``Conv2D`` with ``VOCAB_SIZE`` filters of size
    ``(1, BIN_SIZE)`` whose weights come from ``set_weights``; a ``ReLU``
    with threshold ``1 - 1/BIN_SIZE``; a ``Lambda`` applying
    ``SumPooling2D``; and a ``Reshape`` to ``(VOCAB_SIZE,)``.

    Returns:
        A ``(model, name)`` tuple, where ``name`` embeds the vocabulary and
        bit sizes.
    """
    print('Build model...')
    net = Sequential()

    conv = layers.Conv2D(VOCAB_SIZE, (1, BIN_SIZE),
                         input_shape=(WORDS_SIZE, BIN_SIZE, 1))
    net.add(conv)
    # Weights are set once the layer is part of the model (and thus built).
    set_weights(conv)

    cutoff = 1 - 1 / BIN_SIZE
    # Alternatives to the ReLU (Lambda(Max), Lambda(Max2), Lambda(Reduce))
    # are defined above but not wired into the model.
    remaining_stages = (
        layers.ReLU(threshold=cutoff),
        layers.Lambda(SumPooling2D),
        layers.Reshape((VOCAB_SIZE,)),
    )
    for stage in remaining_stages:
        net.add(stage)

    label = "words-nolearning-{}v-{}f".format(VOCAB_SIZE, BIN_SIZE)
    return net, label
|
||||
|
||||
|
||||
# Build the network, describe it, and save a diagram named after it.
model, name = model_convnet2D()
model.summary()
plot_model(model, to_file=name + '.png', show_shapes=True)

batch_x = encode_binary(indices)

# A second model exposing every layer's output, so one predict() call
# returns one array per layer.
layer_outputs = [layer.output for layer in model.layers]
intermediate_model = Model(inputs=model.input, outputs=layer_outputs)
preds = intermediate_model.predict(batch_x)

# Last layer's output for the single sample in the batch.
prediction = preds[-1][0]

# Print the 25 vocabulary entries with the largest predicted values.
ranked = sorted(zip(uniqs, prediction),
                key=operator.itemgetter(1), reverse=True)
for w, c in ranked[:25]:
    print(w + " - " + str(c))
|
||||
|
||||
92
35-dense/tf-35-multiple.py
Normal file
92
35-dense/tf-35-multiple.py
Normal file
@@ -0,0 +1,92 @@
|
||||
from keras.models import Model
|
||||
from keras import layers
|
||||
from keras.layers import Input, Dense
|
||||
from keras.utils import plot_model
|
||||
import numpy as np
|
||||
import sys, os, string
|
||||
|
||||
# Alphabet handled by the model and the two lookup tables between a
# character and its position in that alphabet.
characters = string.printable
char_indices = {c: i for i, c in enumerate(characters)}
indices_char = dict(enumerate(characters))

# Width of a one-hot character vector.
INPUT_VOCAB_SIZE = len(characters)
# Number of character slots (model inputs) per line.
LINE_SIZE = 100
|
||||
|
||||
def encode_one_hot(s):
    """Turn each known character of ``s`` into a one-hot vector.

    Characters not present in ``characters`` are dropped.

    Returns:
        A list of 1-D arrays of length ``INPUT_VOCAB_SIZE``, one per kept
        character, in input order.
    """
    vectors = []
    for ch in s:
        if ch in characters:
            vec = np.zeros((INPUT_VOCAB_SIZE))
            vec[char_indices[ch]] = 1
            vectors.append(vec)
    return vectors
|
||||
|
||||
def decode_one_hot(x):
    """Decode a sequence of 2-D one-hot arrays back into a string.

    For each array, the column of the first element equal to 1 selects the
    character; arrays containing no 1 contribute nothing.
    """
    chars = []
    for onehot in x:
        # np.where on a 2-D array yields (row_indices, column_indices).
        hot_columns = np.where(onehot == 1)[1]
        if len(hot_columns) == 0:
            continue
        chars.append(indices_char[hot_columns[0]])
    return ''.join(chars)
|
||||
|
||||
def normalization_layer_set_weights(n_layer):
    """Load a fixed character-normalising weight matrix into ``n_layer``.

    The ``INPUT_VOCAB_SIZE`` x ``INPUT_VOCAB_SIZE`` matrix routes each input
    character to exactly one output character: lower-case letters to
    themselves, upper-case letters to their lower-case form, and every other
    printable character to the space character. The bias is all zeros.

    Returns:
        ``n_layer``, after its weights have been set.
    """
    kernel = np.zeros((INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE), dtype=np.float32)
    bias = np.zeros((INPUT_VOCAB_SIZE), dtype=np.float32)
    space_col = char_indices[' ']

    for ch in string.printable:
        row = char_indices[ch]
        if ch in string.ascii_lowercase:
            col = row
        elif ch in string.ascii_uppercase:
            col = char_indices[ch.lower()]
        else:
            col = space_col
        kernel[row, col] = 1

    n_layer.set_weights([kernel, bias])
    return n_layer
|
||||
|
||||
def build_model():
    """Build a model with ``LINE_SIZE`` inputs sharing one ``Dense`` layer.

    Every input is a vector of length ``INPUT_VOCAB_SIZE``; each is passed
    through the same dense layer, whose weights are fixed by
    ``normalization_layer_set_weights``.
    """
    n_layer = Dense(INPUT_VOCAB_SIZE)
    raw_inputs = [Input(shape=(INPUT_VOCAB_SIZE, )) for _ in range(LINE_SIZE)]
    normalized_outputs = [n_layer(char_input) for char_input in raw_inputs]
    # The weights are set only after the layer has been called on an input.
    normalization_layer_set_weights(n_layer)

    return Model(inputs=raw_inputs, outputs=normalized_outputs)
|
||||
|
||||
model = build_model()

# Normalise the file named by the first command-line argument, line by line.
with open(sys.argv[1]) as f:
    # All-zero vector used to fill unused character slots; it decodes to
    # nothing because it contains no 1.
    blank = np.zeros((INPUT_VOCAB_SIZE))
    for line in f:
        if line.isspace(): continue
        onehots = encode_one_hot(line)

        # The model has exactly LINE_SIZE inputs, so feed the line through in
        # LINE_SIZE-sized chunks; previously a longer line raised IndexError.
        pieces = []
        for start in range(0, len(onehots), LINE_SIZE):
            chunk = onehots[start:start + LINE_SIZE]
            padding = [blank] * (LINE_SIZE - len(chunk))
            # One array of shape (1, INPUT_VOCAB_SIZE) per model input.
            inputs = [np.array([vec]) for vec in chunk + padding]
            preds = model.predict(inputs)
            pieces.append(decode_one_hot(preds))
        normal = ''.join(pieces)

        print(normal)
|
||||
71
35-dense/tf-35.py
Normal file
71
35-dense/tf-35.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense
|
||||
from keras.utils import plot_model
|
||||
import numpy as np
|
||||
import sys, os, string
|
||||
|
||||
# Alphabet handled by the model and the two lookup tables between a
# character and its position in that alphabet.
characters = string.printable
char_indices = {c: i for i, c in enumerate(characters)}
indices_char = dict(enumerate(characters))

# Width of a one-hot character vector.
INPUT_VOCAB_SIZE = len(characters)
LINE_SIZE = 100
|
||||
|
||||
def encode_one_hot(line):
    """One-hot encode ``line`` into a ``(len(line), INPUT_VOCAB_SIZE)`` array.

    Row ``i`` has a single 1 at the index of ``line[i]``; characters outside
    ``characters`` are encoded as a space.
    """
    space_index = char_indices[' ']
    x = np.zeros((len(line), INPUT_VOCAB_SIZE))
    for row, ch in zip(x, line):
        # `row` is a view into x, so the assignment writes through.
        row[char_indices.get(ch, space_index)] = 1
    return x
|
||||
|
||||
def decode_one_hot(x):
    """Decode rows of scores into a string, one character per row.

    Each row contributes the character whose index holds the row's maximum
    value.
    """
    return ''.join(indices_char[np.argmax(row)] for row in x)
|
||||
|
||||
def normalization_layer_set_weights(n_layer):
    """Load a fixed character-normalising weight matrix into ``n_layer``.

    The ``INPUT_VOCAB_SIZE`` x ``INPUT_VOCAB_SIZE`` matrix routes each input
    character to exactly one output character: lower-case letters to
    themselves, upper-case letters to their lower-case form, and every other
    printable character to the space character. The bias is all zeros.

    Returns:
        ``n_layer``, after its weights have been set.
    """
    kernel = np.zeros((INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE), dtype=np.float32)
    bias = np.zeros((INPUT_VOCAB_SIZE), dtype=np.float32)
    space_col = char_indices[' ']

    for ch in string.printable:
        row = char_indices[ch]
        if ch in string.ascii_lowercase:
            col = row
        elif ch in string.ascii_uppercase:
            col = char_indices[ch.lower()]
        else:
            col = space_col
        kernel[row, col] = 1

    n_layer.set_weights([kernel, bias])
    return n_layer
|
||||
|
||||
def build_model():
    """Build a one-layer model that normalises one-hot encoded characters.

    The single ``Dense`` layer maps ``INPUT_VOCAB_SIZE`` inputs to
    ``INPUT_VOCAB_SIZE`` outputs, with weights fixed by
    ``normalization_layer_set_weights``.
    """
    dense_layer = Dense(INPUT_VOCAB_SIZE, input_shape=(INPUT_VOCAB_SIZE,))
    net = Sequential()
    net.add(dense_layer)
    # Set the weights after the layer has joined the model.
    normalization_layer_set_weights(dense_layer)
    return net
|
||||
|
||||
model = build_model()

# Normalise the file named by the first command-line argument, printing one
# output line per non-blank input line.
with open(sys.argv[1]) as f:
    for line in f:
        if not line.isspace():
            # Each character of the line is one sample in the batch.
            batch = encode_one_hot(line)
            preds = model.predict(batch)
            normal = decode_one_hot(preds)
            print(normal)
|
||||
Reference in New Issue
Block a user