From 454e4956e38f9c19bb51847ee7fd8bcafeee3aed Mon Sep 17 00:00:00 2001 From: Crista Lopes Date: Sun, 8 Mar 2020 17:33:15 -0700 Subject: [PATCH] Shuffle things around --- 41-convolutions/tf-41.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/41-convolutions/tf-41.py b/41-convolutions/tf-41.py index 67a4bb2..cd352fc 100644 --- a/41-convolutions/tf-41.py +++ b/41-convolutions/tf-41.py @@ -1,9 +1,24 @@ from keras.models import Sequential, Model -from keras import layers, metrics +from keras.layers import Conv2D, ReLU, Lambda, Reshape from keras import backend as K import numpy as np import string, re, collections, os, sys, operator, math +stopwords = set(open('../stop_words.txt').read().split(',')) +all_words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower()) +words = [w for w in all_words if w not in stopwords] + +uniqs = [''] + list(set(words)) +uniqs_indices = dict((w, i) for i, w in enumerate(uniqs)) +indices_uniqs = dict((i, w) for i, w in enumerate(uniqs)) + +indices = [uniqs_indices[w] for w in words] + +WORDS_SIZE = len(words) +VOCAB_SIZE = len(uniqs) +BIN_SIZE = math.ceil(math.log(VOCAB_SIZE, 2)) +print(f'Words size {WORDS_SIZE}, vocab size {VOCAB_SIZE}, bin size {BIN_SIZE}') + def encode_binary(W): x = np.zeros((1, WORDS_SIZE, BIN_SIZE, 1)) for i, w in enumerate(W): @@ -35,28 +50,13 @@ def SumPooling2D(x): def build_model(): model = Sequential() - model.add(layers.Conv2D(VOCAB_SIZE, (1, BIN_SIZE), input_shape=(WORDS_SIZE, BIN_SIZE, 1))) - model.add(layers.ReLU(threshold=1-1/BIN_SIZE)) - model.add(layers.Lambda(SumPooling2D)) - model.add(layers.Reshape((VOCAB_SIZE,))) + model.add(Conv2D(VOCAB_SIZE, (1, BIN_SIZE), input_shape=(WORDS_SIZE, BIN_SIZE, 1))) + model.add(ReLU(threshold=1-1/BIN_SIZE)) + model.add(Lambda(SumPooling2D)) + model.add(Reshape((VOCAB_SIZE,))) return model -stopwords = set(open('../stop_words.txt').read().split(',')) -all_words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower()) -words = [w for w in all_words if w not in stopwords] - -uniqs = [''] + list(set(words)) -uniqs_indices = dict((w, i) for i, w in enumerate(uniqs)) -indices_uniqs = dict((i, w) for i, w in enumerate(uniqs)) - -indices = [uniqs_indices[w] for w in words] - -WORDS_SIZE = len(words) -VOCAB_SIZE = len(uniqs) -BIN_SIZE = math.ceil(math.log(VOCAB_SIZE, 2)) -print(f'Words size {WORDS_SIZE}, vocab size {VOCAB_SIZE}, bin size {BIN_SIZE}') - model = build_model() model.summary() conv_layer_set_weights(model.layers[0])