Experiment
I updated the design of the neural net to handle spelling mistakes better. The input now has three sections. In the word length section, each node represents a word length, and the node matching the word's length is hot. In the first letter section, there is one node per letter, and the node matching the word's first letter is hot. In the letters section, there is also one node per letter, and each node's input is the number of times that letter appears in the word.
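For example, the word "cat" lights up length node 3, the first-letter node for 'c', and the letter-count nodes for 'c', 'a', and 't'. Here is a minimal sketch of that encoding, simplified from the word_to_train function in the code below (the full version also gives the neighbouring length nodes partial values of 0.5 and 0.25):

# Simplified sketch of the input encoding for one word.
# Layout matches the script below: 30 length nodes, then 26 first-letter
# nodes, then 26 letter-count nodes.
word = "cat"
inputs = [0.0] * (30 + 26 + 26)
inputs[len(word)] = 1                          # length section: node 3 is hot
inputs[30 + ord(word[0]) - ord('a')] = 1       # first-letter section: 'c' is hot
for ch in word:
    inputs[30 + 26 + ord(ch) - ord('a')] += 1  # letters section: per-letter counts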
Lesson
This was interesting. When I did a small batch run with nine words, the results looked great. When I increased the number of words to 999, it fell apart.
Data
Small Batch Run (9 words)
Batches in same order as file, non-full batch appears last
Train once
Batch size 100
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 200
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 300
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 400
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 500
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 600
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 700
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 800
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 900
transposition 54 / 55
exact 9 / 9
deletion 58 / 64
Batch size 1000
transposition 54 / 55
exact 9 / 9
Large Batch Run (999 words)
Batches in same order as file, non-full batch appears last
Train once
Batch size 100
transposition 1989 / 7673
exact 194 / 999
deletion 1882 / 8672
Batch size 200
transposition 1893 / 7673
exact 181 / 999
deletion 1926 / 8672
Batch size 300
transposition 450 / 7673
exact 52 / 999
deletion 460 / 8672
Batch size 400
transposition 704 / 7673
exact 76 / 999
deletion 693 / 8672
Batch size 500
transposition 1832 / 7673
exact 174 / 999
deletion 1947 / 8672
Batch size 600
transposition 1051 / 7673
exact 110 / 999
deletion 1049 / 8672
Batch size 700
transposition 871 / 7673
exact 90 / 999
deletion 894 / 8672
Batch size 800
transposition 704 / 7673
exact 76 / 999
deletion 693 / 8672
Batch size 900
transposition 450 / 7673
exact 52 / 999
deletion 460 / 8672
Code
import tensorflow as tf
import numpy as np
"""
For each word there is a length node, a first letter node, and for each letter in the word there is a node with input of 1 * number of letters
"""
import sys

words_file = "words.txt"
if len(sys.argv) > 1:
    words_file = sys.argv[1]
show_details = (len(sys.argv) > 2)

words_txt = open(words_file, "r")
words = words_txt.read().split('\n')
words.pop() # last is empty string
words_txt.close()
number_of_letters = 26
number_of_length_nodes = 30
number_of_first_letter_nodes = number_of_letters
number_of_letters_nodes = number_of_letters
number_of_positions = 25
size_of_sections = [
    number_of_length_nodes,
    number_of_first_letter_nodes,
    number_of_letters_nodes
]
# starting offset of each section in the input vector
position_of_sections = [0]
last_position = 0
for size in size_of_sections:
    position_of_sections.append(last_position + size)
    last_position = last_position + size
number_of_inputs = sum(size_of_sections)
number_of_words = len(words)
number_of_outputs = len(words)
#import pdb; pdb.set_trace();
# set up the input nodes for one word
def word_to_train(word, inputs):
    # section 1: word length, with neighbouring lengths partially activated
    length = len(word)
    inputs[position_of_sections[0]+length] = 1
    if length > 1:
        inputs[position_of_sections[0]+length-1] = 0.5
    if length > 2:
        inputs[position_of_sections[0]+length-2] = 0.25
    if length+1 < number_of_length_nodes:
        inputs[position_of_sections[0]+length+1] = 0.5
    if length+2 < number_of_length_nodes:
        inputs[position_of_sections[0]+length+2] = 0.25
    # section 2: first letter
    inputs[position_of_sections[1] + ord(word[0]) - ord('a')] = 1
    # section 3: count of each letter in the word
    for index, ch in enumerate(word):
        inputs[position_of_sections[2] + ord(ch) - ord('a')] += 1
# run the training; batch sizes come from get_batch_size(batch_number)
def train(sess, get_batch_size, train_step, x, y_):
    batch_number = 0
    batch_size = get_batch_size(batch_number)
    x_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_inputs), dtype=float)
    y_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_outputs), dtype=float)
    index = 0
    for word_number, word in enumerate(words):
        word_to_train(word, x_array[index])
        y_array[index][word_number] = 1
        index += 1
        if index == batch_size or word_number+1 == number_of_words:
            batch_number += 1
            batch_size = get_batch_size(batch_number)
            index = 0
            sess.run(train_step, feed_dict={x: x_array, y_: y_array})
            to_do = number_of_words - word_number - 1
            x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
            y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)
def train_twice(sess, get_batch_size, train_step, x, y_):
    train(sess, get_batch_size, train_step, x, y_)
    train(sess, get_batch_size, train_step, x, y_)
def run_test_one_word(sess, x, y, word):
    x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
    word_to_train(word, x_array[0])
    prediction = tf.argmax(y, 1)
    prediction = sess.run(prediction, feed_dict={x: x_array})
    return prediction[0]
def word_to_test_words(word, include_original=True, include_remove=True, include_transpose=True):
    words = []
    # original word
    words.append(word)
    # remove a character
    for i in range(0, len(word)):
        words.append(word[:i] + word[i+1:])
    # transpose characters
    if len(word) > 1:
        for i in range(0, len(word)-1):
            before = word[:i]
            letter1 = word[i]
            letter2 = word[i+1]
            after = word[i+2:]
            words.append(before + letter2 + letter1 + after)
    return words
def word_to_train_words(word):
    return [word]

def word_to_test_words_exact(word):
    return [word]

def word_to_test_words_deletion(word):
    words = []
    for i in range(0, len(word)):
        words.append(word[:i] + word[i+1:])
    return words

def word_to_test_words_transposition(word):
    words = []
    if len(word) > 1:
        for i in range(0, len(word)-1):
            before = word[:i]
            letter1 = word[i]
            letter2 = word[i+1]
            after = word[i+2:]
            words.append(before + letter2 + letter1 + after)
    return words
def run_test_for_case(sess, x, y, word_to_test_words_function):
    n_right = 0
    n_checked = 0
    for word_number, word in enumerate(words):
        test_words = word_to_test_words_function(word)
        for test_word in test_words:
            prediction = run_test_one_word(sess, x, y, test_word)
            is_right = prediction == word_number
            if show_details:
                #if is_right:
                #    print "Right %s matches %s" % (words[word_number], test_word)
                #else:
                if not is_right:
                    #import pdb; pdb.set_trace()
                    print "Wrong %s found %s was %s" % (test_word, words[prediction], word)
            if is_right:
                n_right += 1
            n_checked += 1
    return (n_right, n_checked)
def run_test(words, get_batch_size, train):
    x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
    y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])
    W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
    b = tf.Variable(tf.zeros([number_of_outputs]))
    # single softmax layer: one output node per word in the list
    y = tf.nn.softmax(tf.matmul(x, W) + b)
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    sess.run(tf.initialize_all_variables())
    #import pdb; pdb.set_trace();
    train(sess, get_batch_size, train_step, x, y_)
    results = {}
    results["exact"] = run_test_for_case(sess, x, y, word_to_test_words_exact)
    results["deletion"] = run_test_for_case(sess, x, y, word_to_test_words_deletion)
    results["transposition"] = run_test_for_case(sess, x, y, word_to_test_words_transposition)
    return results
def run_tests(get_batch_size, train):
    for batch_size in range(100, 1001, 100):
        results = run_test(words, get_batch_size(batch_size), train)
        print " Batch size %d" % batch_size
        for key in results.keys():
            (n_right, n_checked) = results[key]
            print "%50s %d / %d" % (key, n_right, n_checked)

print "Batches in same order as file, non-full batch appears last"
def get_batch_size(batch_size):
    def get_batch_size_2(batch_number):
        return batch_size
    return get_batch_size_2
print " Train once"
run_tests(get_batch_size, train)
print " Train twice"
run_tests(get_batch_size, train_twice)
print "Batches in same order as file, non-full batch appears first"
def get_batch_size(batch_size):
def get_batch_size2(batch_number):
if batch_number == 0:
return number_of_words % batch_size
else:
return batch_size
return get_batch_size2
print " Train once"
#import pdb; pdb.set_trace();
run_tests(get_batch_size, train)
print " Train twice"
run_tests(get_batch_size, train_twice)
print "Batches in same order as file, non-full batch appears last, batch size alternate by div 2"
def get_batch_size(batch_size):
def get_batch_size_2(batch_number):
if batch_number % 2 == 0:
return batch_size / 2
else:
return batch_size
return get_batch_size_2
print " Train once"
run_tests(get_batch_size, train)
print " Train twice"
run_tests(get_batch_size, train_twice)
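To reproduce the runs above, save the code next to a words file with one word per line and run it with the file name as the first argument (spelling_net.py is just an example name, not one used in the post):

python spelling_net.py words.txt
python spelling_net.py words.txt details

Passing any second argument turns on show_details, which prints every wrong prediction.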