Thursday, August 11, 2016

New Design for Spelling Mistakes - Try 2

Experiment


Here are some examples of errors in the lookup:

Wrong absorptive found advertisements was absorptive
Wrong abstain found absentmindedness was abstain
Wrong abstained found absentmindedness was abstained
Wrong abstainer found absentmindedness was abstainer
Wrong abstainers found absentmindedness was abstainers
Wrong abstaining found administrating was abstaining
Wrong abstains found absentmindedness was abstains
Wrong abstemious found abstemiousness was abstemious
Wrong abstemiously found abstemiousness was abstemiously
Wrong abstention found absentmindedness was abstention
Wrong abstentions found absentmindedness was abstentions
Wrong abstinence found absentmindedness was abstinence
Wrong abstinent found absentmindedness was abstinent
Wrong abstract found abstracts was abstract
Wrong abstracted found accelerates was abstracted
Wrong abstractedly found aesthetically was abstractedly


I think it was wrong to feed a raw count into the single node that represented how many times a letter appeared in the word. I am going to change that so each letter has a separate node for each count bucket: 0, 1, 2, and more.
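
To make the change concrete, here is a minimal sketch of the count-bucket encoding (names are mine, not from the code below): each letter gets four nodes, and exactly one of them goes hot depending on whether the letter appears 0, 1, 2, or more times.

NUM_LETTERS = 26
NUM_BUCKETS = 4  # one node each for counts of 0, 1, 2, and "more"

def letter_count_buckets(word):
  inputs = [0.0] * (NUM_LETTERS * NUM_BUCKETS)
  counts = [0] * NUM_LETTERS
  for ch in word:
    counts[ord(ch) - ord('a')] += 1
  for letter_index, count in enumerate(counts):
    bucket = min(count, 3)  # counts 0, 1, 2 keep their value; 3 and up collapse to "more"
    inputs[letter_index * NUM_BUCKETS + bucket] = 1.0
  return inputs

# "apple" has two p's, so the 'p' group hots its count-2 bucket
p = ord('p') - ord('a')
print letter_count_buckets("apple")[p * NUM_BUCKETS:(p + 1) * NUM_BUCKETS]
# [0.0, 0.0, 1.0, 0.0]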

Lesson

The new design handles spelling mistakes far better without any training on spelling mistakes. It still needs to get every word right in the exact case, so some refinement is needed. In addition, I need to get this running on a video card so I can check the effectiveness on the full range of words, since in the last experiment the precision on a small input set was out of line with the larger input set.

Data


   Train once
   Batch size 100
                                     transposition 7644 / 7673
                                             exact 993 / 999
                                          deletion 7139 / 8672

Code

import tensorflow as tf
import numpy as np

"""

For each word there is a length node, a first letter node, and for each letter in the word there is a node with input of 1 * number of letters
"""

import sys

words_file = "words.txt"
if len(sys.argv) > 1:
  words_file = sys.argv[1]
show_details = (len(sys.argv) > 2)

words_txt = open(words_file, "r")

words = words_txt.read().split('\n')
words.pop() # last is empty string
words_txt.close()

number_of_letters = 26
number_of_length_nodes = 30
number_of_first_letter_nodes = number_of_letters
number_of_counts_for_letter_nodes = 4
number_of_letters_nodes = number_of_letters * number_of_counts_for_letter_nodes
number_of_positions = 25
size_of_sections = [
  number_of_length_nodes,
  number_of_first_letter_nodes,
  number_of_letters_nodes
]
position_of_sections = [0]
last_position = 0
for size in size_of_sections:
  last_position += size  # running total gives each section its own start offset
  position_of_sections.append(last_position)
number_of_inputs = sum(size_of_sections)

number_of_words = len(words)
number_of_outputs = len(words)

#import pdb; pdb.set_trace();
# setup input nodes for a word
def word_to_train(word, inputs):
  # section 1 - length of word
  length = len(word)
  inputs[position_of_sections[0]+length] = 1

  if length > 1:
    inputs[position_of_sections[0]+length-1] = 0.5
  if length > 2:
    inputs[position_of_sections[0]+length-2] = 0.25

  if length+1 < number_of_length_nodes:
    inputs[position_of_sections[0]+length+1] = 0.5
  if length+2 < number_of_length_nodes:
    inputs[position_of_sections[0]+length+2] = 0.25

  # section 2 - first letter of word
  inputs[position_of_sections[1] + ord(word[0]) - ord('a')] = 1

  # section 3 - number of times letter appears in word (0,1,2,more)
  counts = [0] * number_of_letters
  for index, ch in enumerate(word):
    counts[ord(ch) - ord('a')] += 1

  for letter_index, count in enumerate(counts):
    if count == 0:
      count_index = 0
    elif count == 1:
      count_index = 1
    elif count == 2:
      count_index = 2
    else:
      count_index = 3
    inputs[position_of_sections[2] + letter_index*number_of_counts_for_letter_nodes + count_index] = 1

# train the model; get_batch_size(batch_number) supplies each batch's size
def train(sess, get_batch_size, train_step, x, y_):
  batch_number = 0
  batch_size = get_batch_size(batch_number)

  x_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_inputs), dtype=float)
  y_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_outputs), dtype=float)

  index = 0
  for word_number, word in enumerate(words):
    word_to_train(word, x_array[index])
    y_array[index][word_number] = 1
    index += 1
    if index == batch_size or word_number+1 == number_of_words:
      batch_number += 1
      batch_size = get_batch_size(batch_number)

      index = 0
      sess.run(train_step, feed_dict={x:x_array, y_: y_array})
      to_do = number_of_words - word_number - 1
      x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
      y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)

def train_twice(sess, get_batch_size, train_step, x, y_):
  train(sess, get_batch_size, train_step, x, y_)
  train(sess, get_batch_size, train_step, x, y_)

def run_test_one_word(sess, x, y, word):
  x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
  word_to_train(word, x_array[0])
  prediction = tf.argmax(y,1)
  prediction = sess.run(prediction, feed_dict={x: x_array})
  return prediction[0]

def word_to_test_words(word, include_original=True, include_remove=True, include_transpose=True):
  words = []

  #original word
  words.append(word)

  # remove a character
  for i in range(0,len(word)):
    words.append( word[:i] + word[i+1:] )

  # transpose characters
  if len(word) > 1:
    for i in range(0,len(word)-1):
      before = word[:i]
      letter1 = word[i]
      letter2 = word[i+1]
      after = word[i+2:]
      words.append( before + letter2 + letter1 + after )

  return words

def word_to_train_words(word):
  return [word]

def word_to_test_words_exact(word):
  return [word]

def word_to_test_words_deletion(word):
  words = []
  for i in range(0,len(word)):
    words.append( word[:i] + word[i+1:] )
  return words

def word_to_test_words_transposition(word):
  words = []
  if len(word) > 1:
    for i in range(0,len(word)-1):
      before = word[:i]
      letter1 = word[i]
      letter2 = word[i+1]
      after = word[i+2:]
      words.append( before + letter2 + letter1 + after )
  return words

def run_test_for_case(sess, x, y, word_to_test_words_function):
  n_right = 0
  n_checked = 0

  for word_number, word in enumerate(words):
    test_words = word_to_test_words_function(word)

    for test_word in test_words:
      prediction = run_test_one_word(sess, x, y, test_word)
      is_right = prediction == word_number

      if show_details:
        #if is_right:
          #print "Right %s matches %s" % (words[word_number], test_word)
        #else:
          #print "Wrong %s found %s was %s" % (test_word, words[prediction], word)
        if not is_right:
          #import pdb; pdb.set_trace()
          print "Wrong %s found %s was %s" % (test_word, words[prediction], word)

      if is_right:
        n_right += 1

      n_checked += 1
  return (n_right, n_checked)

def run_test(words, get_batch_size, train):
  x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
  y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])

  W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
  b = tf.Variable(tf.zeros([number_of_outputs]))

  y = tf.nn.softmax(tf.matmul(x,W) + b)
  cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
  train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

  sess = tf.InteractiveSession()
  sess.run(tf.initialize_all_variables())

  #import pdb; pdb.set_trace();
  train(sess, get_batch_size, train_step, x, y_)

  results = {}
  results["exact"] = run_test_for_case(sess, x, y, word_to_test_words_exact)
  results["deletion"] = run_test_for_case(sess, x, y, word_to_test_words_deletion)
  results["transposition"] = run_test_for_case(sess, x, y, word_to_test_words_transposition)
  return results

def run_tests(get_batch_size, train):
  for batch_size in range(100, 1001, 100):
    results = run_test(words, get_batch_size(batch_size), train)
    print "   Batch size %d" % batch_size
    for key in results.keys():
      (n_right, n_checked) = results[key]
      print "%50s %d / %d" % (key, n_right, n_checked)

print "Batches in same order as file, non-full batch appears last"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears first"
def get_batch_size(batch_size):
  def get_batch_size2(batch_number):
    if batch_number == 0:
      return number_of_words % batch_size
    else:
      return batch_size
  return get_batch_size2

print "   Train once"
#import pdb; pdb.set_trace();
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears last, batch size alternate by div 2"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    if batch_number % 2 == 0:
      return batch_size / 2
    else:
      return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

Monday, August 8, 2016

New Design for Spelling Mistakes - Try 1

Experiment


I updated the design of the neural net to handle spelling mistakes better. There is a word-length section, where each node represents a possible word length. There is a first-letter section, with one node for each letter; the node matching the word's first letter is hot. And there is a letters section, with one node for each letter whose input is the number of times the letter appears in the word.
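
As a rough sketch of what one input row looks like under this design (function names are mine; the real code below also bleeds 0.5 and 0.25 activations onto the neighboring length nodes, which this sketch omits):

NUM_LETTERS = 26
NUM_LENGTH_NODES = 30

def encode(word):
  length_sec = [0.0] * NUM_LENGTH_NODES
  length_sec[len(word)] = 1.0
  first_sec = [0.0] * NUM_LETTERS
  first_sec[ord(word[0]) - ord('a')] = 1.0
  letters_sec = [0.0] * NUM_LETTERS
  for ch in word:
    letters_sec[ord(ch) - ord('a')] += 1.0  # raw count per letter
  return length_sec + first_sec + letters_sec

inputs = encode("noon")
print inputs[4]                                                     # 1.0 -> length is 4
print inputs[NUM_LENGTH_NODES + ord('n') - ord('a')]                # 1.0 -> first letter is n
print inputs[NUM_LENGTH_NODES + NUM_LETTERS + ord('o') - ord('a')]  # 2.0 -> o appears twice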

Lesson

This was interesting: on a small run with nine words, the results looked great. When I increased the number of words to 999, it fell apart.

Data

Small Batch Run (9 words)

Batches in same order as file, non-full batch appears last
   Train once
   Batch size 100
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 200
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 300
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 400
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 500
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 600
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 700
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 800
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 900
                                     transposition 54 / 55
                                             exact 9 / 9
                                          deletion 58 / 64
   Batch size 1000
                                     transposition 54 / 55
                                             exact 9 / 9

Large Batch Run (999 words)

Batches in same order as file, non-full batch appears last
   Train once
   Batch size 100
                                     transposition 1989 / 7673
                                             exact 194 / 999
                                          deletion 1882 / 8672
   Batch size 200
                                     transposition 1893 / 7673
                                             exact 181 / 999
                                          deletion 1926 / 8672
   Batch size 300
                                     transposition 450 / 7673
                                             exact 52 / 999
                                          deletion 460 / 8672
   Batch size 400
                                     transposition 704 / 7673
                                             exact 76 / 999
                                          deletion 693 / 8672
   Batch size 500
                                     transposition 1832 / 7673
                                             exact 174 / 999
                                          deletion 1947 / 8672
   Batch size 600
                                     transposition 1051 / 7673
                                             exact 110 / 999
                                          deletion 1049 / 8672
   Batch size 700
                                     transposition 871 / 7673
                                             exact 90 / 999
                                          deletion 894 / 8672
   Batch size 800
                                     transposition 704 / 7673
                                             exact 76 / 999
                                          deletion 693 / 8672
   Batch size 900
                                     transposition 450 / 7673
                                             exact 52 / 999
                                          deletion 460 / 8672

Code

import tensorflow as tf
import numpy as np

"""
For each word there is a set of word-length nodes, a set of first-letter nodes, and one node per letter whose input is the number of times that letter appears in the word
"""

import sys
words_file = "words.txt"
if len(sys.argv) > 1:
  words_file = sys.argv[1]
show_details = (len(sys.argv) > 2)

words_txt = open(words_file, "r")
words = words_txt.read().split('\n')
words.pop() # last is empty string
words_txt.close()

number_of_letters = 26
number_of_length_nodes = 30
number_of_first_letter_nodes = number_of_letters
number_of_letters_nodes = number_of_letters
number_of_positions = 25
size_of_sections = [
  number_of_length_nodes,
  number_of_first_letter_nodes,
  number_of_letters_nodes
]
position_of_sections = [0]
last_position = 0
for size in size_of_sections:
  last_position += size  # running total gives each section its own start offset
  position_of_sections.append(last_position)
number_of_inputs = sum(size_of_sections)

number_of_words = len(words)
number_of_outputs = len(words)

#import pdb; pdb.set_trace();
# setup input nodes for a word
def word_to_train(word, inputs):
  # section 1 - length of word
  length = len(word)
  inputs[position_of_sections[0]+length] = 1

  if length > 1:
    inputs[position_of_sections[0]+length-1] = 0.5
  if length > 2:
    inputs[position_of_sections[0]+length-2] = 0.25

  if length+1 < number_of_length_nodes:
    inputs[position_of_sections[0]+length+1] = 0.5
  if length+2 < number_of_length_nodes:
    inputs[position_of_sections[0]+length+2] = 0.25

  # section 2 - first letter of word
  inputs[position_of_sections[1] + ord(word[0]) - ord('a')] = 1

  # section 3 - number of times letter appears in word
  for index, ch in enumerate(word):
    inputs[position_of_sections[2] + ord(ch) - ord('a')] += 1

# train the model; get_batch_size(batch_number) supplies each batch's size
def train(sess, get_batch_size, train_step, x, y_):
  batch_number = 0
  batch_size = get_batch_size(batch_number)

  x_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_inputs), dtype=float)
  y_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_outputs), dtype=float)

  index = 0
  for word_number, word in enumerate(words):
    word_to_train(word, x_array[index])
    y_array[index][word_number] = 1
    index += 1
    if index == batch_size or word_number+1 == number_of_words:
      batch_number += 1
      batch_size = get_batch_size(batch_number)

      index = 0
      sess.run(train_step, feed_dict={x:x_array, y_: y_array})
      to_do = number_of_words - word_number - 1
      x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
      y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)

def train_twice(sess, get_batch_size, train_step, x, y_):
  train(sess, get_batch_size, train_step, x, y_)
  train(sess, get_batch_size, train_step, x, y_)

def run_test_one_word(sess, x, y, word):
  x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
  word_to_train(word, x_array[0])
  prediction = tf.argmax(y,1)
  prediction = sess.run(prediction, feed_dict={x: x_array})
  return prediction[0]

def word_to_test_words(word, include_original=True, include_remove=True, include_transpose=True):
  words = []

  #original word
  words.append(word)

  # remove a character
  for i in range(0,len(word)):
    words.append( word[:i] + word[i+1:] )

  # transpose characters
  if len(word) > 1:
    for i in range(0,len(word)-1):
      before = word[:i]
      letter1 = word[i]
      letter2 = word[i+1]
      after = word[i+2:]
      words.append( before + letter2 + letter1 + after )
     
  return words

def word_to_train_words(word):
  return [word]

def word_to_test_words_exact(word):
  return [word]

def word_to_test_words_deletion(word):
  words = []
  for i in range(0,len(word)):
    words.append( word[:i] + word[i+1:] )
  return words

def word_to_test_words_transposition(word):
  words = []
  if len(word) > 1:
    for i in range(0,len(word)-1):
      before = word[:i]
      letter1 = word[i]
      letter2 = word[i+1]
      after = word[i+2:]
      words.append( before + letter2 + letter1 + after )
  return words

def run_test_for_case(sess, x, y, word_to_test_words_function):
  n_right = 0
  n_checked = 0
 
  for word_number, word in enumerate(words):
    test_words = word_to_test_words_function(word)

    for test_word in test_words:
      prediction = run_test_one_word(sess, x, y, test_word)
      is_right = prediction == word_number

      if show_details:
        #if is_right:
          #print "Right %s matches %s" % (words[word_number], test_word)
        #else:
          #print "Wrong %s found %s was %s" % (test_word, words[prediction], word)
        if not is_right:
          #import pdb; pdb.set_trace()
          print "Wrong %s found %s was %s" % (test_word, words[prediction], word)

      if is_right:
        n_right += 1

      n_checked += 1
  return (n_right, n_checked)

def run_test(words, get_batch_size, train):
  x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
  y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])

  W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
  b = tf.Variable(tf.zeros([number_of_outputs]))

  y = tf.nn.softmax(tf.matmul(x,W) + b)
  cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
  train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

  sess = tf.InteractiveSession()
  sess.run(tf.initialize_all_variables())

  #import pdb; pdb.set_trace();
  train(sess, get_batch_size, train_step, x, y_)

  results = {}
  results["exact"] = run_test_for_case(sess, x, y, word_to_test_words_exact)
  results["deletion"] = run_test_for_case(sess, x, y, word_to_test_words_deletion)
  results["transposition"] = run_test_for_case(sess, x, y, word_to_test_words_transposition)
  return results

def run_tests(get_batch_size, train):
  for batch_size in range(100, 1001, 100):
    results = run_test(words, get_batch_size(batch_size), train)
    print "   Batch size %d" % batch_size
    for key in results.keys():
      (n_right, n_checked) = results[key]
      print "%50s %d / %d" % (key, n_right, n_checked)

print "Batches in same order as file, non-full batch appears last"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears first"
def get_batch_size(batch_size):
  def get_batch_size2(batch_number):
    if batch_number == 0:
      return number_of_words % batch_size
    else:
      return batch_size
  return get_batch_size2

print "   Train once"
#import pdb; pdb.set_trace();
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears last, batch size alternate by div 2"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    if batch_number % 2 == 0:
      return batch_size / 2
    else:
      return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

Friday, July 29, 2016

Spelling Mistakes

Experiment


Using the same design as before, I updated the tests to check spelling mistakes. In the transposition test, two adjacent letters are swapped before the word is looked up. In the deletion test, one letter is deleted before the lookup. The data shows that the design is not good enough to handle spelling mistakes. I did not train on the spelling mistakes because I want a design that naturally accommodates them.
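
For reference, the two error generators are small string manipulations; here is a sketch with the same logic as the word_to_test_words_* functions in the code below:

def deletions(word):
  # every word formed by removing one character
  return [word[:i] + word[i+1:] for i in range(len(word))]

def transpositions(word):
  # every word formed by swapping two adjacent characters
  return [word[:i] + word[i+1] + word[i] + word[i+2:]
          for i in range(len(word) - 1)]

print deletions("cat")        # ['at', 'ct', 'ca']
print transpositions("cat")   # ['act', 'cta']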

Lesson


The design needs to be different to handle spelling mistakes.

Data


   Train once
   Batch size 100
                                     transposition 6406 / 7673
                                             exact 993 / 999
                                          deletion 1279 / 8672
   Batch size 200
                                     transposition 6407 / 7673
                                             exact 995 / 999
                                          deletion 1279 / 8672
   Batch size 300
                                     transposition 975 / 7673
                                             exact 154 / 999
                                          deletion 174 / 8672
   Batch size 400
                                     transposition 2449 / 7673
                                             exact 411 / 999
                                          deletion 477 / 8672
   Batch size 500
                                     transposition 6415 / 7673
                                             exact 999 / 999
                                          deletion 1258 / 8672
   Batch size 600
                                     transposition 4800 / 7673
                                             exact 841 / 999
                                          deletion 1011 / 8672
   Batch size 700
                                     transposition 2451 / 7673
                                             exact 369 / 999
                                          deletion 437 / 8672
   Batch size 800
                                     transposition 1402 / 7673
                                             exact 200 / 999
                                          deletion 275 / 8672
   Batch size 900
                                     transposition 696 / 7673
                                             exact 99 / 999
                                          deletion 157 / 8672
   Batch size 1000
                                     transposition 6384 / 7673
                                             exact 999 / 999
                                          deletion 1234 / 8672

Source Code


import tensorflow as tf
import numpy as np

"""
For each character position there is a set of 26 letter nodes. The output is a one-hot vector over the words
"""

import sys
words_file = "words.txt"
if len(sys.argv) > 1:
  words_file = sys.argv[1]
show_details = (len(sys.argv) > 2)

words_txt = open(words_file, "r")
words = words_txt.read().split('\n')
words.pop() # last is empty string
words_txt.close()

number_of_positions = 25
number_of_letters = 26
number_of_inputs = number_of_positions * number_of_letters
number_of_words = len(words)
number_of_outputs = len(words)

#import pdb; pdb.set_trace();
# setup input nodes for a word
def word_to_train(word, inputs):
  for index, ch in enumerate(word):
    inputs[index*number_of_letters + ord(ch) - ord('a')] = 1

# train the model; get_batch_size(batch_number) supplies each batch's size
def train(sess, get_batch_size, train_step, x, y_):
  batch_number = 0
  batch_size = get_batch_size(batch_number)

  x_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_inputs), dtype=float)
  y_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_outputs), dtype=float)

  index = 0
  for word_number, word in enumerate(words):
    word_to_train(word, x_array[index])
    y_array[index][word_number] = 1
    index += 1
    if index == batch_size or word_number+1 == number_of_words:
      batch_number += 1
      batch_size = get_batch_size(batch_number)

      index = 0
      sess.run(train_step, feed_dict={x:x_array, y_: y_array})
      to_do = number_of_words - word_number - 1
      x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
      y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)

def train_twice(sess, get_batch_size, train_step, x, y_):
  train(sess, get_batch_size, train_step, x, y_)
  train(sess, get_batch_size, train_step, x, y_)

def run_test_one_word(sess, x, y, word):
  x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
  word_to_train(word, x_array[0])
  prediction = tf.argmax(y,1)
  prediction = sess.run(prediction, feed_dict={x: x_array})
  return prediction[0]

def word_to_test_words(word, include_original=True, include_remove=True, include_transpose=True):
  words = []

  #original word
  words.append(word)

  # remove a character
  for i in range(0,len(word)):
    words.append( word[:i] + word[i+1:] )

  # transpose characters
  if len(word) > 1:
    for i in range(0,len(word)-1):
      before = word[:i]
      letter1 = word[i]
      letter2 = word[i+1]
      after = word[i+2:]
      words.append( before + letter2 + letter1 + after )
     
  return words

def word_to_test_words_exact(word):
  return [word]

def word_to_test_words_deletion(word):
  words = []
  for i in range(0,len(word)):
    words.append( word[:i] + word[i+1:] )
  return words

def word_to_test_words_transposition(word):
  words = []
  if len(word) > 1:
    for i in range(0,len(word)-1):
      before = word[:i]
      letter1 = word[i]
      letter2 = word[i+1]
      after = word[i+2:]
      words.append( before + letter2 + letter1 + after )
  return words

def run_test_for_case(sess, x, y, word_to_test_words_function):
  n_right = 0
  n_checked = 0
 
  for word_number, word in enumerate(words):
    test_words = word_to_test_words_function(word)

    for test_word in test_words:
      prediction = run_test_one_word(sess, x, y, test_word)
      is_right = prediction == word_number

      if show_details:
        if is_right:
          print "Right %s matches %s" % (words[word_number], test_word)
        else:
          print "Wrong %s found %s" % (test_word, words[prediction])

      if is_right:
        n_right += 1

      n_checked += 1
  return (n_right, n_checked)

def run_test(words, get_batch_size, train):
  x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
  y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])

  W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
  b = tf.Variable(tf.zeros([number_of_outputs]))

  y = tf.nn.softmax(tf.matmul(x,W) + b)
  cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
  train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

  sess = tf.InteractiveSession()
  sess.run(tf.initialize_all_variables())

  #import pdb; pdb.set_trace();
  train(sess, get_batch_size, train_step, x, y_)

  results = {}
  results["exact"] = run_test_for_case(sess, x, y, word_to_test_words_exact)
  results["deletion"] = run_test_for_case(sess, x, y, word_to_test_words_deletion)
  results["transposition"] = run_test_for_case(sess, x, y, word_to_test_words_transposition)
  return results

def run_tests(get_batch_size, train):
  for batch_size in range(100, 1001, 100):
    results = run_test(words, get_batch_size(batch_size), train)
    print "   Batch size %d" % batch_size
    for key in results.keys():
      (n_right, n_checked) = results[key]
      print "%50s %d / %d" % (key, n_right, n_checked)

print "Batches in same order as file, non-full batch appears last"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears first"
def get_batch_size(batch_size):
  def get_batch_size2(batch_number):
    if batch_number == 0:
      return number_of_words % batch_size
    else:
      return batch_size
  return get_batch_size2

print "   Train once"
#import pdb; pdb.set_trace();
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears last, batch size alternate by div 2"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    if batch_number % 2 == 0:
      return batch_size / 2
    else:
      return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

Friday, July 8, 2016

Training

Experiment


I updated the code to investigate training, after the observation that having a small batch last in training gave far worse performance. One thing I tried was training twice; that had no effect at all. I checked having the non-full batch first or last, and the performance was mostly the same. The final thing I checked was a batch size that alternates between two sizes. That performed much worse than a constant batch size.
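
The batch-size schedules are just closures over the configured size; here is a sketch (function names are mine) of the three variants the code below exercises, assuming the 999-word corpus from this run:

number_of_words = 999  # corpus size assumed for this example

def constant(batch_size):
  def schedule(batch_number):
    return batch_size
  return schedule

def non_full_first(batch_size):
  def schedule(batch_number):
    if batch_number == 0:
      return number_of_words % batch_size  # the remainder batch comes first
    return batch_size
  return schedule

def alternating(batch_size):
  def schedule(batch_number):
    if batch_number % 2 == 0:
      return batch_size / 2  # every other batch is half size
    return batch_size
  return schedule

schedule = alternating(200)
print [schedule(n) for n in range(4)]  # [100, 200, 100, 200]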

Lesson

Make all the batches the same size. I will revisit this if I do some kind of incremental learning.

Data

python main1_ordering.py words_999.txt
Batches in same order as file, non-full batch appears last
   Train once
   Batch size 100, Accuracy: 993 / 999
   Batch size 200, Accuracy: 995 / 999
   Batch size 300, Accuracy: 154 / 999
   Batch size 400, Accuracy: 411 / 999
   Batch size 500, Accuracy: 999 / 999
   Batch size 600, Accuracy: 841 / 999
   Batch size 700, Accuracy: 369 / 999
   Batch size 800, Accuracy: 200 / 999
   Batch size 900, Accuracy: 99 / 999
   Batch size 1000, Accuracy: 999 / 999
   Train twice
   Batch size 100, Accuracy: 993 / 999
   Batch size 200, Accuracy: 995 / 999
   Batch size 300, Accuracy: 154 / 999
   Batch size 400, Accuracy: 411 / 999
   Batch size 500, Accuracy: 999 / 999
   Batch size 600, Accuracy: 841 / 999
   Batch size 700, Accuracy: 369 / 999
   Batch size 800, Accuracy: 200 / 999
   Batch size 900, Accuracy: 99 / 999
   Batch size 1000, Accuracy: 999 / 999
Batches in same order as file, non-full batch appears first
   Train once
   Batch size 100, Accuracy: 991 / 999
   Batch size 200, Accuracy: 995 / 999
   Batch size 300, Accuracy: 150 / 999
   Batch size 400, Accuracy: 450 / 999
   Batch size 500, Accuracy: 999 / 999
   Batch size 600, Accuracy: 831 / 999
   Batch size 700, Accuracy: 377 / 999
   Batch size 800, Accuracy: 200 / 999
   Batch size 900, Accuracy: 99 / 999
   Batch size 1000, Accuracy: 999 / 999
   Train twice
   Batch size 100, Accuracy: 991 / 999
   Batch size 200, Accuracy: 995 / 999
   Batch size 300, Accuracy: 150 / 999
   Batch size 400, Accuracy: 450 / 999
   Batch size 500, Accuracy: 999 / 999
   Batch size 600, Accuracy: 831 / 999
   Batch size 700, Accuracy: 377 / 999
   Batch size 800, Accuracy: 200 / 999
   Batch size 900, Accuracy: 99 / 999
   Batch size 1000, Accuracy: 999 / 999
Batches in same order as file, non-full batch appears last, batch size alternate by div 2
   Train once
   Batch size 100, Accuracy: 469 / 999
   Batch size 200, Accuracy: 503 / 999
   Batch size 300, Accuracy: 377 / 999
   Batch size 400, Accuracy: 672 / 999
   Batch size 500, Accuracy: 583 / 999
   Batch size 600, Accuracy: 116 / 999
   Batch size 700, Accuracy: 551 / 999
   Batch size 800, Accuracy: 892 / 999
   Batch size 900, Accuracy: 989 / 999
   Batch size 1000, Accuracy: 999 / 999
   Train twice
   Batch size 100, Accuracy: 475 / 999
   Batch size 200, Accuracy: 505 / 999
   Batch size 300, Accuracy: 378 / 999
   Batch size 400, Accuracy: 674 / 999
   Batch size 500, Accuracy: 583 / 999
   Batch size 600, Accuracy: 116 / 999
   Batch size 700, Accuracy: 551 / 999
   Batch size 800, Accuracy: 892 / 999
   Batch size 900, Accuracy: 989 / 999
   Batch size 1000, Accuracy: 999 / 999


Source Code


import tensorflow as tf
import numpy as np

"""
For each character position there is a set of 26 letter nodes. The output is a one-hot vector over the words
"""

import sys
words_file = "words.txt"
if len(sys.argv) > 1:
  words_file = sys.argv[1]
show_details = (len(sys.argv) > 2)

words_txt = open(words_file, "r")
words = words_txt.read().split('\n')
words.pop() # last is empty string
words_txt.close()

number_of_positions = 25
number_of_letters = 26
number_of_inputs = number_of_positions * number_of_letters
number_of_words = len(words)
number_of_outputs = len(words)

#import pdb; pdb.set_trace();
# setup input nodes for a word
def word_to_train(word, inputs):
  for index, ch in enumerate(word):
    inputs[index*number_of_letters + ord(ch) - ord('a')] = 1

# train the model; get_batch_size(batch_number) supplies each batch's size
def train(sess, get_batch_size, train_step, x, y_):
  batch_number = 0
  batch_size = get_batch_size(batch_number)

  x_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_inputs), dtype=float)
  y_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_outputs), dtype=float)

  index = 0
  for word_number, word in enumerate(words):
    word_to_train(word, x_array[index])
    y_array[index][word_number] = 1
    index += 1
    if index == batch_size or word_number+1 == number_of_words:
      batch_number += 1
      batch_size = get_batch_size(batch_number)

      index = 0
      sess.run(train_step, feed_dict={x:x_array, y_: y_array})
      to_do = number_of_words - word_number - 1
      x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
      y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)

def train_twice(sess, get_batch_size, train_step, x, y_):
  train(sess, get_batch_size, train_step, x, y_)
  train(sess, get_batch_size, train_step, x, y_)

def run_test(words, get_batch_size, train):
  x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
  y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])

  W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
  b = tf.Variable(tf.zeros([number_of_outputs]))

  y = tf.nn.softmax(tf.matmul(x,W) + b)
  cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
  train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

  sess = tf.InteractiveSession()
  sess.run(tf.initialize_all_variables())

  #import pdb; pdb.set_trace();
  train(sess, get_batch_size, train_step, x, y_)

  x_array = None
  y_array = None

  n_right = 0
  #import pdb; pdb.set_trace();
  for word_number, word in enumerate(words):
    #import pdb; pdb.set_trace()
    x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
    word_to_train(word, x_array[0])
    prediction = tf.argmax(y,1)
    prediction = sess.run(prediction, feed_dict={x: x_array})
    is_right = (prediction[0] == word_number)
    if show_details:
      if is_right:
        print "Right %s" % words[word_number]
      else:
        print "Wrong %s found %s" % (words[word_number], words[prediction[0]])
    if is_right:
      n_right += 1
  return n_right


def run_tests(get_batch_size, train):
  for batch_size in range(100, 1001, 100):
  #for batch_size in range(100, 201, 100):
    n_right = run_test(words, get_batch_size(batch_size), train)
    print "   Batch size %d, Accuracy: %d / %d" % (batch_size, n_right, len(words))


print "Batches in same order as file, non-full batch appears last"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears first"
def get_batch_size(batch_size):
  def get_batch_size2(batch_number):
    if batch_number == 0:
      return number_of_words % batch_size
    else:
      return batch_size
  return get_batch_size2

print "   Train once"
#import pdb; pdb.set_trace();
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears last, batch size alternate by div 2"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    if batch_number % 2 == 0:
      return batch_size / 2
    else:
      return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)