Friday, July 8, 2016

Training

Experiment


I updated the code to investigate training after observing that having a small final batch gave far worse performance. One thing I tried was training twice; that had no effect at all. I also compared putting the non-full batch first versus last, and the performance was mostly the same. The final thing I checked was alternating the batch size between two sizes; that performed much worse than a constant batch size.
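
To make the three schedules concrete, here is a small standalone sketch (the batch_layout helper is mine, not part of the experiment code) that just lists the batch sizes each schedule produces for the 999-word file:

def batch_layout(schedule, total=999):
  # List the batch sizes a schedule produces until all words are used.
  sizes, done, n = [], 0, 0
  while done < total:
    size = min(schedule(n), total - done)
    sizes.append(size)
    done += size
    n += 1
  return sizes

print(batch_layout(lambda n: 300))                         # constant size, small batch last: [300, 300, 300, 99]
print(batch_layout(lambda n: 99 if n == 0 else 300))       # small batch first: [99, 300, 300, 300]
print(batch_layout(lambda n: 150 if n % 2 == 0 else 300))  # alternating by div 2: [150, 300, 150, 300, 99]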

Lesson

Make all the batches the same size. I will revisit this if I do some kind of incremental learning.
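
A minimal sketch of one way to do that, assuming the simplest fix of dropping the leftover words so every batch is full (this is not what the code below does):

number_of_words = 999
batch_size = 300
usable = number_of_words - (number_of_words % batch_size)  # 900 words: three full batches, 99 left over are skipped
for start in range(0, usable, batch_size):
  print("batch covers words %d to %d" % (start, start + batch_size - 1))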

Data

python main1_ordering.py words_999.txt
Batches in same order as file, non-full batch appears last
   Train once
   Batch size 100, Accuracy: 993 / 999
   Batch size 200, Accuracy: 995 / 999
   Batch size 300, Accuracy: 154 / 999
   Batch size 400, Accuracy: 411 / 999
   Batch size 500, Accuracy: 999 / 999
   Batch size 600, Accuracy: 841 / 999
   Batch size 700, Accuracy: 369 / 999
   Batch size 800, Accuracy: 200 / 999
   Batch size 900, Accuracy: 99 / 999
   Batch size 1000, Accuracy: 999 / 999
   Train twice
   Batch size 100, Accuracy: 993 / 999
   Batch size 200, Accuracy: 995 / 999
   Batch size 300, Accuracy: 154 / 999
   Batch size 400, Accuracy: 411 / 999
   Batch size 500, Accuracy: 999 / 999
   Batch size 600, Accuracy: 841 / 999
   Batch size 700, Accuracy: 369 / 999
   Batch size 800, Accuracy: 200 / 999
   Batch size 900, Accuracy: 99 / 999
   Batch size 1000, Accuracy: 999 / 999
Batches in same order as file, non-full batch appears first
   Train once
   Batch size 100, Accuracy: 991 / 999
   Batch size 200, Accuracy: 995 / 999
   Batch size 300, Accuracy: 150 / 999
   Batch size 400, Accuracy: 450 / 999
   Batch size 500, Accuracy: 999 / 999
   Batch size 600, Accuracy: 831 / 999
   Batch size 700, Accuracy: 377 / 999
   Batch size 800, Accuracy: 200 / 999
   Batch size 900, Accuracy: 99 / 999
   Batch size 1000, Accuracy: 999 / 999
   Train twice
   Batch size 100, Accuracy: 991 / 999
   Batch size 200, Accuracy: 995 / 999
   Batch size 300, Accuracy: 150 / 999
   Batch size 400, Accuracy: 450 / 999
   Batch size 500, Accuracy: 999 / 999
   Batch size 600, Accuracy: 831 / 999
   Batch size 700, Accuracy: 377 / 999
   Batch size 800, Accuracy: 200 / 999
   Batch size 900, Accuracy: 99 / 999
   Batch size 1000, Accuracy: 999 / 999
Batches in same order as file, non-full batch appears last, batch size alternate by div 2
   Train once
   Batch size 100, Accuracy: 469 / 999
   Batch size 200, Accuracy: 503 / 999
   Batch size 300, Accuracy: 377 / 999
   Batch size 400, Accuracy: 672 / 999
   Batch size 500, Accuracy: 583 / 999
   Batch size 600, Accuracy: 116 / 999
   Batch size 700, Accuracy: 551 / 999
   Batch size 800, Accuracy: 892 / 999
   Batch size 900, Accuracy: 989 / 999
   Batch size 1000, Accuracy: 999 / 999
   Train twice
   Batch size 100, Accuracy: 475 / 999
   Batch size 200, Accuracy: 505 / 999
   Batch size 300, Accuracy: 378 / 999
   Batch size 400, Accuracy: 674 / 999
   Batch size 500, Accuracy: 583 / 999
   Batch size 600, Accuracy: 116 / 999
   Batch size 700, Accuracy: 551 / 999
   Batch size 800, Accuracy: 892 / 999
   Batch size 900, Accuracy: 989 / 999
   Batch size 1000, Accuracy: 999 / 999


Source Code


import tensorflow as tf
import numpy as np

"""
For each position there is a set of 26 letters. Output is one hot vector of words
"""

import sys
words_file = "words.txt"
if len(sys.argv) > 1:
  words_file = sys.argv[1]
show_details = (len(sys.argv) > 2)

words_txt = open(words_file, "r")
words = words_txt.read().split('\n')
words.pop() # last is empty string
words_txt.close()

number_of_positions = 25
number_of_letters = 26
number_of_inputs = number_of_positions * number_of_letters
number_of_words = len(words)
number_of_outputs = len(words)

#import pdb; pdb.set_trace();
# Encode a word into the input vector: one hot letter per position.
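# For example, "cab" sets inputs[2] ('c' in position 0), inputs[26] ('a' in
# position 1) and inputs[53] ('b' in position 2); every other entry stays 0.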
def word_to_train(word, inputs):
  for index, ch in enumerate(word):
    inputs[index*number_of_letters + ord(ch) - ord('a')] = 1

# Train one pass over the words; get_batch_size(batch_number) gives the size of each batch.
def train(sess, get_batch_size, train_step, x, y_):
  batch_number = 0
  batch_size = get_batch_size(batch_number)

  x_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_inputs), dtype=float)
  y_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_outputs), dtype=float)

  index = 0
  for word_number, word in enumerate(words):
    word_to_train(word, x_array[index])
    y_array[index][word_number] = 1
    index += 1
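    # Batch is full, or this is the last word: run one training step on it.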
    if index == batch_size or word_number+1 == number_of_words:
      batch_number += 1
      batch_size = get_batch_size(batch_number)

      index = 0
      sess.run(train_step, feed_dict={x:x_array, y_: y_array})
      to_do = number_of_words - word_number - 1
      x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
      y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)

def train_twice(sess, get_batch_size, train_step, x, y_):
  train(sess, get_batch_size, train_step, x, y_)
  train(sess, get_batch_size, train_step, x, y_)

def run_test(words, get_batch_size, train):
  x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
  y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])

  W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
  b = tf.Variable(tf.zeros([number_of_outputs]))

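  # Softmax regression: y = softmax(xW + b), trained by minimizing cross-entropy.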
  y = tf.nn.softmax(tf.matmul(x,W) + b)
  cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
  train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

  sess = tf.InteractiveSession()
  sess.run(tf.initialize_all_variables())

  #import pdb; pdb.set_trace();
  train(sess, get_batch_size, train_step, x, y_)

  # Test on the training words: predict each word from its own encoding.
  prediction_op = tf.argmax(y, 1)  # build the argmax op once, outside the loop
  n_right = 0
  for word_number, word in enumerate(words):
    x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
    word_to_train(word, x_array[0])
    prediction = sess.run(prediction_op, feed_dict={x: x_array})
    is_right = (prediction[0] == word_number)
    if show_details:
      if is_right:
        print "Right %s" % words[word_number]
      else:
        print "Wrong %s found %s" % (words[word_number], words[prediction[0]])
    if is_right:
      n_right += 1
  return n_right


def run_tests(get_batch_size, train):
  for batch_size in range(100, 1001, 100):
  #for batch_size in range(100, 201, 100):
    n_right = run_test(words, get_batch_size(batch_size), train)
    print "   Batch size %d, Accuracy: %d / %d" % (batch_size, n_right, len(words))


print "Batches in same order as file, non-full batch appears last"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears first"
def get_batch_size(batch_size):
  def get_batch_size2(batch_number):
    if batch_number == 0:
      return number_of_words % batch_size
    else:
      return batch_size
  return get_batch_size2

print "   Train once"
#import pdb; pdb.set_trace();
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears last, batch size alternate by div 2"
def get_batch_size(batch_size):
  def get_batch_size_2(batch_number):
    if batch_number % 2 == 0:
      return batch_size / 2
    else:
      return batch_size
  return get_batch_size_2
print "   Train once"
run_tests(get_batch_size, train)
print "   Train twice"
run_tests(get_batch_size, train_twice)
