Experiment
I updated the code to investigate training further, after observing that ending training on a small, non-full batch gave far worse performance. The first thing I tried was training twice over the data; that had no effect at all. I then checked whether putting the non-full batch first or last mattered, and the performance was mostly the same either way. The last thing I checked was alternating the batch size between two sizes; that performed much worse than a constant batch size.
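To make that last variant concrete: the batch-size schedules are plain closures handed to the training loop (full source at the end of the post), and the alternating one halves every even-numbered batch. A quick sketch of just that schedule, isolated from the rest of the script (the name make_alternating_schedule is mine, purely for illustration):

def make_alternating_schedule(batch_size):
    # even-numbered batches are half size, odd-numbered batches are full size
    def get_batch_size(batch_number):
        if batch_number % 2 == 0:
            return batch_size // 2
        return batch_size
    return get_batch_size

schedule = make_alternating_schedule(600)
print([schedule(n) for n in range(4)])  # [300, 600, 300, 600]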
Lesson
Make all the batches the same size. I will revisit this if I do some kind of incremental learning.
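One way to enforce that (a minimal sketch, not something the script below does; the helper make_full_batches is purely illustrative) is to drop the trailing remainder so every batch fed to the optimizer has exactly batch_size examples:

import numpy as np

def make_full_batches(x_data, y_data, batch_size):
    # keep only the examples that fill complete batches; the remainder is dropped
    n_full = (len(x_data) // batch_size) * batch_size
    for start in range(0, n_full, batch_size):
        yield x_data[start:start + batch_size], y_data[start:start + batch_size]

# 999 examples at batch size 100 -> 9 full batches, the last 99 examples are skipped
x_data = np.zeros((999, 25 * 26))
y_data = np.zeros((999, 999))
print(len(list(make_full_batches(x_data, y_data, 100))))  # 9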
Data
python main1_ordering.py words_999.txt
Batches in same order as file, non-full batch appears last
Train once
Batch size 100, Accuracy: 993 / 999
Batch size 200, Accuracy: 995 / 999
Batch size 300, Accuracy: 154 / 999
Batch size 400, Accuracy: 411 / 999
Batch size 500, Accuracy: 999 / 999
Batch size 600, Accuracy: 841 / 999
Batch size 700, Accuracy: 369 / 999
Batch size 800, Accuracy: 200 / 999
Batch size 900, Accuracy: 99 / 999
Batch size 1000, Accuracy: 999 / 999
Train twice
Batch size 100, Accuracy: 993 / 999
Batch size 200, Accuracy: 995 / 999
Batch size 300, Accuracy: 154 / 999
Batch size 400, Accuracy: 411 / 999
Batch size 500, Accuracy: 999 / 999
Batch size 600, Accuracy: 841 / 999
Batch size 700, Accuracy: 369 / 999
Batch size 800, Accuracy: 200 / 999
Batch size 900, Accuracy: 99 / 999
Batch size 1000, Accuracy: 999 / 999
Batches in same order as file, non-full batch appears first
Train once
Batch size 100, Accuracy: 991 / 999
Batch size 200, Accuracy: 995 / 999
Batch size 300, Accuracy: 150 / 999
Batch size 400, Accuracy: 450 / 999
Batch size 500, Accuracy: 999 / 999
Batch size 600, Accuracy: 831 / 999
Batch size 700, Accuracy: 377 / 999
Batch size 800, Accuracy: 200 / 999
Batch size 900, Accuracy: 99 / 999
Batch size 1000, Accuracy: 999 / 999
Train twice
Batch size 100, Accuracy: 991 / 999
Batch size 200, Accuracy: 995 / 999
Batch size 300, Accuracy: 150 / 999
Batch size 400, Accuracy: 450 / 999
Batch size 500, Accuracy: 999 / 999
Batch size 600, Accuracy: 831 / 999
Batch size 700, Accuracy: 377 / 999
Batch size 800, Accuracy: 200 / 999
Batch size 900, Accuracy: 99 / 999
Batch size 1000, Accuracy: 999 / 999
Batches in same order as file, non-full batch appears last, batch size alternates by div 2
Train once
Batch size 100, Accuracy: 469 / 999
Batch size 200, Accuracy: 503 / 999
Batch size 300, Accuracy: 377 / 999
Batch size 400, Accuracy: 672 / 999
Batch size 500, Accuracy: 583 / 999
Batch size 600, Accuracy: 116 / 999
Batch size 700, Accuracy: 551 / 999
Batch size 800, Accuracy: 892 / 999
Batch size 900, Accuracy: 989 / 999
Batch size 1000, Accuracy: 999 / 999
Train twice
Batch size 100, Accuracy: 475 / 999
Batch size 200, Accuracy: 505 / 999
Batch size 300, Accuracy: 378 / 999
Batch size 400, Accuracy: 674 / 999
Batch size 500, Accuracy: 583 / 999
Batch size 600, Accuracy: 116 / 999
Batch size 700, Accuracy: 551 / 999
Batch size 800, Accuracy: 892 / 999
Batch size 900, Accuracy: 989 / 999
Batch size 1000, Accuracy: 999 / 999
Source Code
import tensorflow as tf
import numpy as np
import sys

"""
Input: for each of 25 positions there is a set of 26 letters (one hot per letter).
Output: a one-hot vector over the words in the file.
"""

words_file = "words.txt"
if len(sys.argv) > 1:
    words_file = sys.argv[1]
show_details = (len(sys.argv) > 2)

words_txt = open(words_file, "r")
words = words_txt.read().split('\n')
words.pop()  # last entry is an empty string
words_txt.close()

number_of_positions = 25
number_of_letters = 26
number_of_inputs = number_of_positions * number_of_letters
number_of_words = len(words)
number_of_outputs = len(words)

# encode a word into the input vector: one hot entry per letter position
def word_to_train(word, inputs):
    for index, ch in enumerate(word):
        inputs[index*number_of_letters + ord(ch) - ord('a')] = 1
# train over all the words, asking get_batch_size(batch_number) for the size of each batch
def train(sess, get_batch_size, train_step, x, y_):
    batch_number = 0
    batch_size = get_batch_size(batch_number)
    x_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_inputs), dtype=float)
    y_array = np.zeros(shape=(min(number_of_words, batch_size), number_of_outputs), dtype=float)
    index = 0
    for word_number, word in enumerate(words):
        word_to_train(word, x_array[index])
        y_array[index][word_number] = 1
        index += 1
        if index == batch_size or word_number+1 == number_of_words:
            # batch is full (or we ran out of words): run one training step on it
            batch_number += 1
            batch_size = get_batch_size(batch_number)
            index = 0
            sess.run(train_step, feed_dict={x: x_array, y_: y_array})
            # allocate arrays for the next batch, which may be smaller than batch_size at the end
            to_do = number_of_words - word_number - 1
            x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
            y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)

def train_twice(sess, get_batch_size, train_step, x, y_):
    train(sess, get_batch_size, train_step, x, y_)
    train(sess, get_batch_size, train_step, x, y_)
def run_test(words, get_batch_size, train):
    x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
    y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])
    W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
    b = tf.Variable(tf.zeros([number_of_outputs]))
    y = tf.nn.softmax(tf.matmul(x, W) + b)
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    sess.run(tf.initialize_all_variables())
    train(sess, get_batch_size, train_step, x, y_)
    # build the prediction op once, outside the loop, rather than once per word
    prediction = tf.argmax(y, 1)
    n_right = 0
    for word_number, word in enumerate(words):
        x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
        word_to_train(word, x_array[0])
        predicted = sess.run(prediction, feed_dict={x: x_array})
        is_right = (predicted[0] == word_number)
        if show_details:
            if is_right:
                print "Right %s" % words[word_number]
            else:
                print "Wrong %s found %s" % (words[word_number], words[predicted[0]])
        if is_right:
            n_right += 1
    return n_right
def run_tests(get_batch_size, train):
    for batch_size in range(100, 1001, 100):
        n_right = run_test(words, get_batch_size(batch_size), train)
        print " Batch size %d, Accuracy: %d / %d" % (batch_size, n_right, len(words))

print "Batches in same order as file, non-full batch appears last"
def get_batch_size(batch_size):
    # constant schedule: every batch is requested at the same size
    def get_batch_size_2(batch_number):
        return batch_size
    return get_batch_size_2
print " Train once"
run_tests(get_batch_size, train)
print " Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears first"
def get_batch_size(batch_size):
    # the first batch takes the remainder, so the non-full batch comes first
    def get_batch_size_2(batch_number):
        if batch_number == 0:
            return number_of_words % batch_size
        else:
            return batch_size
    return get_batch_size_2
print " Train once"
run_tests(get_batch_size, train)
print " Train twice"
run_tests(get_batch_size, train_twice)

print "Batches in same order as file, non-full batch appears last, batch size alternates by div 2"
def get_batch_size(batch_size):
    # alternating schedule: even-numbered batches are half size
    def get_batch_size_2(batch_number):
        if batch_number % 2 == 0:
            return batch_size / 2
        else:
            return batch_size
    return get_batch_size_2
print " Train once"
run_tests(get_batch_size, train)
print " Train twice"
run_tests(get_batch_size, train_twice)
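To reproduce the runs above, the script expects a plain text file with one lowercase word per line (a-z only, at most 25 letters) and a trailing newline, since it pops the final empty string after splitting. I am only guessing at how words_999.txt was produced; a hypothetical generator for a similar 999-word file:

import random
import string

# Hypothetical generator for a words file in the expected format:
# one lowercase word (3-10 letters here) per line, ending with a newline.
random.seed(0)
seen = set()
while len(seen) < 999:
    length = random.randint(3, 10)
    seen.add("".join(random.choice(string.ascii_lowercase) for _ in range(length)))
with open("words_999.txt", "w") as f:
    for word in sorted(seen):
        f.write(word + "\n")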