This design does not work well enough. I also tried outputting the operator to be used, and making the output structure match the input structure; the results were equally bad.
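To make the two designs concrete (a sketch using the helper functions from the listing below, not output from the trained model), take the three-operator expression + * -, i.e. the tuple ('+', '*', '-'):

    train_data_output_is_position(3)[('+', '*', '-')]   # -> 1, the index of the first '*' or '/'
    train_data_output_is_operator(3)[('+', '*', '-')]    # -> 2, i.e. operator_to_position('*')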
Data
Number of positions 2
  Train once   16 / 20
  Train twice  16 / 20
Number of positions 3
  Train once   56 / 84
  Train twice  56 / 84
Number of positions 4
  Train once   200 / 340
  Train twice  200 / 340
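The denominators are simply the number of expressions the training-data generators enumerate: four operators at each of 1..n positions gives 4 + 16 = 20 for n = 2, plus 64 = 84 for n = 3, plus 256 = 340 for n = 4. As a one-line check (a sketch, not part of the listing):

    [sum(4**k for k in range(1, n + 1)) for n in (2, 3, 4)]   # -> [20, 84, 340]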
Code
import tensorflow as tf
import numpy as np
"""
Let's learn how to evaluate an arithmetic expression using '+', '-', '*' and '/'
"""
import itertools
import sys
show_details = (len(sys.argv) > 1)
# no hidden layers or one hidden layer
one_level_version = True
# output layer is either the operator that is selected or the position in the input
#output_is = "operator"
output_is = "position"
# length of the expressions
number_of_positions = 4
# the output is the position that should be processed next
def train_data_output_is_position(number_of_positions):
    data = {}
    for length in range(1, number_of_positions + 1):
        for input in itertools.product('+-*/', repeat=length):
            select = None
            for index, ch in enumerate(input):
                if ch == '*' or ch == '/':
                    select = index
                    break
            if select is None:
                select = 0
            data[input] = select
    return data
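# Spot checks of the mapping built above (hypothetical calls, for illustration):
#   train_data_output_is_position(2)[('+', '*')] == 1   # first '*' or '/' is at index 1
#   train_data_output_is_position(2)[('/', '-')] == 0   # '/' sits at index 0
#   train_data_output_is_position(2)[('+', '-')] == 0   # no '*' or '/', so default to 0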
# the output is the operator that should be processed next
def train_data_output_is_operator(number_of_positions):
    data = {}
    for length in range(1, number_of_positions + 1):
        for input in itertools.product('+-*/', repeat=length):
            select = None
            for index, ch in enumerate(input):
                if ch == '*' or ch == '/':
                    select = operator_to_position(ch)
                    break
            if select is None:
                select = 0
            data[input] = select
    return data
def operator_to_position(operator):
    return {
        "+": 0,
        "-": 1,
        "*": 2,
        "/": 3,
    }[operator]
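# Examples of the mapping: operator_to_position('+') == 0, operator_to_position('*') == 2, operator_to_position('/') == 3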
#number_of_positions = 2
#output_is = "operator"
number_of_operators = 4
if output_is == "operator":
    training_data = train_data_output_is_operator(number_of_positions)
    number_of_outputs = number_of_operators
else:
    training_data = train_data_output_is_position(number_of_positions)
    number_of_outputs = number_of_positions
number_of_training_data = len(training_data)
size_of_sections = [
    number_of_positions * number_of_operators
]
position_of_sections = [0]
last_position = 0
for size in size_of_sections:
    position_of_sections.append(last_position + size)
number_of_inputs = sum(size_of_sections)
# setup input nodes for a word
def input_to_train(input, inputs):
    #import pdb; pdb.set_trace()
    length = len(input)
    for index, operator in enumerate(input):
        inputs[index * number_of_operators + operator_to_position(operator)] = 1
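# Illustration of the one-hot layout above (with number_of_operators == 4): for the word ('+', '*'),
# position 0 holds '+' so inputs[0*4 + 0] = inputs[0] is set to 1, and position 1 holds '*'
# so inputs[1*4 + 2] = inputs[6] is set to 1; every other slot stays 0.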
# do the training; batches are sized by get_batch_size(batch_number)
def train(sess, get_batch_size, train_step, x, y_):
    #import pdb; pdb.set_trace();
    batch_number = 0
    batch_size = get_batch_size(batch_number)
    x_array = np.zeros(shape=(min(number_of_training_data, batch_size), number_of_inputs), dtype=float)
    y_array = np.zeros(shape=(min(number_of_training_data, batch_size), number_of_outputs), dtype=float)
    index = 0
    to_do = number_of_training_data
    for input, output in training_data.iteritems():
        input_to_train(input, x_array[index])
        y_array[index][output] = 1
        index += 1
        to_do -= 1
        if index == batch_size or input == training_data.keys()[-1]:
            batch_number += 1
            batch_size = get_batch_size(batch_number)
            index = 0
            sess.run(train_step, feed_dict={x: x_array, y_: y_array})
            x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
            y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)
def train_twice(sess, get_batch_size, train_step, x, y_):
    train(sess, get_batch_size, train_step, x, y_)
    train(sess, get_batch_size, train_step, x, y_)
def run_test_one_word(sess, x, y, word):
    x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
    input_to_train(word, x_array[0])
    prediction = tf.argmax(y, 1)
    prediction = sess.run(prediction, feed_dict={x: x_array})
    #the_y = sess.run(y, feed_dict={x: x_array})
    return prediction[0]
def run_test_for_case(sess, x, y):
    n_right = 0
    n_checked = 0
    for input, output in training_data.iteritems():
        test_words = [input]
        for test_word in test_words:
            prediction = run_test_one_word(sess, x, y, test_word)
            is_right = prediction == output
            if show_details:
                if not is_right:
                    import pdb; pdb.set_trace()
                    print "Wrong %s found %s was %s" % (test_word, prediction, output)
                    #run_test_one_word(sess, x, y, test_word)
            if is_right:
                n_right += 1
            n_checked += 1
    return (n_right, n_checked)
def run_test(words, get_batch_size, train):
    x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
    y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])
    #old_version = False
    if one_level_version:
        # single softmax layer: inputs straight to outputs
        W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
        b = tf.Variable(tf.zeros([number_of_outputs]))
        y = tf.nn.softmax(tf.matmul(x, W) + b)
    else:
        # hidden-layer variant: two weight matrices multiplied together,
        # with no nonlinearity between them and all weights initialized to zero
        h_size = 20
        W = tf.Variable(tf.zeros([number_of_inputs, h_size]))
        H = tf.Variable(tf.zeros([h_size, number_of_outputs]))
        b = tf.Variable(tf.zeros([number_of_outputs]))
        y = tf.nn.softmax(tf.matmul(tf.matmul(x, W), H) + b)
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    sess.run(tf.initialize_all_variables())
    #import pdb; pdb.set_trace();
    train(sess, get_batch_size, train_step, x, y_)
    results = {}
    results[""] = run_test_for_case(sess, x, y)
    return results
def run_tests(get_batch_size, train):
    #import pdb; pdb.set_trace();
    results = run_test(training_data, get_batch_size, train)
    for key in results.keys():
        (n_right, n_checked) = results[key]
        print "%50s %d / %d" % (key, n_right, n_checked)
def get_batch_size(batch_size):
    # factory: returns a function mapping batch_number -> batch size (here always the whole training set)
    def get_batch_size_2(batch_number):
        return number_of_training_data
    return get_batch_size_2
print "Number of positions %s" % number_of_positions
print " Train once"
run_tests(get_batch_size, train)
print " Train twice"
run_tests(get_batch_size, train_twice)
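To reproduce the numbers in the Data section, save the listing (for example as expression.py, a filename chosen here only for illustration) and run it under Python 2 with an old TensorFlow release that still provides tf.initialize_all_variables:

    python expression.py

Passing any extra argument, for example python expression.py details, turns on show_details, which drops into pdb and prints each expression the model gets wrong.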