This design does not work well enough. I also tried outputting the operator to be used, and making the output structure match the input structure; the results were equally bad.
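To make the two designs concrete (a sketch using the helper functions from the listing below, not output from the trained model), take the three-operator expression + * -, i.e. the tuple ('+', '*', '-'):

    train_data_output_is_position(3)[('+', '*', '-')]   # -> 1, the index of the first '*' or '/'
    train_data_output_is_operator(3)[('+', '*', '-')]    # -> 2, i.e. operator_to_position('*')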
Data
Number of positions 2
  Train once   16 / 20
  Train twice  16 / 20
Number of positions 3
  Train once   56 / 84
  Train twice  56 / 84
Number of positions 4
  Train once   200 / 340
  Train twice  200 / 340
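The denominators are simply the number of expressions the training-data generators enumerate: four operators at each of 1..n positions gives 4 + 16 = 20 for n = 2, plus 64 = 84 for n = 3, plus 256 = 340 for n = 4. As a one-line check (a sketch, not part of the listing):

    [sum(4**k for k in range(1, n + 1)) for n in (2, 3, 4)]   # -> [20, 84, 340]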
Code
import tensorflow as tf
import numpy as np
"""
Let's learn how to evaluate an arithmetic expression using '+', '-', '*' and '/'
"""
import itertools
import sys
show_details = (len(sys.argv) > 1)
# no hidden layers or one hidden layer
one_level_version = True
# output layer is either the operator that is selected or the position in the input
#output_is = "operator"
output_is = "position"
# length of the expressions
number_of_positions = 4
# the output is the position that should be processed next
def train_data_output_is_position(number_of_positions):
    data = {}
    for length in range(1, number_of_positions + 1):
        for input in itertools.product('+-*/', repeat=length):
            select = None
            for index, ch in enumerate(input):
                if ch == '*' or ch == '/':
                    select = index
                    break
            if select is None:
                select = 0
            data[input] = select
    return data
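# Spot checks of the mapping built above (hypothetical calls, for illustration):
#   train_data_output_is_position(2)[('+', '*')] == 1   # first '*' or '/' is at index 1
#   train_data_output_is_position(2)[('/', '-')] == 0   # '/' sits at index 0
#   train_data_output_is_position(2)[('+', '-')] == 0   # no '*' or '/', so default to 0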
# the output is the operator that should be processed next
def train_data_output_is_operator(number_of_positions):
    data = {}
    for length in range(1, number_of_positions + 1):
        for input in itertools.product('+-*/', repeat=length):
            select = None
            for index, ch in enumerate(input):
                if ch == '*' or ch == '/':
                    select = operator_to_position(ch)
                    break
            if select is None:
                select = 0
            data[input] = select
    return data
def operator_to_position(operator):
    return {
        "+": 0,
        "-": 1,
        "*": 2,
        "/": 3,
    }[operator]
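# Examples of the mapping: operator_to_position('+') == 0, operator_to_position('*') == 2, operator_to_position('/') == 3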
#number_of_positions = 2
#output_is = "operator"
number_of_operators = 4
if output_is == "operator":
    training_data = train_data_output_is_operator(number_of_positions)
    number_of_outputs = number_of_operators
else:
    training_data = train_data_output_is_position(number_of_positions)
    number_of_outputs = number_of_positions
number_of_training_data = len(training_data)
size_of_sections = [
    number_of_positions * number_of_operators
]
position_of_sections = [0]
last_position = 0
for size in size_of_sections:
    position_of_sections.append(last_position + size)
number_of_inputs = sum(size_of_sections)
# setup input nodes for a word
def input_to_train(input, inputs):
    #import pdb; pdb.set_trace()
    length = len(input)
    for index, operator in enumerate(input):
        inputs[index * number_of_operators + operator_to_position(operator)] = 1
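# Illustration of the one-hot layout above (with number_of_operators == 4): for the word ('+', '*'),
# position 0 holds '+' so inputs[0*4 + 0] = inputs[0] is set to 1, and position 1 holds '*'
# so inputs[1*4 + 2] = inputs[6] is set to 1; every other slot stays 0.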
# do the training; batches are sized by get_batch_size(batch_number)
def train(sess, get_batch_size, train_step, x, y_):
    #import pdb; pdb.set_trace();
    batch_number = 0
    batch_size = get_batch_size(batch_number)
    x_array = np.zeros(shape=(min(number_of_training_data, batch_size), number_of_inputs), dtype=float)
    y_array = np.zeros(shape=(min(number_of_training_data, batch_size), number_of_outputs), dtype=float)
    index = 0
    to_do = number_of_training_data
    for input, output in training_data.iteritems():
        input_to_train(input, x_array[index])
        y_array[index][output] = 1
        index += 1
        to_do -= 1
        if index == batch_size or input == training_data.keys()[-1]:
            batch_number += 1
            batch_size = get_batch_size(batch_number)
            index = 0
            sess.run(train_step, feed_dict={x: x_array, y_: y_array})
            x_array = np.zeros(shape=(min(batch_size, to_do), number_of_inputs), dtype=float)
            y_array = np.zeros(shape=(min(batch_size, to_do), number_of_outputs), dtype=float)
def train_twice(sess, get_batch_size, train_step, x, y_):
    train(sess, get_batch_size, train_step, x, y_)
    train(sess, get_batch_size, train_step, x, y_)
def run_test_one_word(sess, x, y, word):
    x_array = np.zeros(shape=(1, number_of_inputs), dtype=float)
    input_to_train(word, x_array[0])
    prediction = tf.argmax(y, 1)
    prediction = sess.run(prediction, feed_dict={x: x_array})
    #the_y = sess.run(y, feed_dict={x: x_array})
    return prediction[0]
def run_test_for_case(sess, x, y):
    n_right = 0
    n_checked = 0
    for input, output in training_data.iteritems():
        test_words = [input]
        for test_word in test_words:
            prediction = run_test_one_word(sess, x, y, test_word)
            is_right = prediction == output
            if show_details:
                if not is_right:
                    import pdb; pdb.set_trace()
                    print "Wrong %s found %s was %s" % (test_word, prediction, output)
                    #run_test_one_word(sess, x, y, test_word)
            if is_right:
                n_right += 1
            n_checked += 1
    return (n_right, n_checked)
def run_test(words, get_batch_size, train):
    x = tf.placeholder(tf.float32, shape=[None, number_of_inputs])
    y_ = tf.placeholder(tf.float32, shape=[None, number_of_outputs])
    #old_version = False
    if one_level_version:
        # single softmax layer: inputs straight to outputs
        W = tf.Variable(tf.zeros([number_of_inputs, number_of_outputs]))
        b = tf.Variable(tf.zeros([number_of_outputs]))
        y = tf.nn.softmax(tf.matmul(x, W) + b)
    else:
        # hidden-layer variant: two weight matrices multiplied together,
        # with no nonlinearity between them and all weights initialized to zero
        h_size = 20
        W = tf.Variable(tf.zeros([number_of_inputs, h_size]))
        H = tf.Variable(tf.zeros([h_size, number_of_outputs]))
        b = tf.Variable(tf.zeros([number_of_outputs]))
        y = tf.nn.softmax(tf.matmul(tf.matmul(x, W), H) + b)
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    sess.run(tf.initialize_all_variables())
    #import pdb; pdb.set_trace();
    train(sess, get_batch_size, train_step, x, y_)
    results = {}
    results[""] = run_test_for_case(sess, x, y)
    return results
def run_tests(get_batch_size, train):
    #import pdb; pdb.set_trace();
    results = run_test(training_data, get_batch_size, train)
    for key in results.keys():
        (n_right, n_checked) = results[key]
        print "%50s %d / %d" % (key, n_right, n_checked)
def get_batch_size(batch_size):
    # factory: returns a function mapping batch_number -> batch size (here always the whole training set)
    def get_batch_size_2(batch_number):
        return number_of_training_data
    return get_batch_size_2
print "Number of positions %s" % number_of_positions
print " Train once"
run_tests(get_batch_size, train)
print " Train twice"
run_tests(get_batch_size, train_twice)
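To reproduce the numbers in the Data section, save the listing (for example as expression.py, a filename chosen here only for illustration) and run it under Python 2 with an old TensorFlow release that still provides tf.initialize_all_variables:

    python expression.py

Passing any extra argument, for example python expression.py details, turns on show_details, which drops into pdb and prints each expression the model gets wrong.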