'''
Adapting the Keras character-level LSTM text-generation model to plain TensorFlow, from:
https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/
'''
import numpy as np
import tensorflow as tf
# TF 1.x API; the extra cell classes are drop-in alternatives to BasicLSTMCell
from tensorflow.contrib.rnn import BasicLSTMCell, BasicRNNCell, GRUCell, LSTMCell
def to_categorical(y, num_classes=None):
    """Converts a class vector (integers) to a binary class matrix.

    E.g. for use with categorical_crossentropy.

    # Arguments
        y: class vector to be converted into a matrix
            (integers from 0 to num_classes).
        num_classes: total number of classes.

    # Returns
        A binary matrix representation of the input.
    """
    y = np.array(y, dtype='int')
    input_shape = y.shape
    if input_shape and input_shape[-1] == 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    if not num_classes:
        num_classes = np.max(y) + 1
    n = y.shape[0]
    categorical = np.zeros((n, num_classes))
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes,)
    categorical = np.reshape(categorical, output_shape)
    return categorical
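# Quick sanity check (illustrative addition, not in the original script):
# integer labels one-hot encode row-wise.
assert np.array_equal(
    to_categorical([0, 2, 1], num_classes=3),
    np.array([[1., 0., 0.],
              [0., 0., 1.],
              [0., 1., 0.]]))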
# load ascii text and convert to lowercase
filename = "wonderland.txt"
raw_text = open(filename, encoding="utf8").read()
raw_text = raw_text.lower()
# create mapping of unique chars to integers
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
n_chars = len(raw_text)
n_vocab = len(chars)
#print("Total Characters:", n_chars)
#print("Total Vocab:", n_vocab)
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
#print("Total Patterns:", n_patterns)
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable (pass num_classes explicitly so the
# width always matches the n_vocab-wide placeholder, even if some character
# never appears as a target)
Y = to_categorical(dataY, num_classes=n_vocab)
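# Shape sanity check (illustrative addition): X is [samples, time steps,
# features] and Y is one-hot over the vocabulary.
assert X.shape == (n_patterns, seq_length, 1)
assert Y.shape == (n_patterns, n_vocab)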
'''
Create TF model
'''
units = 256
data_x = tf.placeholder(tf.float32, shape=(None, seq_length, 1))
data_y = tf.placeholder(tf.float32, shape=(None, n_vocab))
batch_size = tf.shape(data_x)[0]
#Create tf cell, API reference: https://www.tensorflow.org/api_docs/python/tf/contrib/rnn
rnn_cell = BasicLSTMCell(num_units=units, forget_bias=1.0)
initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
#Compute RNN
outputs, state = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=data_x, initial_state=initial_state, dtype=tf.float32)
#Take the output at the last time step
#(from: https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/)
outputs = tf.transpose(outputs, [1, 0, 2])
last = tf.gather(outputs, int(outputs.get_shape()[0]) - 1)
#Keras Dropout(0.2) drops 20% of units, so keep_prob is 0.8, not 0.2
to_forward = tf.nn.dropout(x=last, keep_prob=0.8)
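# Equivalent shortcut (illustrative, not in the original): for an LSTM cell,
# the final hidden state returned by dynamic_rnn equals the last time-step
# output, so this yields the same tensor as the transpose/gather above.
last_via_state = state.h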
#Dense layer: activation(dot(input, kernel) + bias)
w_kernel = tf.Variable(tf.random_uniform(shape=(units, n_vocab)))
b_kernel = tf.Variable(tf.zeros(shape=(1, n_vocab)))
dense = tf.nn.softmax(tf.add(tf.tensordot(to_forward, w_kernel, axes=1), b_kernel))
# scale preds so that the class probas of each sample sum to 1
# (mirrors the Keras categorical_crossentropy backend; softmax output already
# sums to 1, so this is kept only for fidelity to the original)
dense /= tf.reduce_sum(dense, axis=tf.rank(dense) - 1, keep_dims=True)
# manual computation of cross-entropy, mirroring the Keras backend
# (Keras clips with epsilon = 1e-7; the original's 10e-8 is the same value)
_epsilon = tf.convert_to_tensor(1e-7, dtype=tf.float32)
scaled = tf.clip_by_value(dense, _epsilon, 1. - _epsilon)
# per-sample cross-entropy, averaged over the batch to give a scalar loss
per_sample_loss = -tf.reduce_sum(data_y * tf.log(scaled), axis=tf.rank(scaled) - 1)
loss = tf.reduce_mean(per_sample_loss)
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
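# A numerically stabler alternative (illustrative, not the original approach):
# feed the pre-softmax logits to TF's fused cross-entropy op instead of
# applying softmax and clipping by hand.
#   logits = tf.add(tf.tensordot(to_forward, w_kernel, axes=1), b_kernel)
#   loss = tf.reduce_mean(
#       tf.nn.softmax_cross_entropy_with_logits(labels=data_y, logits=logits))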
with tf.Session() as sess:
    # tf.initialize_all_variables is deprecated; use global_variables_initializer
    init = tf.global_variables_initializer()
    sess.run(init)
    file_val = ""
    #X, Y = gen_data(file_train, seq_length)
    #Now train and validate model
    batch_size = 128  # note: this Python int shadows the symbolic batch_size tensor above
    sess.run(train_step, feed_dict={data_x: X[0:1000], data_y: Y[0:1000]})
    print("Ran one training step on the first 1000 patterns")