# 07_neural_network.py
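"""A small fully connected neural network implemented from scratch with NumPy.

Hidden layers use tanh and the output layer uses sigmoid; the model is trained
with manually derived backpropagation on a synthetic two-class 2-D dataset,
and the loss curve and decision boundary are plotted with matplotlib.
"""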
import numpy as np
import matplotlib.pyplot as plt


class NeuralNetwork:
    def __init__(self, inp_size, out_size, hid_sizes=(6, 6)):
        self.inp_size = inp_size
        self.out_size = out_size
        self.hid_sizes = hid_sizes
        self.params = self._init_params()

    @staticmethod
    def create_dataset():
        np.random.seed(42)
        num_sample = 400
        num_cls = 2
        num_sample_each_cls = num_sample // num_cls
        dim = 2
        X = np.zeros((num_sample, dim))
        Y = np.zeros((num_sample, 1), dtype=np.int8)
        a = 4
        for cls in range(num_cls):
            ix = range(num_sample_each_cls * cls, num_sample_each_cls * (cls + 1))  # sample indices for this class
            theta = np.linspace(cls * 3.12, (cls + 1) * 3.12, num_sample_each_cls) + np.random.randn(
                num_sample_each_cls) * 0.2  # angle for this class, with noise
            radius = a * np.sin(4 * theta) + np.random.randn(num_sample_each_cls) * 0.2  # radius for this class, with noise
            X[ix] = np.c_[radius * np.sin(theta), radius * np.cos(theta)]
            Y[ix] = cls
        return X, Y

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def _compute_loss(y_true, y_pred):
        """Compute the mean binary cross-entropy loss (natural log, to match the gradient used in _backward)."""
        num_samples = y_true.shape[0]
        return -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)) / num_samples

    def _init_params(self):
        params = {}
        # X has shape [N, inp_size], W0 has shape [inp_size, h1_size],
        # so the first layer computes X * W0 + b0; the loop below creates
        # W_i and b_i for every hidden layer.
        pre_size = self.inp_size
        i = 0
        for h_size in self.hid_sizes:
            params[f'W{i}'] = np.random.randn(pre_size, h_size)
            params[f'b{i}'] = np.zeros((1, h_size))
            pre_size = h_size
            i += 1
        # weight and bias from the last hidden layer to the output layer
        params[f'W{i}'] = np.random.randn(pre_size, self.out_size)
        params[f'b{i}'] = np.zeros((1, self.out_size))
        # Debug: print parameter shapes
        print('parameters:')
        for key in params.keys():
            print(key, ': ', params[key].shape)
        return params

    def _forward(self, X):
        """Hidden layers use the tanh activation; the output layer uses the sigmoid activation."""
        i = 0
        cache = {}
        inp = X.copy()
        # hidden layers
        for _ in range(len(self.hid_sizes)):
            cache[f'Z{i}'] = np.dot(inp, self.params[f'W{i}']) + self.params[f'b{i}']
            cache[f'A{i}'] = np.tanh(cache[f'Z{i}'])
            inp = cache[f'A{i}']
            i += 1
        # output layer
        cache[f'Z{i}'] = np.dot(inp, self.params[f'W{i}']) + self.params[f'b{i}']
        cache[f'A{i}'] = self.sigmoid(cache[f'Z{i}'])
        return cache[f'A{i}'], cache

    def _backward(self, cache, X, Y):
        i = len(self.hid_sizes)
        A_n = cache[f'A{i}']  # output of the output layer (after sigmoid), shape [N, 1]
        dZ_n = A_n - Y  # [N, 1]
        num_samples = X.shape[0]  # X has shape [N, inp_size], so the sample count is the first dimension
        # back-propagate through the hidden layers:
        # given dZ_i, A_(i-1) and W_i, compute dW_i, db_i and dZ_(i-1)
        dZ_i = dZ_n
        grads = {}
        for _ in range(len(self.hid_sizes)):
            grads[f'dW{i}'] = np.dot(cache[f'A{i - 1}'].T, dZ_i) / num_samples
            grads[f'db{i}'] = np.sum(dZ_i, axis=0, keepdims=True) / num_samples
            dZ_i = np.dot(dZ_i, self.params[f'W{i}'].T) * (1 - cache[f'A{i - 1}'] ** 2)  # tanh'(x) = 1 - tanh(x)^2
            i -= 1
        # first layer: the "previous activation" is the input X itself
        grads[f'dW{i}'] = np.dot(X.T, dZ_i) / num_samples
        grads[f'db{i}'] = np.sum(dZ_i, axis=0, keepdims=True) / num_samples
        return grads

    def _update(self, grads, lr):
        for i in range(len(self.hid_sizes) + 1):
            self.params[f'W{i}'] -= lr * grads[f'dW{i}']
            self.params[f'b{i}'] -= lr * grads[f'db{i}']

    def train(self, X, Y, lr, epochs):
        loss_list = []
        for epoch in range(epochs):
            # forward pass
            Y_pred, cache = self._forward(X)
            # compute loss
            loss = self._compute_loss(Y, Y_pred)
            loss_list.append(loss)
            # backward pass
            grads = self._backward(cache, X, Y)
            # update parameters
            self._update(grads, lr)
            if epoch % 100 == 0:
                print(f'Epoch {epoch} loss {loss:.4f}')
        return loss_list

    def predict(self, X):
        Y_pred, _ = self._forward(X)
        return (Y_pred > 0.5).astype(np.int8)
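

# Not part of the original file: a minimal numerical gradient-check sketch that
# compares the analytic gradients from _backward against central finite
# differences of _compute_loss. Intended only as a sanity check; the helper
# name and the eps value are assumptions made here for illustration.
def gradient_check(nn, X, Y, eps=1e-5):
    _, cache = nn._forward(X)
    analytic = nn._backward(cache, X, Y)
    for key, param in nn.params.items():
        numeric = np.zeros_like(param)
        it = np.nditer(param, flags=['multi_index'])
        for _ in it:
            idx = it.multi_index
            old = param[idx]
            param[idx] = old + eps
            loss_plus = nn._compute_loss(Y, nn._forward(X)[0])
            param[idx] = old - eps
            loss_minus = nn._compute_loss(Y, nn._forward(X)[0])
            param[idx] = old  # restore the original value
            numeric[idx] = (loss_plus - loss_minus) / (2 * eps)
        diff = np.max(np.abs(numeric - analytic[f'd{key}']))
        print(f'{key}: max |numeric - analytic| = {diff:.2e}')

# Example usage (commented out so the script behaviour is unchanged):
# gradient_check(NeuralNetwork(2, 1, [4]), *NeuralNetwork.create_dataset())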


if __name__ == '__main__':
    # create dataset
    X, Y = NeuralNetwork.create_dataset()
    # train model
    nn = NeuralNetwork(2, 1, [10, 10])
    loss_list = nn.train(X, Y, 0.001, 10000)
    # plot loss
    plt.plot(loss_list, label='loss')
    plt.legend()
    plt.show()
    # plot sample points and decision boundary
    plt.scatter(X[:, 0], X[:, 1], c=Y[:, 0], cmap=plt.cm.Spectral)
    _x_min, _x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    _y_min, _y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(_x_min, _x_max, 0.02), np.arange(_y_min, _y_max, 0.02))
    Z = nn.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu, alpha=0.1)
    plt.show()
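    # Not in the original script: a quick training-set accuracy check for the
    # trained model, using the predict method defined above.
    Y_hat = nn.predict(X)
    print(f'training accuracy: {np.mean(Y_hat == Y):.3f}')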