-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path03_k_nearest_neighbors.py
49 lines (39 loc) · 1.84 KB
/
03_k_nearest_neighbors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy as np
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
class KNearestNeighbor:
    """Brute-force k-nearest-neighbour classifier with majority voting.

    The training set is only stored at construction time; all computation
    happens in ``predict``.
    """

    def __init__(self, X, Y):
        """Store the training data.

        Args:
            X: Training features, array-like of shape [NUM_TRAIN, NUM_FEATURES].
            Y: Training labels, array-like of shape [NUM_TRAIN].
        """
        # np.asarray returns ndarrays unchanged (no copy) and lets callers
        # pass plain Python lists as well.
        self.X_train = np.asarray(X)
        self.Y_train = np.asarray(Y)

    def _compute_dist(self, X):
        """Return squared Euclidean distances between test and training points.

        Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 * a.b`` so that all
        test/train pairs are handled by a single matrix product:

            dists = sum(X_test**2, axis=-1)[:, None]
                    + sum(X_train**2, axis=-1)[None, :]
                    - 2 * X_test @ X_train.T

        The square root is intentionally not applied: it is monotonic, so the
        neighbour ranking is the same. Because of floating-point rounding in
        the expansion, entries that should be exactly 0 may come out as tiny
        positive or negative values.

        Args:
            X: Test features, array-like of shape [NUM_TEST, NUM_FEATURES].

        Returns:
            Array of shape [NUM_TEST, NUM_TRAIN] with squared distances.
        """
        X = np.asarray(X)
        X_train = np.asarray(self.X_train)
        test_sq = np.sum(X ** 2, axis=-1).reshape(-1, 1)          # [NUM_TEST, 1]
        train_sq = np.sum(X_train ** 2, axis=-1).reshape(1, -1)   # [1, NUM_TRAIN]
        cross = np.dot(X, X_train.T)                              # [NUM_TEST, NUM_TRAIN]
        dists = test_sq + train_sq - 2 * cross
        return dists

    def predict(self, X, k=1):
        """Predict a label for every test sample by majority vote of k neighbours.

        Ties in the vote are resolved in favour of the label that appears
        first among the neighbours when ordered from nearest to farthest.
        If ``k`` exceeds the number of training samples, all of them vote.

        Args:
            X: Test features, array-like of shape [NUM_TEST, NUM_FEATURES].
                A single 1-D sample of shape [NUM_FEATURES] is also accepted
                and treated as a batch of one.
            k: Number of nearest neighbours that vote; must be >= 1.

        Returns:
            Array of shape [NUM_TEST] with the predicted labels, using the
            dtype of the training labels.

        Raises:
            ValueError: If ``k`` is smaller than 1 or there is no training data.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")

        Y_train = np.asarray(self.Y_train)
        if Y_train.shape[0] == 0:
            raise ValueError("cannot predict without any training samples")

        X = np.atleast_2d(np.asarray(X))

        dists = self._compute_dist(X)                  # [NUM_TEST, NUM_TRAIN]
        indices = np.argsort(dists, axis=-1)[:, :k]    # [NUM_TEST, K]
        Y_closest = Y_train[indices]                   # [NUM_TEST, K]

        # Vote row by row and build the result with the label dtype explicitly.
        # np.apply_along_axis would size the output from the first result only,
        # silently truncating longer string labels.
        votes = [Counter(row).most_common(1)[0][0] for row in Y_closest]
        Y_pred = np.array(votes, dtype=Y_train.dtype)
        return Y_pred

    @staticmethod
    def create_data():
        """Load the Iris dataset and return its features and targets, shuffled.

        Returns:
            The ``(data, target)`` pair produced by ``shuffle``. No random
            state is passed, so the order differs from call to call.
        """
        data = load_iris()
        return shuffle(data.data, data.target)
if __name__ == '__main__':
    # Demo run: build the classifier from the shuffled Iris data and score it.
    features, labels = KNearestNeighbor.create_data()
    classifier = KNearestNeighbor(features, labels)
    # NOTE: predictions are made for the very samples the classifier was built
    # from, so the printed value is training-set accuracy, not a held-out score.
    predictions = classifier.predict(features, k=5)
    score = accuracy_score(labels, predictions)
    print(score)