机器学习常用算法之KNN算法
1.基于numpy实现
from numpy import tile, array


def classify0(inx, data_set, labels, k):
    """Classify ``inx`` by majority vote among its ``k`` nearest neighbours.

    The Euclidean distance ``((xa0 - xb0) ** 2 + (xa1 - xb1) ** 2 + ...) ** 0.5``
    from ``inx`` to every row of ``data_set`` is computed, the rows are ranked
    from nearest to farthest, and the label that occurs most often among the
    first ``k`` rows is returned.  On a tie, the tied label that is reached
    first when walking outwards from the nearest neighbour wins.

    :param inx: input vector to classify (one value per feature)
    :param data_set: training samples as a 2-D array, one row per sample
    :param labels: label of each training row, indexable by row number
    :param k: number of nearest neighbours that vote; values larger than the
        number of training rows are capped to that number
    :return: the predicted label (an element of ``labels``)
    :raises ValueError: if ``data_set`` has no rows or ``k`` is smaller than 1
    """
    # shape is (rows, columns); only the row count is needed here.
    data_set_size = data_set.shape[0]
    if data_set_size == 0:
        raise ValueError("data_set must contain at least one sample")
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Asking for more neighbours than there are samples would index past the
    # end of the ranking; let every sample vote instead.
    k = min(k, data_set_size)

    # tile repeats inx once per training row so it can be subtracted row-wise.
    diff_mat = tile(inx, (data_set_size, 1)) - data_set
    # Square the coordinate differences, sum them per row, take the root.
    distance = (diff_mat ** 2).sum(axis=1) ** 0.5
    # argsort yields row indices ordered from nearest to farthest.
    nearest_rows = distance.argsort()[:k]

    # Tally the labels of the k nearest rows (insertion order = nearest first).
    class_count_map = {}
    for row in nearest_rows:
        vote_label = labels[row]
        class_count_map[vote_label] = class_count_map.get(vote_label, 0) + 1

    # max returns the first maximal key in insertion order, which is the same
    # winner a stable descending sort by count would put in front.
    return max(class_count_map, key=class_count_map.get)


def create_data_set_label():
    """Return the four-sample demo data set and the matching labels."""
    data_set = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ["A", "A", "B", "B"]
    return data_set, labels


if __name__ == "__main__":
    data_set, labels = create_data_set_label()
    inx = [0, 0]
    predict = classify0(inx, data_set, labels, 3)
    print(predict)
2.基于tensorflow实现
import numpy
import tensorflow as tf
from numpy import array, tile


def get_data_distance(data, input_data):
    """Return ``data`` together with its Euclidean distance to ``input_data``.

    ``input_data`` is tiled ``data.shape[0]`` times, subtracted from ``data``,
    and the squared differences are summed along axis 1 before the square
    root is taken, so ``distance`` has one entry per tiled copy.

    NOTE(review): ``classify0`` maps this function over single samples, where
    ``data.shape[0]`` is the feature count rather than a row count.  Every
    entry of ``distance`` then appears to hold the same sample-to-input
    distance, which is why the caller reads only entry 0.  The return shape is
    kept as is so existing callers are not affected.

    :param data: tensor to measure from (one sample when used by classify0)
    :param input_data: input vector to measure to
    :return: tuple ``(data, distance)``
    """
    dats_size = data.shape[0]
    diff_mat = tf.subtract(data, tile(input_data, (dats_size, 1)))
    square_diff_mat = tf.square(diff_mat)
    square_diff_mat_sum = tf.reduce_sum(square_diff_mat, axis=1)
    distance = tf.sqrt(square_diff_mat_sum)
    return data, distance


def classify0(data_set: numpy.ndarray, labels: list, inx, k: int):
    """Classify ``inx`` by majority vote among its ``k`` nearest neighbours.

    Distances are computed by mapping ``get_data_distance`` over a
    ``tf.data.Dataset`` built from the rows of ``data_set``.  On a tie, the
    tied label that is reached first when walking outwards from the nearest
    neighbour wins.

    :param data_set: training samples, one row per sample
    :param labels: label of each training row, indexable by row number
    :param inx: input vector to classify
    :param k: number of nearest neighbours that vote; values larger than the
        number of training rows are capped to that number
    :return: the predicted label (an element of ``labels``)
    :raises ValueError: if ``data_set`` has no rows or ``k`` is smaller than 1
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    train_dataset = tf.data.Dataset.from_tensor_slices(data_set)
    train_dataset = train_dataset.map(map_func=lambda d: get_data_distance(d, inx))

    # Each element is (sample, distance vector); entry 0 of the vector is the
    # sample's distance to inx (see get_data_distance).
    distance = numpy.array(
        [item[1][0] for item in train_dataset.as_numpy_iterator()]
    )
    if distance.size == 0:
        raise ValueError("data_set must contain at least one sample")
    # Asking for more neighbours than there are samples would index past the
    # end of the ranking; let every sample vote instead.
    k = min(k, distance.size)

    # argsort yields row indices ordered from nearest to farthest.
    nearest_rows = distance.argsort()[:k]

    # Tally the labels of the k nearest rows (insertion order = nearest first).
    class_count_map = {}
    for row in nearest_rows:
        vote_label = labels[row]
        class_count_map[vote_label] = class_count_map.get(vote_label, 0) + 1

    # max returns the first maximal key in insertion order, which is the same
    # winner a stable descending sort by count would put in front.
    return max(class_count_map, key=class_count_map.get)


def create_data_set_label():
    """Return the four-sample demo data set and the matching labels."""
    data_array = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    label_array = ["A", "A", "B", "B"]
    return data_array, label_array


if __name__ == "__main__":
    data_arr, label_arr = create_data_set_label()
    x = [0, 0]
    predict = classify0(data_arr, label_arr, x, 3)
    print(predict)
3.基于pytorch实现
import numpy as np
from numpy import array, tile
import torch


def classify0(inx, data_set, labels, k):
    """Classify ``inx`` by majority vote among its ``k`` nearest neighbours.

    The Euclidean distance from ``inx`` to every row of ``data_set`` is
    computed in a single tensor expression, the ``k`` closest rows are
    selected, and the label that occurs most often among them is returned.
    Labels are counted with ``numpy.unique``, which returns them in sorted
    order, so a tie goes to the tied label that sorts first.

    :param inx: input vector to classify (one value per feature)
    :param data_set: training samples as a 2-D array, one row per sample
    :param labels: label of each training row, indexable by row number
    :param k: number of nearest neighbours that vote; values larger than the
        number of training rows simply use every row
    :return: the predicted label as produced by ``numpy.unique`` (a NumPy
        scalar), or ``None`` when nothing can vote (no training rows or
        ``k`` smaller than 1)
    """
    # Nothing can vote: keep the "no prediction" result of None.
    if data_set.shape[0] == 0 or k < 1:
        return None

    train_x = torch.as_tensor(data_set, dtype=torch.float32)
    query = torch.as_tensor(inx, dtype=torch.float32)

    # Broadcasting subtracts the query from every training row at once, so
    # each distance is computed exactly once.
    distances = torch.sum((train_x - query) ** 2, dim=1) ** 0.5
    nearest_rows = distances.argsort()[:k].tolist()

    unique, counts = np.unique(
        np.array([labels[row] for row in nearest_rows]), return_counts=True
    )
    # argmax returns the first maximal count; unique is sorted, so this is
    # the winner a stable descending sort by count would put in front.
    return unique[counts.argmax()]


def create_data_set_label():
    """Return the four-sample demo data set and the matching labels."""
    data_set = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ["A", "A", "B", "B"]
    return data_set, labels


if __name__ == "__main__":
    data_set, labels = create_data_set_label()
    inx = [0, 0]
    predict = classify0(inx, data_set, labels, 3)
    print(predict)
4.基于sklearn实现
from numpy import array, tile
from sklearn.neighbors import KNeighborsClassifier


def classify0(inx, data_set, labels, k):
    """Predict the label of ``inx`` with scikit-learn's k-nearest-neighbours.

    A ``KNeighborsClassifier`` with ``n_neighbors=k`` is fitted on
    ``data_set`` / ``labels`` and asked to predict the single input vector.

    :param inx: input vector to classify
    :param data_set: training samples passed to ``fit``
    :param labels: training labels passed to ``fit``
    :param k: number of neighbours given to the classifier
    :return: the first (and only) element of the prediction array
    """
    model = KNeighborsClassifier(n_neighbors=k).fit(data_set, labels)
    # predict expects a 2-D input, so present inx as a single row.
    query = array(inx).reshape(1, -1)
    return model.predict(query)[0]


def create_data_set_label():
    """Return the four-sample demo data set and the matching labels."""
    samples = [[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]]
    return array(samples), ["A", "A", "B", "B"]


if __name__ == "__main__":
    data_set, labels = create_data_set_label()
    inx = [0, 0]
    predict = classify0(inx, data_set, labels, 3)
    print(predict)