# k近邻法（k-nearest neighbor）

### 距离度量：

$L_p(x_i,x_j)=(\sum_{l=1}^{n}|x_i^{(l)}-x_j^{(l)}|^p)^{\frac{1}{p}}$

• 当$p=1$时，为曼哈顿距离，$L_1(x_i,x_j)=\sum_{l=1}^{n}|x_i^{(l)}-x_j^{(l)}|$

• 当$p=2$时，为欧氏距离，$L_2(x_i,x_j)=(\sum_{l=1}^{n}|x_i^{(l)}-x_j^{(l)}|^2)^{\frac{1}{2}}$

• 当$p=\infty$时，它是各个坐标距离的最大值，$L_\infty(x_i,x_j)=\max_l|x_i^{(l)}-x_j^{(l)}|$

### 机器学习实战第二章代码

import numpy as np
def classify0(inx,dataSet,labels,k):
datasize=dataSet.shape[0]
diffmat=np.tile(inx,(datasize,1))-dataSet
sqdismat=diffmat**2
sqdist=sqdismat.sum(axis=1)
dist=sqdist**0.5
sortdistpos=dist.argsort()
labelscount=np.array([0,0,0,0])
for i in range(k):
votelabels=labels[sortdistpos[i]]
labelscount[votelabels]+=1
returnresult=labelscount.argsort()
return returnresult[-1]

file=open(filename)
num=len(ar)
returnMat=np.zeros((num,3))
returnLabels=[]
index=0
for line in ar:
line=line.strip()
linelist=line.split('\t')
returnMat[index,:]=linelist[0:3]
returnLabels.append(int(linelist[-1]))
index+=1
return returnMat,returnLabels

def autoNorm(dataSet):
minvals=dataSet.min(0)
maxvals=dataSet.max(0)
ranges=maxvals-minvals
normDataSet=np.zeros(np.shape(dataSet))
m=dataSet.shape[0]
normDataSet=dataSet-np.tile(minvals,(m,1))
normDataSet=normDataSet/np.tile(ranges,(m,1))
return normDataSet

normDataMat=autoNorm(DataMat)
ratio=0.1
tep=normDataMat.shape[0]
testnum=int(tep*ratio)
print(tep,testnum)
errorcount=0
for i in range(testnum):
result=classify0(normDataMat[i,:],normDataMat[testnum:tep,:],DataLabels[testnum:tep],3)
if(result!=DataLabels[i]):
errorcount+=1
print(i," the test result is ",result,",the real result is ",DataLabels[i])
print(errorcount)
print("the error ratio is ",errorcount*1.0/testnum)
posted @ 2017-07-23 23:07  LittlePointer  阅读(370)  评论(0编辑  收藏  举报