案例:
已经有一组电影数据
从数据中可以看出每部电影的特征及其分类;最后一部类型"未知"的电影,需要预测它属于哪个分类。
将数据中的打斗次数属性标记为X,接吻次数标记为Y,则可以将数据化为坐标中的点。
两点 (x1, y1) 与 (x2, y2) 之间的欧氏距离:d = sqrt((x2-x1)**2 + (y2-y1)**2)
# sklearn 模块的neighbors功能
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from sklearn import neighbors # 导入KNN功能模块
import warnings
warnings.filterwarnings('ignore')
# 不发出警告
# Labelled training set: one record per film, holding its title, number of
# fight scenes, number of kiss scenes and genre label.
films = [
    ('北京遇上西雅图', 3, 104, 'Romance'),
    ('喜欢你', 2, 100, 'Romance'),
    ('疯狂动物城', 1, 81, 'Romance'),
    ('战狼2', 101, 10, 'Action'),
    ('力王', 99, 5, 'Action'),
    ('敢死队', 98, 2, 'Action'),
]
data = pd.DataFrame(films, columns=['name', 'fight', 'kiss', 'type'])
# Scatter the labelled films with fight count on the x-axis and kiss count on
# the y-axis, one colour per genre (Romance in red, Action in black).
for genre, colour in (('Romance', 'r'), ('Action', 'k')):
    subset = data[data['type'] == genre]
    plt.scatter(subset['fight'], subset['kiss'], color=colour, marker='o', label=genre)
plt.grid()
plt.legend()
plt.ylabel('kiss')
plt.xlabel('fight')
# Fit a k-nearest-neighbours classifier with default settings, using the two
# numeric columns ('fight', 'kiss') as features and 'type' as the target label.
knn = neighbors.KNeighborsClassifier()
knn.fit(data[['fight','kiss']],data['type'])
# The lines below appear to be the interpreter's echoed repr of the fitted
# estimator, not code. They are kept as a comment because only `neighbors` is
# imported, so the bare name KNeighborsClassifier would raise NameError here.
#   KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#                        metric_params=None, n_jobs=None, n_neighbors=5, p=2,
#                        weights='uniform')
# Draw 100 random test points (both features scaled from rand()'s [0, 1) range
# up to [0, 50)) and store the model's predicted genre for each in 'typetest'.
random_points = np.random.rand(100, 2) * 50
data2 = pd.DataFrame(random_points, columns=['fight', 'kiss'])
predicted_types = knn.predict(data2)
data2['typetest'] = predicted_types
# Plot the labelled films (circles) together with the randomly generated
# points (crosses), coloured by genre: Romance in red, Action in green.
# Open a new figure first so that, when run as a plain script, this plot does
# not draw onto the axes already used by an earlier plot.
plt.figure()
genre_colours = (('Romance', 'r'), ('Action', 'g'))
# Labelled training films.
for genre, colour in genre_colours:
    known = data[data['type'] == genre]
    plt.scatter(known['fight'], known['kiss'], color=colour, marker='o', label=genre)
# Random points, grouped by the genre stored in 'typetest'.
for genre, colour in genre_colours:
    guessed = data2[data2['typetest'] == genre]
    plt.scatter(guessed['fight'], guessed['kiss'], color=colour, marker='x', label=genre)
plt.grid()
# Build the legend only after every scatter has been drawn; calling it earlier
# leaves the cross-marker series out of the legend despite their labels.
plt.legend()
# Bare expression: shows the first rows when this is the last statement of a
# notebook cell; it has no visible effect in a plain script.
data2.head()