实战1:电影归类-----KNN

案例:
    已经有一组电影数据
        可以看出每部电影的数据及其分类,最后一部标记为“未知”的电影需要预测它属于哪个分类。
        将数据中的打斗次数记为 X,接吻次数记为 Y,则每部电影都可以表示为坐标系中的一个点。
        
        两点 (x1, y1) 与 (x2, y2) 之间的欧氏距离:E = sqrt((x2-x1)**2 + (y2-y1)**2)
          
       # sklearn 模块的neighbors功能

  

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from sklearn import neighbors  # 导入KNN功能模块
import warnings
warnings.filterwarnings('ignore')
# 不发出警告

# Labelled training set: one row per movie, giving its title, number of
# fight scenes, number of kiss scenes, and known genre.
data = pd.DataFrame(
    [
        ('北京遇上西雅图', 3, 104, 'Romance'),
        ('喜欢你', 2, 100, 'Romance'),
        ('疯狂动物城', 1, 81, 'Romance'),
        ('战狼2', 101, 10, 'Action'),
        ('力王', 99, 5, 'Action'),
        ('敢死队', 98, 2, 'Action'),
    ],
    columns=['name', 'fight', 'kiss', 'type'],
)



# Scatter the labelled movies: fight count on the x axis, kiss count on the
# y axis, one colour per genre (Romance in red, Action in black).
for genre, colour in (('Romance', 'r'), ('Action', 'k')):
    subset = data[data['type'] == genre]
    plt.scatter(subset['fight'], subset['kiss'], color=colour, marker='o', label=genre)
plt.grid()
plt.legend()
plt.ylabel('kiss')
plt.xlabel('fight')

# Train a k-nearest-neighbours classifier on the two numeric features
# (fight, kiss), using the genre column as the target label. The classifier
# is created with its default settings.
knn = neighbors.KNeighborsClassifier()
knn.fit(data[['fight', 'kiss']], data['type'])
# The text below is the notebook's printed result of the fit() call above,
# kept for reference only. It had been pasted in as live code, but only the
# `neighbors` module is imported, so the bare name KNeighborsClassifier was
# undefined and running the script stopped here with a NameError.
#   KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#              metric_params=None, n_jobs=None, n_neighbors=5, p=2,
#              weights='uniform')
 
# Build 100 random query points whose two features are each drawn from
# [0, 50), then let the fitted classifier assign a genre to every point.
random_points = np.random.rand(100, 2) * 50
data2 = pd.DataFrame(random_points, columns=['fight', 'kiss'])
predicted_genres = knn.predict(data2)
data2['typetest'] = predicted_genres

# Draw the labelled movies again as circles (Romance red, Action green).
genre_colours = {'Romance': 'r', 'Action': 'g'}
for genre, colour in genre_colours.items():
    known = data[data['type'] == genre]
    plt.scatter(known['fight'], known['kiss'], color=colour, marker='o', label=genre)
plt.grid()
plt.legend()

# Overlay the randomly generated points as crosses, coloured by the genre the
# classifier predicted. These are added after legend() has been called.
for genre, colour in genre_colours.items():
    guessed = data2[data2['typetest'] == genre]
    plt.scatter(guessed['fight'], guessed['kiss'], color=colour, marker='x', label=genre)
# Bare expression: its result (the first rows of data2) is not assigned or
# printed here.
data2.head()
posted @ 2019-04-15 16:43  慕沁  阅读(335)  评论(0)    收藏  举报