2.x ESL第二章习题 2.8

题目（ESL 习题 2.8）：在 zipcode 手写数字数据上比较线性回归与 k 近邻分类的分类性能。只考虑数字 2 和 3，k 取 1、3、5、7、15，并给出每种情形下的训练误差和测试误差。

代码

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier

# Read the space-separated data files. The last column of the training
# file is dropped (presumably an empty field produced by a trailing
# separator on each line -- confirm against the data file).
train_full = pd.read_csv('../zip.train', sep=' ', engine='c', header=None).values
train_full = train_full[:, 0:-1]
test_full = pd.read_csv('../zip.test', sep=' ', engine='c', header=None).values

# Keep only the rows whose first column is 2 or 3, stacking all the 2s
# ahead of all the 3s.
train = np.vstack([train_full[train_full[:, 0] == digit] for digit in (2, 3)])
test = np.vstack([test_full[test_full[:, 0] == digit] for digit in (2, 3)])

# The first column is the target; the remaining columns are the features.
train_x, train_y = train[:, 1:], train[:, 0]
test_x, test_y = test[:, 1:], test[:, 0]

# `dc` collects (display name, fitted model) pairs in evaluation order.
lrcf = LinearRegression().fit(train_x, train_y)
dc = [('linear regression', lrcf)]

for k in (1, 3, 5, 7, 15):
    knn = KNeighborsClassifier(n_neighbors=k).fit(train_x, train_y)
    dc.append(('%d-nearest neighbor' % k, knn))
   
def acc(clf, x, y):
    """Return the fraction of rows in ``x`` that ``clf`` predicts correctly.

    Args:
        clf: A fitted model exposing ``predict(x)``.
        x: Feature matrix passed to ``clf.predict``.
        y: Array of true labels, compared element-wise with the predictions.

    Returns:
        The number of predictions equal to ``y`` divided by ``y.shape[0]``,
        as a float.

    Raises:
        ZeroDivisionError: If ``y`` is empty.
    """
    res = clf.predict(x)
    if isinstance(clf, LinearRegression):
        # The regression output is continuous, so cut it at 2.5, the
        # midpoint of the two class codes. A single np.where guarantees
        # every value lands on 2 or 3; a prediction of exactly 2.5 goes
        # to 2 instead of being left unmapped and always counted wrong.
        res = np.where(res > 2.5, 3, 2)
    n = y.shape[0]
    correct = np.count_nonzero(res == y)
    return correct * 1.0 / n
   
# Print the test-set accuracy of every fitted model, one line per model.
for name, model in dc:
    accr = acc(model, test_x, test_y)
    # A parenthesised single argument is valid on both Python 2 and
    # Python 3; the bare `print` statement is a SyntaxError on Python 3.
    print('%s:  %.5f' % (name, accr))

结果（测试集上的分类准确率）

linear regression:  0.95879

1-nearest neighbor:  0.97527

3-nearest neighbor:  0.96978

5-nearest neighbor:  0.96978

7-nearest neighbor:  0.96703

15-nearest neighbor:  0.96154



posted @ 2015-08-12 17:13  porco  阅读(251)  评论(0)  编辑  收藏  举报