2020.9.25

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 1 from numpy import*
 2 import operator
 3 from os import listdir
 4 def createDataSet():
 5     group=array([[1.0,1.1],[1.0,1.0],[0,0],[0,0.1]])
 6     labels=['A','A','B','B']
 7     return group,labels
 8 def classify0(inX,dataSet,labels,k):
 9     dataSetSize=dataSet.shape[0]
10     diffMat=tile(inX,(dataSetSize,1))-dataSet
11     sqDiffMat=diffMat**2
12     sqDistances=sqDiffMat.sum(axis=1)
13     distances=sqDistances**0.5
14     sortedDistIndicies=distances.argsort()
15     classCount={}
16     for i in range(k):
17         voteIlabel=labels[sortedDistIndicies[i]]
18         classCount[voteIlabel]=classCount.get(voteIlabel,0)+1
19     sortedClassCount=sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
20     return sortedClassCount[0][0]
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Every non-blank line must hold at least three numeric fields (the
    features, taken from the first three columns) and end with an integer
    class label in its last column. Blank lines are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` pair: an N x 3 float array of
        features and a list of N integer labels, in file order.

    Raises:
        ValueError: If a field cannot be converted to a number or a line has
            fewer than three feature fields.
    """
    # The context manager closes the file even if parsing fails.
    with open(filename) as fr:
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = zeros((len(rows), 3))
    classLabelVector = []
    for index, fields in enumerate(rows):
        # Convert explicitly rather than relying on NumPy's string casting.
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        classLabelVector.append(int(fields[-1]))
    return returnMat, classLabelVector
def autoNorm(dataSet):
    """Rescale each feature column of ``dataSet`` linearly onto [0, 1].

    Each value becomes ``(value - column_min) / column_range``.

    Args:
        dataSet: 2-D NumPy array with one row per sample and one column per
            feature.

    Returns:
        A ``(normDataSet, ranges, minVals)`` triple: the rescaled array, the
        per-column range used as divisor, and the per-column minimum. A
        column whose values are all equal has its range reported as 1, so
        that column normalises to 0 and callers can safely divide by
        ``ranges`` themselves.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has zero range; dividing by it would produce NaN.
    ranges = where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column offset and scale to every row.
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals
def datingClassTest():
    """Measure the classifier's error rate on a hold-out slice of the dating data.

    Loads 'datingTestSet2.txt', normalises it, classifies the first 10% of
    the rows against the remaining rows with k=3, prints each prediction
    next to the true label, and finally prints the overall error rate.
    """
    holdoutFraction = 0.10
    features, answers = file2matrix('datingTestSet2.txt')
    scaled, _, _ = autoNorm(features)
    total = scaled.shape[0]
    testCount = int(total * holdoutFraction)

    # Rows after the hold-out slice serve as the training set for every query.
    trainingRows = scaled[testCount:total, :]
    trainingAnswers = answers[testCount:total]

    mistakes = 0.0
    for row in range(testCount):
        guess = classify0(scaled[row, :], trainingRows, trainingAnswers, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (guess, answers[row]))
        if guess != answers[row]:
            mistakes += 1.0
    print("the total error rate is: %f" % (mistakes / float(testCount)))
def classifyPerson():
    """Ask for three traits of a person and print the predicted liking level.

    Reads the three values from standard input, scales them with the
    minimum and range computed from 'datingTestSet2.txt', classifies the
    result with k=3, and prints the matching description.
    """
    verdicts = ('not at all', 'in small doses', 'in large doses')
    gameShare = float(input("percentage of time spent playing video games?"))
    flierMiles = float(input("frequent flier miles earned per year?"))
    iceCreamLiters = float(input("liters of ice cream consumed per year?"))

    features, answers = file2matrix('datingTestSet2.txt')
    scaled, ranges, minVals = autoNorm(features)

    # The vector is built as miles, game share, ice cream - not prompt order.
    person = array([flierMiles, gameShare, iceCreamLiters])
    scaledPerson = (person - minVals) / ranges
    label = classify0(scaledPerson, scaled, answers, 3)
    # The label is used 1-based, so shift it to index the descriptions.
    print("You will probably like this person:", verdicts[label - 1])
def img2vector(filename):
    """Flatten a 32x32 text image of digit characters into a 1x1024 vector.

    The first 32 characters of each of the first 32 lines are read; the
    character at line ``i``, column ``j`` lands at position ``32 * i + j``.

    Args:
        filename: Path of the text image file.

    Returns:
        A float array of shape (1, 1024).

    Raises:
        IndexError: If the file has fewer than 32 lines or a line has fewer
            than 32 characters.
        ValueError: If a character is not a digit.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even when a malformed line raises.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            rowDigits = [int(lineStr[j]) for j in range(32)]
            returnVect[0, 32 * i:32 * (i + 1)] = rowDigits
    return returnVect
def handwritingClassTest():
    """Run the digit classifier over 'testDigits' and print its error statistics.

    Every file in 'trainingDigits' becomes one training row; the label is
    the integer before the first underscore of the file name. Each file in
    'testDigits' is then classified with k=3, its prediction is printed
    beside the label taken from its name, and the error count and error
    rate are printed at the end.
    """
    trainingNames = listdir('trainingDigits')
    trainCount = len(trainingNames)
    trainingMat = zeros((trainCount, 1024))
    hwLabels = []
    for row, name in enumerate(trainingNames):
        # The label is the part of the base name before the first underscore.
        stem = name.split('.')[0]
        hwLabels.append(int(stem.split('_')[0]))
        trainingMat[row, :] = img2vector('trainingDigits/%s' % name)

    testNames = listdir('testDigits')
    mistakes = 0.0
    testCount = len(testNames)
    for name in testNames:
        expected = int(name.split('.')[0].split('_')[0])
        sample = img2vector('testDigits/%s' % name)
        guess = classify0(sample, trainingMat, hwLabels, 3)
        print('the classifier came back with: %d, the real answer is: %d' % (guess, expected))
        if guess != expected:
            mistakes += 1.0
    print('\nthe total number of errors is: %d' % mistakes)
    print('\nthe total error rate id: %f' % (mistakes / float(testCount)))

 

posted @ 2020-09-26 09:15  小吴不要做废吴  阅读(80)  评论(0)    收藏  举报