# 2020.9.25












"""k-nearest-neighbours (kNN) classifier with two demo applications.

The module contains the generic classifier (``classify0``), helpers that load
and normalise a tab-separated data set, and two demos: one driven by
``datingTestSet2.txt`` and one driven by 32x32 text images stored in the
``trainingDigits`` / ``testDigits`` directories.
"""
# NOTE: the star import is kept so the module keeps exporting the same names.
# It shadows some builtins (e.g. ``sum``, ``any``, ``all``) with NumPy
# versions, so the code below deliberately avoids those builtins.
from numpy import *  # noqa: F401,F403
import operator
from os import listdir


def createDataSet():
    """Return a tiny four-point, two-class toy data set.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 array of points and
        ``labels`` is the list of class names ('A' or 'B'), one per row.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify; anything that broadcasts against
            the rows of ``dataSet`` (a sequence, a 1-D array or a 1xN array).
        dataSet: 2-D array with one training sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to
            ``dataSet[i]``.
        k: Number of neighbours that vote. If ``k`` exceeds the number of
            training samples, every sample votes.

    Returns:
        The label with the most votes. On a tie, the label that received its
        first vote from the closer neighbour wins.

    Raises:
        IndexError: If no neighbour votes (``k < 1`` or an empty data set).
    """
    # Broadcasting subtracts inX from every row; no explicit tiling needed.
    diffMat = asarray(inX) - dataSet
    distances = ((diffMat ** 2).sum(axis=1)) ** 0.5
    sortedDistIndicies = distances.argsort()

    # Slicing clamps to the available samples; a non-positive k selects none.
    nearest = sortedDistIndicies[:k] if k > 0 else sortedDistIndicies[:0]

    classCount = {}
    for neighbour in nearest:
        voteIlabel = labels[neighbour]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # The sort is stable, so equal counts keep their first-vote order.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]


def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Each non-blank line supplies three numeric feature columns first and an
    integer class label as its last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` pair: an Nx3 float array of
        features and a list of N integer labels.
    """
    with open(filename) as fr:
        stripped = [line.strip() for line in fr]
    # Blank lines (e.g. a trailing newline) carry no sample; skip them.
    rows = [row for row in stripped if row]

    returnMat = zeros((len(rows), 3))
    classLabelVector = []
    for index, row in enumerate(rows):
        listFromLine = row.split('\t')
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector


def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` linearly onto the range [0, 1].

    Args:
        dataSet: 2-D array with one sample per row.

    Returns:
        A ``(normDataSet, ranges, minVals)`` triple where ``ranges`` is the
        per-column ``max - min`` and ``minVals`` the per-column minimum.
        A column whose values are all equal has range 0, so its normalised
        values are NaN.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Per-column vectors broadcast across all rows.
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals


def datingClassTest():
    """Print the hold-out error rate of the classifier on the dating data.

    Reads ``datingTestSet2.txt``, normalises it, uses the first 10% of the
    rows as test samples and the remaining rows as training data (k = 3),
    and prints every prediction followed by the total error rate.
    """
    hoRatio = 0.10
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :],
                                     normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m],
                                     3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))


def classifyPerson():
    """Ask for one person's three features and print the predicted class.

    The answers are normalised with the ranges and minimums of
    ``datingTestSet2.txt`` and classified against that data set with k = 3.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent flier miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    inArr = array([ffMiles, percentTats, iceCream])
    classifierResult = classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)
    # Labels are 1-based while resultList is 0-based.
    print("You will probably like this person:", resultList[classifierResult - 1])


def img2vector(filename):
    """Read a 32x32 text image of digit characters into a 1x1024 row vector.

    Args:
        filename: Path of a text file whose first 32 lines each start with
            32 digit characters.

    Returns:
        A 1x1024 float array; element ``32 * row + col`` holds the digit at
        that position of the image.
    """
    returnVect = zeros((1, 1024))
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            returnVect[0, 32 * i:32 * (i + 1)] = [int(lineStr[j]) for j in range(32)]
    return returnVect


def handwritingClassTest():
    """Print the error count and error rate of the digit classifier.

    Every file in ``trainingDigits`` becomes a training sample and every file
    in ``testDigits`` is classified with k = 3. The true digit is the integer
    before the first underscore of the file name.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print('the classifier came back with: %d, the real answer is: %d' % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print('\nthe total number of errors is: %d' % errorCount)
    print('\nthe total error rate id: %f' % (errorCount / float(mTest)))

# Page footer (not part of the program): Zhejiang public network security registration No. 33010602011771