Python (Naïve Bayes) (classify algorithm)

假设共有 a、b、c、d 四个类别。

对于待分类的样本 m，分别计算它属于各个类别的概率；若 p(a|m)>p(b|m)>p(c|m)>p(d|m)，则 m 属于概率最大的 a 类。

class Bayes:
    """Naive Bayes classifier for discrete (categorical) feature vectors.

    Training stores, per label, the prior probability of the label and the
    list of training vectors carrying that label.  Classification multiplies
    the prior by the per-feature relative frequency of each test value among
    the stored vectors and returns the label with the largest product.

    NOTE: no smoothing is applied, so a feature value never seen for a label
    drives that label's probability to zero.
    """

    def __init__(self):
        # -1 marks "not trained yet"; fit() replaces it with the feature count.
        self.length = -1
        # label -> fraction of training samples carrying that label (prior)
        self.labelcount = dict()
        # label -> list of training vectors carrying that label
        self.vectorcount = dict()

    def fit(self, dataSet, labels):
        """Train the classifier.

        Args:
            dataSet: sequence of feature vectors, all of the same length.
            labels: sequence of hashable labels, one per feature vector.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if the two sequences differ in length, the training
                data is empty, or the feature vectors differ in length.
        """
        if len(dataSet) != len(labels):
            raise ValueError("Wrong imput!")
        if len(dataSet) == 0:
            raise ValueError("Training data must not be empty.")

        length = len(dataSet[0])  # number of features per sample
        grouped = dict()
        for vector, label in zip(dataSet, labels):
            if len(vector) != length:
                raise ValueError(
                    "All training vectors must have the same length."
                )
            grouped.setdefault(label, []).append(vector)

        # Replace (rather than extend) earlier state so that calling fit()
        # again does not mix two training sets.
        total = len(labels)
        self.length = length
        self.vectorcount = grouped
        self.labelcount = {
            label: len(rows) / total for label, rows in grouped.items()
        }
        print("train finished!")
        return self

    def bayes_test(self, testData, labelSet):
        """Return the most probable label for ``testData``.

        Args:
            testData: one feature vector with as many entries as the
                training vectors.
            labelSet: iterable of candidate labels.  A label that was not
                seen during training gets probability 0.

        Returns:
            The candidate label with the highest probability; on ties the
            candidate that appears first in ``labelSet`` wins.

        Raises:
            ValueError: if the classifier has not been trained, the test
                vector has the wrong length, or ``labelSet`` is empty.
        """
        if self.length == -1:
            raise ValueError("you havn't taining yet!")
        if len(testData) != self.length:
            raise ValueError(
                "Test vector length does not match the training data."
            )

        # Probability of testData under each candidate label.
        lbdic = dict()
        for thislb in labelSet:
            rows = self.vectorcount.get(thislb)
            if not rows:
                lbdic[thislb] = 0.0
                continue
            vnum = len(rows)
            p = 1
            # zip(*rows) transposes the samples into per-feature columns
            # without coercing the values to a common type.
            for column, value in zip(zip(*rows), testData):
                p *= column.count(value) / vnum
            lbdic[thislb] = p * self.labelcount[thislb]

        if not lbdic:
            raise ValueError("labelSet must not be empty.")
        return max(lbdic, key=lbdic.get)
*********************************************************************************************

import numpy as npy
class Bayes:
    """Naive Bayes classifier for discrete (categorical) feature vectors.

    ``fit`` records the prior probability of every label and groups the
    training vectors by label; ``btest`` scores a test vector against a set
    of candidate labels and returns the most probable one.

    NOTE: no smoothing is applied, so a feature value never seen for a label
    drives that label's probability to zero.
    """

    def __init__(self):
        # -1 marks "not trained yet"; fit() replaces it with the feature count.
        self.length = -1
        # label -> fraction of training samples carrying that label (prior)
        self.labelcount = dict()
        # label -> list of training vectors carrying that label
        self.vectorcount = dict()

    def fit(self, dataSet: list, labels: list):
        """Train the classifier.

        Args:
            dataSet: list of feature vectors, all of the same length.
            labels: list of hashable labels, one per feature vector.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if the two lists differ in length, the training data
                is empty, or the feature vectors differ in length.
        """
        if len(dataSet) != len(labels):
            raise ValueError("您输入的测试数组跟类别数组长度不一致")
        if not dataSet:
            raise ValueError("训练数据不能为空")

        feature_count = len(dataSet[0])  # number of features per sample
        by_label = dict()
        for vector, label in zip(dataSet, labels):
            if len(vector) != feature_count:
                raise ValueError("训练数据中各特征向量的长度不一致")
            if label not in by_label:
                by_label[label] = []
            by_label[label].append(vector)

        # Replace (rather than extend) earlier state so that a second fit()
        # does not mix the priors of one data set with vectors of another.
        labelsnum = len(labels)
        self.length = feature_count
        self.vectorcount = by_label
        self.labelcount = {
            label: len(vectors) / labelsnum
            for label, vectors in by_label.items()
        }
        print("训练结束")
        return self

    def btest(self, TestData, labelsSet):
        """Return the most probable label for ``TestData``.

        Args:
            TestData: one feature vector with as many entries as the
                training vectors.
            labelsSet: iterable of candidate labels.  A label that was not
                seen during training gets probability 0.

        Returns:
            The candidate label with the highest probability; on ties the
            candidate that appears first in ``labelsSet`` wins.

        Raises:
            ValueError: if the classifier has not been trained, the test
                vector has the wrong length, or ``labelsSet`` is empty.
        """
        if self.length == -1:
            raise ValueError("您还没有进行训练，请先训练")
        if len(TestData) != self.length:
            raise ValueError("测试数据的特征数量与训练数据不一致")

        # Probability of TestData under each candidate label.
        lbDict = dict()
        for thislb in labelsSet:
            allvector = self.vectorcount.get(thislb)
            if not allvector:
                lbDict[thislb] = 0.0
                continue
            vnum = len(allvector)
            # dtype=object keeps every value as the original Python object;
            # the default dtype would coerce mixed int/str features to
            # strings and make the equality counts below always miss.
            columns = npy.array(allvector, dtype=object).T
            p = 1
            for index, value in enumerate(TestData):
                p *= list(columns[index]).count(value) / vnum
            lbDict[thislb] = p * self.labelcount[thislb]

        if not lbDict:
            raise ValueError("候选类别集合不能为空")
        return max(lbDict, key=lbDict.get)

posted @ 2018-01-15 16:14 兔子的尾巴_Mini 阅读(419) 评论(0) 收藏举报

刷新页面返回顶部

兔子的尾巴_Mini

Python (Naïve Bayes) (classify algorithm)

公告