# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
# Load the whole data set from the CSV file in the working directory.
data = pd.read_csv(r'data.csv')

# Training rows: index labels 0 through 4000, end label included.
# `.loc` is used because the `.ix` indexer was removed in pandas 1.0; on the
# integer index that read_csv builds by default, `.ix` sliced by label too,
# so the selected rows are the same.
train = data.loc[0:4000, :]

# The last column of the training frame is used as the class label column.
Y = train.columns[-1]

# Distinct class labels present in the training rows.
C = list(train[Y].unique())

# Rows at positions 601..610 and the first six columns are held out as the
# records to classify; the first five of those columns are the features.
# NOTE(review): with the default integer index these rows also fall inside
# the training slice above - confirm that this overlap is intended.
test = data.iloc[601:611, 0:6]
Xtest = test.iloc[:, 0:5]
def XP(testline, c, trainpart):
    """Return the likelihood of one record's feature values within a class.

    For every feature label in ``testline.index`` the fraction of rows in
    ``trainpart`` whose column of that name equals the record's value is
    computed, and the fractions are multiplied together.

    Parameters
    ----------
    testline : pandas.Series
        The record to score; its index labels must be column names of
        ``trainpart``.
    c : object
        The class label that ``trainpart`` belongs to. It is not used in the
        computation and is kept only so existing callers keep working.
    trainpart : pandas.DataFrame
        The training rows of a single class.

    Returns
    -------
    float
        Product of the per-feature relative frequencies, or ``0.0`` when
        ``trainpart`` has no rows.
    """
    total = len(trainpart)
    if total == 0:
        # No rows for this class: there is nothing to estimate a frequency
        # from, and dividing by zero would raise.
        return 0.0

    likelihood = 1.0
    for feature in testline.index:
        # Count matching rows once per feature instead of filtering the
        # whole frame.
        matches = int((trainpart[feature] == testline[feature]).sum())
        # NOTE: no smoothing is applied, so a value never seen in this class
        # drives the whole product to zero.
        likelihood *= matches / total
    return likelihood
def YP(train, C, testline):
    """Return the class label in ``C`` with the highest score for a record.

    For each candidate label, the rows of ``train`` whose ``Y`` column equals
    that label are selected; the likelihood returned by ``XP`` for those rows
    is multiplied by the share of training rows belonging to the label. The
    array of scores is printed, and the label at the position of the largest
    score is returned.

    ``Y`` (the label column name) and ``XP`` are module-level names.

    Parameters
    ----------
    train : pandas.DataFrame
        Training rows, including the label column ``Y``.
    C : list
        Candidate class labels, in the order the scores are computed.
    testline : pandas.Series
        The record to classify.
    """
    total_rows = len(train)
    scores = []
    for label in C:
        subset = train[train[Y] == label]
        # Likelihood is evaluated before the prior, as the caller-visible
        # order of work (and of any failure) depends on it.
        likelihood = XP(testline, label, subset)
        prior = len(subset) / total_rows
        scores.append(likelihood * prior)
    scores = np.array(scores)
    print(scores)
    best = scores.argmax()
    return C[best]
def NB(train, Xtest, C):
    """Predict a class label for every row of ``Xtest``.

    Parameters
    ----------
    train : pandas.DataFrame
        Training rows, passed through unchanged to ``YP``.
    Xtest : pandas.DataFrame
        Feature rows to classify.
    C : list
        Candidate class labels, passed through unchanged to ``YP``.

    Returns
    -------
    list
        One prediction from ``YP`` per index label of ``Xtest``, in index
        order; an empty list when ``Xtest`` has no rows.
    """
    # Rows are looked up by their own index label, so the label-based `.loc`
    # is the right replacement for the `.ix` indexer removed in pandas 1.0.
    return [YP(train, C, Xtest.loc[i, :]) for i in Xtest.index]
# Classify every held-out feature row; `result` holds one predicted label
# per row of Xtest.
result = NB(train=train, Xtest=Xtest, C=C)