# Compute the prior probability P(Y)
def prior_prob(list_Y):
    """Estimate the class prior P(Y = c) by relative frequency.

    Args:
        list_Y: Sequence of class labels, one per training sample.

    Returns:
        A dict mapping each distinct label ``c`` in ``list_Y`` to
        ``count(c) / len(list_Y)``. An empty input yields an empty dict.
    """
    total = len(list_Y)
    probs = {}
    for c in set(list_Y):
        # Count within the argument itself, not a module-level variable,
        # so the function works for any label sequence passed in.
        probs[c] = list_Y.count(c) / total
    return probs
# Compute the conditional probability P(X_i = v | Y = c) with Laplace smoothing
def conditional_prob(X, list_Y, values):
    """Estimate P(X_i = v | Y = c) for one feature with add-one smoothing.

    Args:
        X: Feature values, indexed in step with ``list_Y``.
        list_Y: Class labels, one per training sample.
        values: The possible values of this feature.

    Returns:
        A nested dict ``table[c][v]`` equal to
        ``(count(X == v within class c) + 1) / (n_c + len(values))``,
        where ``n_c`` is the number of samples labelled ``c``.
    """
    table = {}
    for label in set(list_Y):
        # Positions of the samples that belong to this class.
        rows = [pos for pos, y in enumerate(list_Y) if y == label]
        # Laplace smoothing: add 1 to every count and len(values) to the total.
        table[label] = {
            v: (sum(1 for pos in rows if X[pos] == v) + 1)
            / (len(rows) + len(values))
            for v in values
        }
    return table
# Prediction function
def predict(x):
    """Return the class with the largest naive Bayes score for sample ``x``.

    The score of class ``c`` is ``P_Y[c] * P_X1_Y[c][x[0]] * P_X2_Y[c][x[1]]``,
    using the module-level tables ``P_Y``, ``P_X1_Y`` and ``P_X2_Y``, which
    are looked up when the function is called.

    Args:
        x: A two-element sample; ``x[0]`` indexes ``P_X1_Y[c]`` and ``x[1]``
            indexes ``P_X2_Y[c]``.

    Returns:
        The key of ``P_Y`` with the highest score; on a tie, the first such
        key in ``P_Y``'s iteration order.
    """
    def score(c):
        return P_Y[c] * P_X1_Y[c][x[0]] * P_X2_Y[c][x[1]]

    # Pick the class with the highest probability.
    return max(P_Y, key=score)
# Training data, one (X1, X2, Y) row per sample, unzipped into column lists
X1, X2, Y = map(list, zip(
    (1, 'S', -1),
    (1, 'M', -1),
    (1, 'M', 1),
    (1, 'S', 1),
    (1, 'S', -1),
    (2, 'S', -1),
    (2, 'S', -1),
    (2, 'M', 1),
    (2, 'M', 1),
    (2, 'L', 1),
    (3, 'L', 1),
    (3, 'L', 1),
    (3, 'M', 1),
    (3, 'M', 1),
    (3, 'L', -1),
))

# Sets of possible values for each feature
A1 = [1, 2, 3]
A2 = ['S', 'M', 'L']

# Fit the model: class priors and per-feature smoothed conditionals
P_Y = prior_prob(Y)
P_X1_Y = conditional_prob(X1, Y, A1)
P_X2_Y = conditional_prob(X2, Y, A2)

# Test sample
x_test = (3, 'M')
y_pred = predict(x_test)
print("样本 {} 的预测类别为: {}".format(x_test, y_pred))