# Decision tree (决策树)

# Train a decision-tree classifier on the sales data in ``sales_data.xls``
# and print accuracy / precision / recall / confusion matrix, first for the
# training split (as before) and then for the held-out test split.

import os
import pandas as pd
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn only ships the legacy module
    from sklearn.cross_validation import train_test_split
from sklearn import tree
from sklearn import metrics

infile = 'sales_data.xls'
# Raw string: '\p' and '\m' are not valid escape sequences and trigger
# warnings (eventually errors) on modern Python.
os.chdir(r'E:\pycharm\machine learning')
data = pd.read_excel(infile, index_col=u'序号')

# Encode the categorical cells as +1 (positive category) / -1 (everything
# else) so the tree can consume them as integers.
# NOTE(review): in the original text the three category literals were empty
# strings (apparently lost in extraction), which maps every cell to -1.
# They are presumably the "good" / "yes" / "high" labels below -- confirm
# against the actual contents of sales_data.xls.
POSITIVE_LABELS = (u'好', u'是', u'高')
data[data.isin(POSITIVE_LABELS)] = 1
data[data != 1] = -1
print(data.dtypes)
print('----------------------------------------')
# Last column is the target, all preceding columns are features.
# ``.values`` replaces ``.as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :-1].values.astype(int)
y = data.iloc[:, -1].values.astype(int)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.4, random_state=12345)
print('-----------------------------')
print('划分数据集:')
print(y_train)
print('-----------------------------')
print(y_train.dtype)

print('训练模型:')
clt = tree.DecisionTreeClassifier()
clt.fit(x_train, y_train)

# Predict once and reuse the result instead of re-running the model for
# every metric.
train_pred = clt.predict(x_train)

print('-----------------------------')
print('准确率:{}'.format(metrics.accuracy_score(y_train, train_pred)))

print('精确率:{}'.format(metrics.precision_score(y_train, train_pred)))

print('召回率:{}'.format(metrics.recall_score(y_train, train_pred)))

print(metrics.confusion_matrix(y_train, train_pred))  # confusion matrix

print(train_pred)
print('-------预测---------实际-------------')
print(y_train)

# Training-set scores say little about generalisation, so also report the
# same metrics on the held-out split that was previously left unused.
test_pred = clt.predict(x_test)

print('-----------------------------')
print('测试集准确率:{}'.format(metrics.accuracy_score(y_test, test_pred)))
print('测试集精确率:{}'.format(metrics.precision_score(y_test, test_pred)))
print('测试集召回率:{}'.format(metrics.recall_score(y_test, test_pred)))
print(metrics.confusion_matrix(y_test, test_pred))

 

# Posted 2018-04-21 09:26 by python赵小弟 (views: 153, comments: 0)