import os

import pandas as pd
from sklearn import metrics
from sklearn import tree

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the legacy module
    from sklearn.cross_validation import train_test_split
# Train a decision tree on the sales spreadsheet and print how well it fits
# the training split.
infile = 'sales_data.xls'
# Raw string: the backslashes in the Windows path must not be read as escape
# sequences ('\p' and '\m' are invalid escapes and warn on newer Pythons).
os.chdir(r'E:\pycharm\machine learning')
data = pd.read_excel(infile, index_col=u'序号')

# Encode the cells numerically: the three "positive" labels become 1 ...
for positive_label in (u'好', u'是', u'高'):
    data[data == positive_label] = 1
# ... and every remaining cell that is not 1 becomes -1.
data[data != 1] = -1
print(data.dtypes)
print('----------------------------------------')

# Every column except the last is a feature; the last column is the target.
# `.values` is used instead of `as_matrix()`, which was removed in pandas 1.0.
x = data.iloc[:, :-1].values.astype(int)
y = data.iloc[:, -1].values.astype(int)

# Hold out 40% of the rows; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.4, random_state=12345)
print('-----------------------------')
print('划分数据集:')
print(y_train)
print('-----------------------------')
print(y_train.dtype)
print('训练模型:')
clt = tree.DecisionTreeClassifier()
clt.fit(x_train, y_train)
print('-----------------------------')

# Predict once and reuse the result for every metric below.
# NOTE(review): all metrics here are computed on the training split;
# x_test / y_test are never used, so these numbers do not measure how the
# model generalizes. Confirm whether evaluation on the held-out split was
# intended.
train_pred = clt.predict(x_train)
print('准确率:{}'.format(metrics.accuracy_score(y_train, train_pred)))
print('精确率:{}'.format(metrics.precision_score(y_train, train_pred)))
print('召回率:{}'.format(metrics.recall_score(y_train, train_pred)))
print(metrics.confusion_matrix(y_train, train_pred))  # confusion matrix
print(train_pred)
print('-------预测---------实际-------------')
print(y_train)