# 决策树 (Decision tree)
# Train a decision-tree classifier on the sales spreadsheet and print its
# accuracy, precision, recall and confusion matrix on the held-out split.
import os

import pandas as pd
from sklearn import metrics
from sklearn import tree

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the legacy module
    from sklearn.cross_validation import train_test_split

infile = 'sales_data.xls'
# Raw string: the backslashes are path separators, not escape sequences.
os.chdir(r'E:\pycharm\machine learning')
data = pd.read_excel(infile, index_col=u'序号')

# Encode every cell as +1 / -1: the three "positive" labels become 1, then
# every cell that is still not equal to 1 becomes -1.
for positive_label in (u'好', u'是', u'高'):
    data[data == positive_label] = 1
data[data != 1] = -1
print(data.dtypes)
print('----------------------------------------')

# All columns but the last are the features; the last column is the target.
# `.values` replaces `DataFrame.as_matrix()`, which pandas 1.0 removed.
x = data.iloc[:, :-1].values.astype(int)
y = data.iloc[:, -1].values.astype(int)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.4, random_state=12345)
print('-----------------------------')
print('划分数据集:')
print(y_train)
print('-----------------------------')
print(y_train.dtype)

print('训练模型:')
clt = tree.DecisionTreeClassifier()
clt.fit(x_train, y_train)
print('-----------------------------')

# Score the model on the held-out split: metrics computed on the rows the
# tree was fitted on only show how well it memorised them. Predict once and
# reuse the result for every metric.
y_pred = clt.predict(x_test)
print('准确率:{}'.format(metrics.accuracy_score(y_test, y_pred)))
print('精确率:{}'.format(metrics.precision_score(y_test, y_pred)))
print('召回率:{}'.format(metrics.recall_score(y_test, y_pred)))
print(metrics.confusion_matrix(y_test, y_pred))  # confusion matrix
print(y_pred)
print('-------预测---------实际-------------')
print(y_test)