决策树

 1 # encoding:utf-8
 2 
 3 import pandas as pd
 4 from sklearn.feature_extraction import DictVectorizer
 5 from sklearn.model_selection import train_test_split
 6 from sklearn import *
 7 from sklearn.datasets import *
 8 from sklearn.model_selection import *
 9 from sklearn.neighbors import *
10 from sklearn.preprocessing import *
11 from sklearn.naive_bayes import *
12 from sklearn.tree import DecisionTreeClassifier,export_graphviz
13 
def decision(data_url="http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt"):
    """Train a decision tree on the Titanic data set and print its accuracy.

    The function reads the passenger table, keeps the ``pclass``, ``age`` and
    ``sex`` columns as features and ``survived`` as the target, fills missing
    ages with the mean age, one-hot encodes the categorical features with
    ``DictVectorizer``, fits a ``DecisionTreeClassifier`` on a random 75 %
    training split and prints the accuracy on the remaining 25 %.

    :param data_url: Path or URL of the CSV file passed to ``pandas.read_csv``.
        Defaults to the location used by the original script; pass a local
        copy if that host is not reachable.
    :return: None. All results are printed.
    """
    # Load the raw passenger table.
    titan = pd.read_csv(data_url)

    # Select features and target. The explicit copy makes ``x`` an independent
    # frame, so filling ``age`` below really modifies ``x`` instead of writing
    # to a temporary (the chained ``inplace=True`` form is unreliable and is a
    # no-op under pandas copy-on-write).
    x = titan[['pclass', 'age', 'sex']].copy()
    y = titan['survived']
    print(x)

    # Replace missing ages with the mean age.
    x['age'] = x['age'].fillna(x['age'].mean())

    # Split into training and test sets (25 % held out for testing).
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

    # Feature engineering: turn each row into a dict so that DictVectorizer
    # one-hot encodes the string-valued columns and passes numbers through.
    vectorizer = DictVectorizer(sparse=False)
    x_train = vectorizer.fit_transform(x_train.to_dict(orient="records"))

    # ``get_feature_names`` was removed from newer scikit-learn releases in
    # favour of ``get_feature_names_out``; support both.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    print(feature_names)

    # Encode the test set with the vocabulary learned from the training set.
    x_test = vectorizer.transform(x_test.to_dict(orient="records"))
    print(x_train)

    # Fit the decision tree on the training data.
    dec = DecisionTreeClassifier()
    dec.fit(x_train, y_train)

    # Report accuracy on the held-out test set.
    print("预测的准确率:", dec.score(x_test, y_test))

    # To export the fitted tree for Graphviz, pass the vectorizer's own names
    # so they always match the encoded columns:
    #   export_graphviz(dec, out_file="./tree.dot",
    #                   feature_names=list(feature_names))
    return None


if __name__ == '__main__':
    decision()

 

posted @ 2019-01-22 13:58  wydxry  阅读(307)  评论(0)    收藏  举报
Live2D