# TensorFlow泰坦尼克号实战

• 本次实战数据来源于 Kaggle 竞赛 “Titanic: Machine Learning from Disaster”（https://www.kaggle.com/c/titanic），可以自行下载。
• 本次任务的目标是根据乘客信息预测他们在Titanic号撞击冰山沉没后能否生存。

## 准备数据

### 读取数据

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import models,layers



### 数据预处理

def preprocessing(dfdata):
    """Convert a raw Titanic passenger table into a numeric feature table.

    Args:
        dfdata: pandas DataFrame containing the columns ``Pclass``, ``Sex``,
            ``Age``, ``SibSp``, ``Parch``, ``Fare``, ``Cabin`` and
            ``Embarked``.

    Returns:
        A new DataFrame sharing ``dfdata``'s index, with columns in this
        order: one-hot ``Pclass_*`` columns, one-hot ``Sex`` columns (named
        after the raw values), ``Age`` (missing values replaced by 0),
        ``Age_null``, ``SibSp``, ``Parch``, ``Fare``, ``Cabin_null`` and
        one-hot ``Embarked_*`` columns including an ``Embarked_nan``
        indicator.

    Note:
        The one-hot columns are derived from the values present in
        ``dfdata``; two inputs with different sets of categories produce
        different column sets. ``SibSp``, ``Parch`` and ``Fare`` are copied
        unchanged, so missing values in them are not filled.
    """
    # An explicit integer dtype keeps the dummy columns numeric on every
    # pandas version; pandas >= 2.0 would otherwise emit bool columns, which
    # turn the mixed frame into an object array when converted to NumPy.
    dummy_dtype = 'uint8'

    # Pclass: one-hot encode, prefixing each column with "Pclass_".
    dfPclass = pd.get_dummies(dfdata['Pclass'], dtype=dummy_dtype)
    dfPclass.columns = ['Pclass_' + str(x) for x in dfPclass.columns]

    # Sex: one-hot encode, keeping the raw values as column names.
    dfSex = pd.get_dummies(dfdata['Sex'], dtype=dummy_dtype)

    dfresult = pd.concat([dfPclass, dfSex], axis=1)

    # Age: fill missing ages with 0 and keep a separate missing-value flag
    # so the filled 0 can be told apart from a real age.
    dfresult['Age'] = dfdata['Age'].fillna(0)
    dfresult['Age_null'] = pd.isna(dfdata['Age']).astype('int32')

    # SibSp, Parch, Fare: copied through unchanged.
    dfresult['SibSp'] = dfdata['SibSp']
    dfresult['Parch'] = dfdata['Parch']
    dfresult['Fare'] = dfdata['Fare']

    # Cabin: only whether the value is missing is used as a feature.
    dfresult['Cabin_null'] = pd.isna(dfdata['Cabin']).astype('int32')

    # Embarked: one-hot encode; dummy_na=True adds an indicator column for
    # missing values, which becomes "Embarked_nan" after renaming.
    dfEmbarked = pd.get_dummies(
        dfdata['Embarked'], dummy_na=True, dtype=dummy_dtype
    )
    dfEmbarked.columns = ['Embarked_' + str(x) for x in dfEmbarked.columns]
    dfresult = pd.concat([dfresult, dfEmbarked], axis=1)

    return dfresult

# Build the model inputs: features come from preprocessing(), labels from the
# 'Survived' column of the same raw frame.
# NOTE(review): dftrain_raw and dftest_raw are not defined in this section;
# they are presumably the raw train/test DataFrames loaded earlier -- confirm.
x_train, y_train = preprocessing(dftrain_raw), dftrain_raw['Survived'].values
x_test, y_test = preprocessing(dftest_raw), dftest_raw['Survived'].values

# Report the feature-table shapes for both splits.
for caption, features in (("x_train.shape =", x_train),
                          ("x_test.shape =", x_test)):
    print(caption, features.shape)


posted @ 2020-03-27 22:03  景云ⁿ  阅读(314)  评论(0)  编辑  收藏  举报