# coding: utf-8
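# First script: download the titanic3 dataset and preprocess it into a scaled
# feature array and a label array.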
# In[6]:
import urllib.request
import os
# In[7]:
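# Download the titanic3 spreadsheet once and cache it locally; the download is
# skipped if the file is already present under data/.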
url="http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls"
filepath="data/titanic3.xls"
if not os.path.isfile(filepath):
result=urllib.request.urlretrieve(url,filepath)
print('downloaded:',result)
# In[8]:
import numpy
import pandas as pd
# In[9]:
all_df = pd.read_excel(filepath)
# In[10]:
all_df[:2]
# In[11]:
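# Keep only the label ('survived') and the candidate feature columns.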
cols = ['survived', 'name', 'pclass', 'sex', 'age', 'sibsp',
        'parch', 'fare', 'embarked']
all_df = all_df[cols]
# In[12]:
all_df[:2]
# In[13]:
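# Count the missing values in each column.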
all_df.isnull().sum()
# In[14]:
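# Drop 'name'; it is not used as a model input.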
df=all_df.drop(['name'], axis=1)
# In[15]:
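# Fill missing 'age' values with the column mean (the next cell does the same for 'fare').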
age_mean = df['age'].mean()
df['age'] = df['age'].fillna(age_mean)
# In[16]:
fare_mean = df['fare'].mean()
df['fare'] = df['fare'].fillna(fare_mean)
# In[17]:
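# Encode 'sex' as an integer: female -> 0, male -> 1.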
df['sex'] = df['sex'].map({'female': 0, 'male': 1}).astype(int)
# In[18]:
df[:2]
# In[19]:
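# One-hot encode the 'embarked' port (C/Q/S) into separate 0/1 columns.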
x_OneHot_df = pd.get_dummies(data=df, columns=["embarked"])
# In[20]:
x_OneHot_df[:2]
# In[21]:
ndarray = x_OneHot_df.values
# In[22]:
ndarray.shape
# In[23]:
ndarray[:2]
# In[26]:
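# Column 0 of the array is 'survived' (the label); the remaining columns are the features.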
Label = ndarray[:,0]
Features = ndarray[:,1:]
# In[27]:
Features.shape
# In[28]:
Features[:2]
# In[29]:
Label.shape
# In[32]:
Label[:2]
# In[33]:
from sklearn import preprocessing
# In[34]:
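# Rescale every feature column to the [0, 1] range.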
minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
# In[35]:
scaledFeatures=minmax_scale.fit_transform(Features)
# In[36]:
scaledFeatures[:2]
# In[37]:
Label[:5]
# In[38]:
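# Randomly assign roughly 80% of the rows to the training set and the rest to
# the test set (no fixed seed here, so the split changes between runs).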
msk = numpy.random.rand(len(all_df)) < 0.8
train_df = all_df[msk]
test_df = all_df[~msk]
# In[39]:
print('total:', len(all_df),
      'train:', len(train_df),
      'test:', len(test_df))
# In[40]:
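# PreprocessData bundles the steps above: drop 'name', fill missing 'age'/'fare'
# with the column mean, encode 'sex', one-hot encode 'embarked', and min-max
# scale the feature columns. Note that the scaler (and get_dummies) is fitted on
# whichever DataFrame is passed in, so the train and test sets are transformed
# independently, exactly as the original cells do.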
def PreprocessData(raw_df):
    df = raw_df.drop(['name'], axis=1)
    age_mean = df['age'].mean()
    df['age'] = df['age'].fillna(age_mean)
    fare_mean = df['fare'].mean()
    df['fare'] = df['fare'].fillna(fare_mean)
    df['sex'] = df['sex'].map({'female': 0, 'male': 1}).astype(int)
    x_OneHot_df = pd.get_dummies(data=df, columns=["embarked"])
    ndarray = x_OneHot_df.values
    Features = ndarray[:, 1:]
    Label = ndarray[:, 0]
    minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
    scaledFeatures = minmax_scale.fit_transform(Features)
    return scaledFeatures, Label
# In[41]:
train_Features,train_Label=PreprocessData(train_df)
test_Features,test_Label=PreprocessData(test_df)
# In[42]:
train_Features[:2]
# In[43]:
train_Label[:2]
# coding: utf-8
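# Second script: build and train a Keras multilayer perceptron (MLP) on the
# preprocessed Titanic features; numpy.random.seed(10) below keeps the
# train/test split reproducible.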
# In[1]:
import numpy
import pandas as pd
from sklearn import preprocessing
numpy.random.seed(10)
# In[2]:
all_df = pd.read_excel("data/titanic3.xls")
# In[3]:
cols = ['survived', 'name', 'pclass', 'sex', 'age', 'sibsp',
        'parch', 'fare', 'embarked']
all_df = all_df[cols]
# In[4]:
msk = numpy.random.rand(len(all_df)) < 0.8
train_df = all_df[msk]
test_df = all_df[~msk]
# In[5]:
print('total:', len(all_df),
      'train:', len(train_df),
      'test:', len(test_df))
# In[6]:
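# Same preprocessing helper as in the first script: drop 'name', fill missing
# values, encode 'sex' and 'embarked', and min-max scale the features.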
def PreprocessData(raw_df):
    df = raw_df.drop(['name'], axis=1)
    age_mean = df['age'].mean()
    df['age'] = df['age'].fillna(age_mean)
    fare_mean = df['fare'].mean()
    df['fare'] = df['fare'].fillna(fare_mean)
    df['sex'] = df['sex'].map({'female': 0, 'male': 1}).astype(int)
    x_OneHot_df = pd.get_dummies(data=df, columns=["embarked"])
    ndarray = x_OneHot_df.values
    Features = ndarray[:, 1:]
    Label = ndarray[:, 0]
    minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
    scaledFeatures = minmax_scale.fit_transform(Features)
    return scaledFeatures, Label
# In[7]:
train_Features,train_Label=PreprocessData(train_df)
test_Features,test_Label=PreprocessData(test_df)
# In[8]:
from keras.models import Sequential
from keras.layers import Dense,Dropout
# In[9]:
model = Sequential()
# In[10]:
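# First hidden layer: 40 ReLU neurons taking the 9 preprocessed feature columns as input.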
model.add(Dense(units=40, input_dim=9,
                kernel_initializer='uniform',
                activation='relu'))
# In[11]:
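# Second hidden layer: 30 ReLU neurons.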
model.add(Dense(units=30,
                kernel_initializer='uniform',
                activation='relu'))
# In[12]:
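# Output layer: a single sigmoid neuron giving the estimated probability of survival.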
model.add(Dense(units=1,
                kernel_initializer='uniform',
                activation='sigmoid'))
# In[13]:
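# Binary cross-entropy loss with the Adam optimizer, tracking accuracy during training.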
model.compile(loss='binary_crossentropy',
              optimizer='adam', metrics=['accuracy'])
# In[14]:
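# Train for 30 epochs in batches of 30 samples, holding out 10% of the training
# data for validation; verbose=2 prints one summary line per epoch.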
train_history = model.fit(x=train_Features,
                          y=train_Label,
                          validation_split=0.1,
                          epochs=30,
                          batch_size=30, verbose=2)
# In[15]:
import matplotlib.pyplot as plt
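# Helper that plots a training metric and its validation counterpart per epoch.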
def show_train_history(train_history, train, validation):
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
# In[16]:
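# The history keys below ('acc'/'val_acc') match older Keras releases; newer
# Keras/TensorFlow versions record them as 'accuracy'/'val_accuracy', so rename
# the keys in these two calls if a KeyError is raised.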
show_train_history(train_history,'acc','val_acc')
# In[17]:
show_train_history(train_history,'loss','val_loss')
# In[18]:
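# Evaluate the trained model on the held-out test set; evaluate() returns
# [loss, accuracy], so scores[1] in the next cell is the test accuracy.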
scores = model.evaluate(x=test_Features,
                        y=test_Label)
# In[19]:
scores[1]
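# Optional sketch (not part of the original cells): model.predict returns the
# estimated survival probability for each test passenger.
probability = model.predict(test_Features)
print(probability[:5])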