# Part 2: clean the training data and plot basic distributions
# --- Data cleaning on df_train (defined earlier in the file) ---

# Fill missing ports of embarkation with 'S'.
df_train['embarked'] = df_train['embarked'].fillna('S')

# Drop the 'boat' column in place.
df_train.drop(['boat'], axis='columns', inplace=True)
df_train

# Drop the 'body' column in place.
df_train.drop(['body'], axis='columns', inplace=True)

# Keep a non-mutating copy without 'home.dest' for inspection, then drop the
# column from df_train itself as well.
df_train_temp = df_train.drop(['home.dest'], axis='columns', inplace=False)
df_train_temp
df_train.drop(['home.dest'], axis='columns', inplace=True)

# Fill missing fares with 0.
df_train['fare'] = df_train['fare'].fillna(0)
df_train.head(10)

# Replace each cabin value with its first character.
# Missing cabins are NaN (a float), so indexing them with x[0] raises
# TypeError; fill them with the 'N' marker first (the cabin plot further down
# filters on 'N'). If the column was already filled upstream, fillna is a
# no-op and the result is unchanged.
df_train['cabin'] = df_train['cabin'].fillna('N').str[0]

# Quick look at the category frequencies.
df_train['cabin'].value_counts()
df_train['sex'].value_counts()
df_train['embarked'].value_counts()
import matplotlib.pyplot as plt
import seaborn as sns
# --- Exploratory count plots for df_train ---
# NOTE: sns.factorplot was renamed to sns.catplot (seaborn 0.9) and later
# removed, and newer seaborn no longer accepts the column name as the first
# positional argument. catplot with an explicit x= keyword draws the same
# count plots.
sns.set_style('darkgrid')
sns.set_palette(sns.color_palette('Set2', 10))

# Passenger count by sex.
sns.catplot(x='sex', kind='count', data=df_train)

# Passenger count by class.
sns.catplot(x='pclass', kind='count', data=df_train)

# Histogram of passenger age.
df_train['age'].hist()

# Cabin counts: all rows, then excluding rows whose cabin value is 'N'.
sns.catplot(x='cabin', kind='count', data=df_train)
sns.catplot(x='cabin', kind='count', data=df_train[df_train['cabin'] != 'N'])

# Passenger count by port of embarkation.
sns.catplot(x='embarked', kind='count', data=df_train)

# Survival broken down by sex and by class.
sns.catplot(x='survived', kind='count', hue='sex', data=df_train)
sns.catplot(x='pclass', kind='count', hue='survived', data=df_train)
sns.catplot(x='sex', kind='count', hue='survived', data=df_train)