# Reference: https://gitee.com/yejiaxiong/scikit-learn-learning/blob/master/scikit-learn%E7%BC%96%E7%A8%8B%E7%BB%83%E4%B9%A050%E5%88%97.ipynb
# Toy shopping dataset with six rows. np.nan marks the missing entries in the
# size, gender, price and weight columns; color and bought are complete.
data = {
    "size": ["XL", "L", "M", np.nan, "M", "M"],
    "color": ["red", "green", "blue", "green", "red", "green"],
    "gender": ["female", "male", np.nan, "female", "female", "male"],
    "price": [199.0, 89.0, np.nan, 129.0, 79.0, 89.0],
    "weight": [500, 450, 300, np.nan, 410, np.nan],
    "bought": ["yes", "no", "yes", "no", "yes", "no"],
}
# Each dict key becomes a column of the DataFrame, in insertion order.
df = pd.DataFrame(data=data)
# Number of missing (NaN) entries in each column.
df.isna().sum()
# Total number of rows in the frame.
len(df.index)
# Fraction of missing values per column, rounded to two decimals.
(df.isna().sum() / len(df.index)).round(2)
from sklearn.impute import SimpleImputer
# Mean imputation: NaN entries are replaced by the mean of the values that
# are present in the column.
imputer = SimpleImputer(strategy='mean', missing_values=np.nan)
# Learn the column mean first, then write the filled values back in place.
# The double brackets keep the selection two-dimensional (a one-column frame).
imputer.fit(df[['weight']])
df[['weight']] = imputer.transform(df[['weight']])
# Bare expression: displays the updated frame when run as a notebook cell.
df
# 002 Fill missing values - mean (填充缺失值 - 均值)