# 二 数据缺失处理

## 2.1 空值判定

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Build a small Series whose last element is missing.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
ser = pd.Series(['111', '222', np.nan])
# Null check: yields a boolean Series that is True where the value is missing.
print(ser.isnull())


0    False
1    False
2     True
dtype: bool


## 2.2 丢弃NaN

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Build a small Series whose last element is missing.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
ser = pd.Series(['111', '222', np.nan])
# Drop every NaN entry; the surviving elements keep their original index labels.
print(ser.dropna())


0    111
1    222
dtype: object


# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# 3x3 frame: row 0 is complete, row 1 is partly missing, row 2 is entirely NaN.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
data = pd.DataFrame([
    [2.5, 5, 3],
    [1, np.nan, np.nan],
    [np.nan, np.nan, np.nan],
])
# Drop the ROWS (default axis=0) whose values are all NaN; partly filled rows stay.
print(data.dropna(how='all'))


     0    1    2
0  2.5  5.0  3.0
1  1.0  NaN  NaN


# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# 3x3 frame: row 0 is complete, row 1 is partly missing, row 2 is entirely NaN.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
data = pd.DataFrame([
    [2.5, 5, 3],
    [1, np.nan, np.nan],
    [np.nan, np.nan, np.nan],
])
# Drop COLUMNS (axis=1) that are entirely NaN. Every column here has at least
# one real value in row 0, so nothing is removed and the frame prints unchanged.
print(data.dropna(axis=1, how='all'))


     0    1    2
0  2.5  5.0  3.0
1  1.0  NaN  NaN
2  NaN  NaN  NaN


## 2.3 数据填充

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# 3x3 frame: row 0 is complete, row 1 is partly missing, row 2 is entirely NaN.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
data = pd.DataFrame([
    [2.5, 5, 3],
    [1, np.nan, np.nan],
    [np.nan, np.nan, np.nan],
])
# Replace every missing value with the scalar 0 (returns a new frame).
print(data.fillna(0))


     0    1    2
0  2.5  5.0  3.0
1  1.0  0.0  0.0
2  0.0  0.0  0.0


# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# 3x3 frame: row 0 is complete, row 1 is partly missing, row 2 is entirely NaN.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
data = pd.DataFrame([
    [2.5, 5, 3],
    [1, np.nan, np.nan],
    [np.nan, np.nan, np.nan],
])
# Per-column fill values: the keys are column labels, the values replace NaN
# in that column only.
dic = {0: 1, 1: 2, 2: 3}
# Fill each column's missing entries with its own replacement value.
print(data.fillna(dic))


     0    1    2
0  2.5  5.0  3.0
1  1.0  2.0  3.0
2  1.0  2.0  3.0


# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# 3x3 frame: row 0 is complete, row 1 is partly missing, row 2 is entirely NaN.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
data = pd.DataFrame([
    [2.5, 5, 3],
    [1, np.nan, np.nan],
    [np.nan, np.nan, np.nan],
])
# Forward-fill down each column, propagating the last valid value over at most
# one consecutive NaN (limit=1). DataFrame.ffill replaces the deprecated
# fillna(method='ffill') spelling and produces the same result.
filled = data.ffill(limit=1)
print(filled)


     0    1    2
0  2.5  5.0  3.0
1  1.0  5.0  3.0
2  1.0  NaN  NaN

posted @ 2020-05-07 14:58  知识追寻者  阅读(264)  评论(0)  编辑  收藏  举报