# 二 重复数据处理

## 2.1 构造数据

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Sample dataset used throughout the article. Rows 0 and 1 are identical on
# purpose so the duplicate-handling examples have something to detect.
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    # The printed table below has a 'hobby' column, so it must be in the data.
    'hobby': ['reading', 'reading', 'hiking'],
}
frame = pd.DataFrame(data)
print(frame)


    user  price    hobby
0  zszxz    100  reading
1  zszxz    100  reading
2   rose   -300   hiking


## 2.2 判定重复与处理

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Same sample dataset as the rest of the article (including 'hobby').
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
}
frame = pd.DataFrame(data)
# duplicated() returns a boolean Series: True marks a row that repeats an
# earlier row in every column; the first occurrence stays False.
print(frame.duplicated())


0    False
1     True
2    False
dtype: bool


# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Same sample dataset as the rest of the article (including 'hobby').
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
}
frame = pd.DataFrame(data)
# Boolean indexing with duplicated() SELECTS the repeated rows (row 1 here);
# it does not remove them from the frame.
print(frame[frame.duplicated()])


    user  price    hobby
1  zszxz    100  reading


# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Same sample dataset as the rest of the article (including 'hobby').
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
}
frame = pd.DataFrame(data)
# drop_duplicates() keeps the first occurrence of each row and discards the
# repeats, returning a new frame (the original is left untouched).
dedup_frame = frame.drop_duplicates()
print(dedup_frame)


    user  price    hobby
0  zszxz    100  reading
2   rose   -300   hiking


## 2.3 删除行或者列

# Same sample dataset as the rest of the article (including 'hobby').
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
}
# Build the frame from this snippet's own data so the example runs standalone.
frame = pd.DataFrame(data)
# drop() removes rows by index label by default and returns a new frame;
# pass columns=[...] (or axis=1) to remove columns instead.
del_frame = frame.drop([0])
print(del_frame)


    user  price    hobby
1  zszxz    100  reading
2   rose   -300   hiking


# 三 数据替换

## 3.1 单值替换

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Same sample dataset as the rest of the article (including 'hobby').
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
}
frame = pd.DataFrame(data)
# replace(old, new) swaps every occurrence of a single value and returns a
# new frame; the original frame is not modified.
re_frame = frame.replace(-300, 200)
print(re_frame)


    user  price    hobby
0  zszxz    100  reading
1  zszxz    100  reading
2   rose    200   hiking


## 3.2 多值替换

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Same sample dataset as the rest of the article (including 'hobby').
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
}
frame = pd.DataFrame(data)
# With two lists, values are replaced pairwise: 100 -> 200 and -300 -> 300.
re_frame = frame.replace([100, -300], [200, 300])
print(re_frame)


    user  price    hobby
0  zszxz    200  reading
1  zszxz    200  reading
2   rose    300   hiking


## 3.3 字典形式替换

replace()函数还支持字典形式入参，示例如下

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np

# Same sample dataset as the rest of the article (including 'hobby').
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
}
frame = pd.DataFrame(data)
# A dict argument maps each old value (key) to its replacement (value).
re_frame = frame.replace({-300: 200})
print(re_frame)


    user  price    hobby
0  zszxz    100  reading
1  zszxz    100  reading
2   rose    200   hiking


# 四 数据添加

# Same sample dataset as the rest of the article (including 'hobby').
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
}
frame = pd.DataFrame(data)
# Look up each row's hobby in the mapping and store the matching value in a
# new 'term' column.
hobby_term = {'reading': 300, 'hiking': 5}
frame['term'] = frame['hobby'].map(hobby_term)
print(frame)


    user  price    hobby  term
0  zszxz    100  reading   300
1  zszxz    100  reading   300
2   rose   -300   hiking     5


# 五 重命名索引

# Dataset as it stands after the previous section: the printed table below
# includes both the 'hobby' and 'term' columns.
data = {
    'user': ['zszxz', 'zszxz', 'rose'],
    'price': [100, 100, -300],
    'hobby': ['reading', 'reading', 'hiking'],
    'term': [300, 300, 5],
}
frame = pd.DataFrame(data)
# Rename the row index: a mapping passed positionally to rename() is applied
# to the index labels, and a new frame is returned.
reindex = {0: "user1", 1: "user2", 2: "user3"}
rename_frame = frame.rename(reindex)
print(rename_frame)


        user  price    hobby  term
user1  zszxz    100  reading   300
user2  zszxz    100  reading   300
user3   rose   -300   hiking     5

posted @ 2020-05-07 14:59  知识追寻者  阅读(935)  评论(0)  编辑  收藏  举报