'''
转换数据
'''
import pandas as pd
# Toy dataset with a nominal feature (color), an ordinal feature (size),
# a numeric-looking feature stored as strings (prize) and a class label.
df = pd.DataFrame([
    ['green', 'M', '10.2', 'class1'],
    ['red', 'L', '13.5', 'class2'],
    ['blue', 'XL', '15.3', 'class1'],
])
df.columns = ['color', 'size', 'prize', 'class label']
print(df)

# Ordinal feature: map each size to an integer that preserves M < L < XL.
size_mapping = {
    'XL': 3,
    'L': 2,
    'M': 1,
}
df['size'] = df['size'].map(size_mapping)
print(df)

# Class labels: assign one integer per distinct label. The labels are sorted
# before enumeration because iterating a bare set of strings has no stable
# order between interpreter runs, which made the encoding irreproducible.
class_mapping = {
    label: idx
    for idx, label in enumerate(sorted(set(df['class label'])))
}
df['class label'] = df['class label'].map(class_mapping)
print(df)

# One-hot encode the remaining string columns.
# NOTE: 'prize' holds strings, so it is one-hot encoded here as well
# (prize_10.2, prize_13.5, ...); convert it with pd.to_numeric beforehand
# if it should stay a single numeric column.
result = pd.get_dummies(df)
print(result)
输出结果(某一次运行的结果;class label 的整数编号取决于标签被枚举的顺序):
color size prize class label
0 green M 10.2 class1
1 red L 13.5 class2
2 blue XL 15.3 class1
color size prize class label
0 green 1 10.2 class1
1 red 2 13.5 class2
2 blue 3 15.3 class1
color size prize class label
0 green 1 10.2 1
1 red 2 13.5 0
2 blue 3 15.3 1
size class label color_blue ... prize_10.2 prize_13.5 prize_15.3
0 1 1 0 ... 1 0 0
1 2 0 0 ... 0 1 0
2 3 1 1 ... 0 0 1
'''
转换数据----连续数据离散化
'''
import pandas as pd
import matplotlib.pyplot as mp
ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]
# Discretize the continuous ages into groups. With these edges pd.cut
# produces the right-closed intervals (0, 25], (25, 35], (35, 60], (60, 100].
bins = [0, 25, 35, 60, 100]
cut_1 = pd.cut(ages, bins)
print(cut_1)
# Count how many ages fall into each interval. The top-level pd.value_counts
# is deprecated since pandas 2.1; Series.value_counts is its replacement and
# likewise orders the result by descending count.
data = pd.Series(cut_1).value_counts()
# Show the per-interval counts as a bar chart with tick labels rotated 30 degrees.
data.plot(kind='bar', rot=30)
mp.show()
输出结果:
[(0, 25], (0, 25], (0, 25], (25, 35], (0, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]
Length: 12
Categories (4, interval[int64]): [(0, 25] < (25, 35] < (35, 60] < (60, 100]]
(图:各年龄区间人数的柱状图)