Pandas学习之Series

1、Series

1.1 Series的创建

import pandas as pd
# Data first, index second: the values 1, 2, 3 are labelled 4, 5, 6.
ss = pd.Series(data=[1, 2, 3], index=[4, 5, 6])
print(ss)

# Custom string labels used as the index.
s = pd.Series([10, 2, 3, 4, 5], index=list("ABCDE"))
print(s)

4 1
5 2
6 3
dtype: int64
A 10
B 2
C 3
D 4
E 5
dtype: int64

# Give the Series a name through the ``name`` argument; it is shown in the printed footer.
labels = ['A', 'B', 'C', 'D', 'E']
s = pd.Series([10, 2, 3, 4, 5], index=labels, name='Mouth')
print(s)

A 10
B 2
C 3
D 4
E 5
Name: Mouth, dtype: int64

# Create a Series from a dict: the keys become the index, the values become the data.
s = pd.Series(dict(a=1, b=2, c=3))
print(s)

# Building a new Series from an existing one with ``index`` keeps only the listed labels.
wanted_labels = ["a", "c"]
s1 = pd.Series(s, index=wanted_labels)
print(s1)

a 1
b 2
c 3
dtype: int64
a 1
c 3
dtype: int64

1.2 Series的属性

s = pd.Series([1, 2, 3, 4, 5], index=list("ABCDE"))
print(s)

# Basic attributes of a Series. s.keys() is an alternative way to get the index.
index_labels = s.index
raw_values = s.values
print(f"索引:{index_labels}")
print(f"数值:{raw_values}")
print(f"维度:{s.ndim}")
print(f"形状:{s.shape}")
print(f"大小:{s.size}")

A 1
B 2
C 3
D 4
E 5
dtype: int64
索引:Index(['A', 'B', 'C', 'D', 'E'], dtype='object')
数值:[1 2 3 4 5]
维度:1
形状:(5,)
大小:5

print(s.loc['B'])  # explicit index: look up by label
print(s.iloc[1])  # implicit index: look up by integer position
# Slicing
print(s.loc['A':'B'])  # note: a label (explicit) slice is closed on both ends
print(s.iloc[0:2])

print(s.at['C'])  # explicit index (label), single value
print(s.iat[2])  # implicit index (position), single value

2
2
A 1
B 2
dtype: int64
A 1
B 2
dtype: int64
3
3

1.3 Series的数据访问

# Six values labelled 'A'..'F', used to show the different ways of reading data.
s = pd.Series([1, 2, 3, 4, 5, 6], index=list("ABCDEF"))

print(s['A'], '\n')            # single element by label
below_four = s < 4             # boolean mask
print(s[below_four], '\n')     # keep only the elements smaller than 4
print(s.head(3), '\n')         # head() returns the first 5 rows by default
print(s.tail(4), '\n')         # tail() returns the last 5 rows by default

A 1
B 2
C 3
dtype: int64

C 3
D 4
E 5
F 6
dtype: int64

1.4 Series的常用函数

import numpy as np
# Both np.nan and None end up as NaN, so the Series is stored as float64.
s = pd.Series(
    [1, 2, np.nan, None, 3, 4, 5],
    index=list("ABCDEFG"),
    name="data",
)
s.head(3)  # first three rows
s.tail(6)  # last six rows

B 2.0
C NaN
D NaN
E 3.0
F 4.0
G 5.0
Name: data, dtype: float64

# Show all descriptive statistics at once
s.describe()    # for individual functions such as s.count(), see the numpy equivalents

count 5.000000
mean 3.000000
std 1.581139
min 1.000000
25% 2.000000
50% 3.000000
75% 4.000000
max 5.000000
Name: data, dtype: float64

# Number of (non-missing) elements
print(s.count(),'\n')
# Check which entries are missing values
print(s.isna())
# Check whether each element is one of the given values
s.isin([4,5])

A False
B False
C True
D True
E False
F False
G False
Name: data, dtype: bool

A False
B False
C False
D False
E False
F True
G True
Name: data, dtype: bool

s.sort_values()  # sort by value; NaN is NOT removed -- by default it is placed at the end. Returns a new Series, s is unchanged
s['C']=3
print(F"众数\n{s.mode()}\n")
print(F"出现次数统计\n{s.value_counts()}")
# Remove duplicates (returns a new Series; s itself is unchanged)
s.drop_duplicates()
# s.unique()  # another way to deduplicate

print(F"去重后的元素个数:{s.nunique()}")

众数
0 3.0
Name: data, dtype: float64

出现次数统计
data
3.0 2
1.0 1
2.0 1
4.0 1
5.0 1
Name: count, dtype: int64
去重后的元素个数:5

s.sort_index()  # sort by index label

A 1.0
B 2.0
C 3.0
D NaN
E 3.0
F 4.0
G 5.0
Name: data, dtype: float64

print(s)
s.diff()  # first difference: result[i] = s[i] - s[i-1]; NaN wherever either operand is missing

A 1.0
B 2.0
C 3.0
D NaN
E 3.0
F 4.0
G 5.0
Name: data, dtype: float64

A NaN
B 1.0
C 1.0
D NaN
E NaN
F 1.0
G 1.0
Name: data, dtype: float64

1.5 Series使用案例

1.5.1 学生成绩统计

创建一个包含10名学生成绩的Series,成绩范围在50——100之间

计算平均分、最高分、最低分，并找出高于平均分的学生人数

import numpy as np
import pandas as pd
# Ten reproducible random scores in the range 50..100
# (the upper bound passed to randint is exclusive, hence 101).
np.random.seed(42)
values = np.random.randint(50, 101, 10)

# Labels '学生1' .. '学生10', built with a comprehension instead of an append loop.
indexes = [f'学生{i}' for i in range(1, 11)]
scores = pd.Series(values, indexes)
print(scores)

# Compute the mean once and reuse it for both the report and the filter.
means = scores.mean()
print(f"平均分:{means}")
print(f"最高分:{scores.max()}")
print(f"最低分:{scores.min()}")

# Students scoring strictly above the mean, followed by how many there are.
high_scores = scores[scores > means]
print(high_scores)
print(high_scores.count())

学生1 88
学生2 78
学生3 64
学生4 92
学生5 57
学生6 70
学生7 88
学生8 68
学生9 72
学生10 60
dtype: int32
平均分:73.7
最高分:92
最低分:57
学生1 88
学生2 78
学生4 92
学生7 88
dtype: int32
4

1.5.2 销售数据分析

某产品过去12个月的销售量Series

计算季度平均销量

找出销量最高的月份

计算月环比增长率

找出连续增长超过两个月的月份

indexs=pd.date_range('2026-01-01',periods=12,freq='ME')
print(indexs)
sales=pd.Series([120,135,145,160,155,170,180,175,190,200,210,220],indexs)
print(sales)
print(F"季度平均销售量:\n{sales.resample('QE').mean()}")

DatetimeIndex(['2026-01-31', '2026-02-28', '2026-03-31', '2026-04-30',
'2026-05-31', '2026-06-30', '2026-07-31', '2026-08-31',
'2026-09-30', '2026-10-31', '2026-11-30', '2026-12-31'],
dtype='datetime64[ns]', freq='ME')
2026-01-31 120
2026-02-28 135
2026-03-31 145
2026-04-30 160
2026-05-31 155
2026-06-30 170
2026-07-31 180
2026-08-31 175
2026-09-30 190
2026-10-31 200
2026-11-30 210
2026-12-31 220
Freq: ME, dtype: int64
季度平均销售量:
2026-03-31 133.333333
2026-06-30 161.666667
2026-09-30 181.666667
2026-12-31 210.000000
Freq: QE-DEC, dtype: float64

# idxmax() gives the index label (here a month-end timestamp) of the largest value.
print(F"销量最高月份:\n{sales.idxmax()}")
# pct_change() is the change relative to the previous element; the first entry has no predecessor, so it is NaN.
print(F"月环比增长率:\n{sales.pct_change()}")

销量最高月份:
2026-12-31 00:00:00
月环比增长率:
2026-01-31 NaN
2026-02-28 0.125000
2026-03-31 0.074074
2026-04-30 0.103448
2026-05-31 -0.031250
2026-06-30 0.096774
2026-07-31 0.058824
2026-08-31 -0.027778
2026-09-30 0.085714
2026-10-31 0.052632
2026-11-30 0.050000
2026-12-31 0.047619
Freq: ME, dtype: float64

# List the months that complete a run of at least 3 consecutive month-over-month increases
a=sales.pct_change()>0
print(a)
# The 3-month rolling sum of the boolean flags equals 3 only when the current month and the
# two months before it all grew, so each selected label is the LAST month of such a window.
a[a.rolling(3).sum()>=3].keys().tolist()

2026-01-31 False
2026-02-28 True
2026-03-31 True
2026-04-30 True
2026-05-31 False
2026-06-30 True
2026-07-31 True
2026-08-31 False
2026-09-30 True
2026-10-31 True
2026-11-30 True
2026-12-31 True
Freq: ME, dtype: bool

[Timestamp('2026-04-30 00:00:00'),
Timestamp('2026-11-30 00:00:00'),
Timestamp('2026-12-31 00:00:00')]

1.5.3 每小时销售额统计

现有某商店每小时销售额Series

按天重采样每日销售额

计算每天营业时间(8-22)和非营业时间的销售额比例

找出销售额最高的3个小时

# Simulate one day of hourly sales: 24 random integers in [0, 100), seeded for reproducibility.
np.random.seed(24)
# Use an unambiguous "date time" string for the start; the previous form joined the date and
# the time with a hyphen ('2026-03-01-00:00:00'), which depends on lenient parsing.
hourly_index = pd.date_range('2026-03-01 00:00:00', periods=24, freq='h')
hours_sales = pd.Series(np.random.randint(0, 100, 24), index=hourly_index)
print(hours_sales)

2026-03-01 00:00:00 34
2026-03-01 01:00:00 3
2026-03-01 02:00:00 64
2026-03-01 03:00:00 87
2026-03-01 04:00:00 17
2026-03-01 05:00:00 17
2026-03-01 06:00:00 1
2026-03-01 07:00:00 79
2026-03-01 08:00:00 4
2026-03-01 09:00:00 99
2026-03-01 10:00:00 82
2026-03-01 11:00:00 11
2026-03-01 12:00:00 99
2026-03-01 13:00:00 15
2026-03-01 14:00:00 73
2026-03-01 15:00:00 18
2026-03-01 16:00:00 7
2026-03-01 17:00:00 25
2026-03-01 18:00:00 35
2026-03-01 19:00:00 95
2026-03-01 20:00:00 28
2026-03-01 21:00:00 0
2026-03-01 22:00:00 12
2026-03-01 23:00:00 95
Freq: h, dtype: int32

# Resample to daily frequency and sum to get each day's total sales
day_sales=hours_sales.resample("D").sum()
print("全天总销售额:")
print(day_sales)

全天总销售额:
2026-03-01 1000
Freq: D, dtype: int32

# Split the day into business hours (8-22) and non-business hours, then total each part.

# Variant 1: boolean mask on the hour component of the index.
# hours_sales.between_time("8:00","22:00") selects the same rows for this hourly data.
hour_of_day = hours_sales.index.hour
in_business_hours = (hour_of_day >= 8) & (hour_of_day <= 22)
bus_time_sales = hours_sales[in_business_hours].sum()
print(f"营业时间总销售额:{bus_time_sales}")

# Variant 2: drop() removes the business-hour rows and leaves everything else.
business_index = hours_sales.between_time("8:00", "22:00").index
no_bus_time_sales = hours_sales.drop(business_index)
print(f"非营业时间总销售额:{no_bus_time_sales.sum()}")
# NOTE(review): the task statement asks for the ratio between the two totals, but only the
# totals are printed here; the ratio would be bus_time_sales / no_bus_time_sales.sum().
# NOTE(review): both selections include the 22:00 row -- confirm that is the intended boundary.

营业时间总销售额:603
非营业时间总销售额:397

# Find the three hours with the highest sales
print(hours_sales.nlargest(3))

2026-03-01 09:00:00 99
2026-03-01 12:00:00 99
2026-03-01 19:00:00 95
dtype: int32

posted @ 2026-03-03 13:33 wangzy336 阅读(14) 评论(0) 收藏举报

刷新页面返回顶部

wangzy336