pm2.5
五城PM2.5分析
① csv文件数据处理
- 名词解释
| 名词 | 释义 |
|---|---|
| DEWP | 露点 |
| HUMI | 湿度 |
| PRES | 气压 |
| TEMP | 温度 |
| cbwd | 风向 |
| Iws | 累计风速 |
| Precipitation | 降雨量 |
| Iprec | 累计降雨量 |
- 数据导入
pd.read_csv # 我无法使用相对路径,每次都用的绝对路径,很不简洁
- 粗处理
- 或许应该采用:
PM2.5_data_dict={'beijing': ('略', ['PM_Dongsi', 'PM_Dongsihuan', 'PM_Nongzhanguan'], '北京'), 'chengdu': ('', ['PM_Caotangsi', 'PM_Shahepu'], '成都'), 'guangzhou': ('', ['PM_City Station', 'PM_5th Middle School'], '广州'), 'shanghai': ('', ['PM_Jingan', 'PM_Xuhui'], '上海'), 'shenyang': ('', ['PM_Taiyuanjie', 'PM_Xiaoheyan'], '沈阳')}
#但我遇到bug:只能读取最后一条键值对
#所以后面基于dict的处理都只能输出shenyang的数据……
- 我的做法
#导入后未将数据整合,逻辑简单了但重复的操作增多
#错了又错,最后找到方法了11.23
#修改了season列,显示具体的季节名称而非序号
df_bj['season'] = df_bj['season'].map(
    {1: 'Spring', 2: 'Summer', 3: 'Autumn', 4: 'Winter'})
df_sh['season'] = df_sh['season'].map(
    {1: 'Spring', 2: 'Summer', 3: 'Autumn', 4: 'Winter'})
df_sy['season'] = df_sy['season'].map(
    {1: 'Spring', 2: 'Summer', 3: 'Autumn', 4: 'Winter'})
df_cd['season'] = df_cd['season'].map(
    {1: 'Spring', 2: 'Summer', 3: 'Autumn', 4: 'Winter'})
df_gz['season'] = df_gz['season'].map(
    {1: 'Spring', 2: 'Summer', 3: 'Autumn', 4: 'Winter'})
② 计算与分析
- 计算均值
- 题目要求:计算各城市每日均值
#一开始采用了这种傻瓜且错误的办法,没有达到预期效果并且使数据更复杂,同时还创建了一个新的csv文件用于存放新列
df_bj['PM_Meanbj'] = round((df_bj['PM_Dongsi']+df_bj['PM_Dongsihuan']+df_bj['PM_Nongzhanguan'])/3)
df_cd['PM_Meancd'] = round((df_cd['PM_Caotangsi']+df_cd['PM_Shahepu'])/2)
df_gz['PM_Meangz'] = round(
    (df_gz['PM_5th Middle School']+df_gz['PM_City Station'])/2)
df_sh['PM_Meansh'] = round((df_sh['PM_Jingan']+df_sh['PM_Xuhui'])/2)
df_sy['PM_Meansy'] = round((df_sy['PM_Taiyuanjie']+df_sy['PM_Xiaoheyan'])/2)
#改良
# 以北京为例
bj_pm = df_bj.loc[:, ['PM_Dongsi', 'PM_Dongsihuan', 'PM_Nongzhanguan']]
bj_pm.replace(0, np.nan)
bj_pm_mean = (bj_pm.mean(axis=0)).mean()
#结果:所有时间的国内气象站测点平均值90.11966033999813
#并不是daily的
- 再改良->得出结果
dict = {'beijing': ('',['PM_Dongsi', 'PM_Dongsihuan', 'PM_Nongzhanguan'], '北京'), 'shanghai': ('', ['PM_Jingan', 'PM_Xuhui'],'上海'), 'chengdu':('',['PM_Caotangsi','PM_Shahepu'],'成都'), 'guangzhou':('',['PM_City Station','PM_5th Middle School'],'广州'), 'shenyang':('',['PM_Taiyuanjie', 'PM_Xiaoheyan'],'沈阳')}
bj_district = dict['beijing'][1]
bj_day_pm=df_bj.groupby(by=['year','month','day'])[bj_district].mean().mean(axis=1)
- 数据可视化
-
基于中国地图显示五城pm2.5均值
- 开始放飞
# 将数据可视化——PM2.5地理分布图
# 激活对象
geo = Geo()
# 添加中国地图
geo.add_schema(maptype='china')
# 添加数据
geo.add('PM2.5值', [['北京', bj_pm_mean], ['上海', sh_pm_mean], [
    '成都', cd_pm_mean], ['广州', gz_pm_mean], ['沈阳', sy_pm_mean]])
geo.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
geo.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(is_piecewise=True),
    title_opts=opts.TitleOpts(title='北上广成沈五城PM2.5平均值')
)
# 绘制成功
geo.render()
-
绘制逐年空气质量变化->matplotlib
- 第一次尝试,只得出了每个测点每年的平均值
# 以北京为例
bj_year_pm = df_bj.groupby(df_bj['year']).mean().loc[:, ['PM_Dongsi', 'PM_Dongsihuan', 'PM_Nongzhanguan','PM_US Post']]
# 未得出三个测点逐年平均值
# warning
# The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False.
# 数据中有空值 在mean中添加 mean(numeric_only=True)
# 所以试图…
(93.209072+92.496761+90.041620)/3
# 91.91581766666667
# 先这么继续吧,说不定后面就知道该怎么写了
- 经过多次尝试,终于找到方法了->显示年均值
bj_day_pm=df_bj.groupby(by=['year','month','day'])[bj_district].mean().mean(axis=1)
#再次修改
# 注意:resample 需要时间类型的索引,因此下面的 set_index 必须在 resample 之前执行
bj_year_pm=df_bj.resample('Y')[bj_district].mean().mean(axis=1)
df_bj.set_index(pd.PeriodIndex(year=df_bj['year'], month=df_bj['month'], day=df_bj['day'], hour=df_bj['hour'], freq='H'), inplace=True)
year_x=bj_year_pm.dropna().index
bj_year_y=list(bj_year_pm)
plt.figure(figsize=(12,7))
plt.title('Year_Average_PM2.5')
plt.xlabel('year')
plt.ylabel('PM2.5')
plt.plot(year_x,bj_year_y,label="beijing")
plt.plot(year_x,cd_year_y,label='chengdu')
plt.plot(year_x,sh_year_y,label='shanghai')
plt.plot(year_x,sy_year_y,label='shenyang')
plt.plot(year_x,gz_year_y,label='guangzhou')
plt.legend()
plt.show()
-

浙公网安备 33010602011771号