第三周作业

第一部分——飞机客户数据分析预测

代码一:读取数据并统计各列的空值数、最大值和最小值

import pandas as pd

# Source data set and the destination file for the exploration summary.
datafile = 'D:/Jupyter/a/air_data.csv'
resultfile = 'D:/Jupyter/a/explore.csv'

data = pd.read_csv(datafile, encoding='utf-8')

# Summarise every column (numeric and non-numeric) and transpose the result
# so that each row of the summary corresponds to one column of `data`.
summary = data.describe(percentiles=[], include='all').transpose()

# `count` is the number of non-null entries, so the remainder is the number
# of missing values in that column.
summary['null'] = data.shape[0] - summary['count']

# Keep only the three statistics of interest and give them the Chinese
# headers used in the report (null count, maximum, minimum).
explore = summary.loc[:, ['null', 'max', 'min']].rename(
    columns={'null': u'空值数', 'max': u'最大值', 'min': u'最小值'}
)

explore.to_csv(resultfile)
print(explore)

                           空值数        最大值    最小值
MEMBER_NO                  0.0    62988.0    1.0
FFP_DATE                     0        NaN    NaN
FIRST_FLIGHT_DATE            0        NaN    NaN
GENDER                       3        NaN    NaN
FFP_TIER                   0.0        6.0    4.0
WORK_CITY                 2269        NaN    NaN
WORK_PROVINCE             3248        NaN    NaN
WORK_COUNTRY                26        NaN    NaN
AGE                      420.0      110.0    6.0
LOAD_TIME                    0        NaN    NaN
FLIGHT_COUNT               0.0      213.0    2.0
BP_SUM                     0.0   505308.0    0.0
EP_SUM_YR_1                0.0        0.0    0.0
EP_SUM_YR_2                0.0    74460.0    0.0
SUM_YR_1                 551.0   239560.0    0.0
SUM_YR_2                 138.0   234188.0    0.0
SEG_KM_SUM                 0.0   580717.0  368.0
WEIGHTED_SEG_KM            0.0  558440.14    0.0
LAST_FLIGHT_DATE             0        NaN    NaN
AVG_FLIGHT_COUNT           0.0     26.625   0.25
AVG_BP_SUM                 0.0    63163.5    0.0
BEGIN_TO_FIRST             0.0      729.0    0.0
LAST_TO_END                0.0      731.0    1.0
AVG_INTERVAL               0.0      728.0    0.0
MAX_INTERVAL               0.0      728.0    0.0
ADD_POINTS_SUM_YR_1        0.0   600000.0    0.0
ADD_POINTS_SUM_YR_2        0.0   728282.0    0.0
EXCHANGE_COUNT             0.0       46.0    0.0
avg_discount               0.0        1.5    0.0
P1Y_Flight_Count           0.0      118.0    0.0
L1Y_Flight_Count           0.0      111.0    0.0
P1Y_BP_SUM                 0.0   246197.0    0.0
L1Y_BP_SUM                 0.0   259111.0    0.0
EP_SUM                     0.0    74460.0    0.0
ADD_Point_SUM              0.0   984938.0    0.0
Eli_Add_Point_Sum          0.0   984938.0    0.0
L1Y_ELi_Add_Points         0.0   728282.0    0.0
Points_Sum                 0.0   985572.0    0.0
L1Y_Points_Sum             0.0   728282.0    0.0
Ration_L1Y_Flight_Count    0.0        1.0    0.0
Ration_P1Y_Flight_Count    0.0        1.0    0.0
Ration_P1Y_BPS             0.0   0.999989    0.0
Ration_L1Y_BPS             0.0   0.999993    0.0
Point_NotFlight            0.0      140.0    0.0

代码二:分析数据并绘制基本图像

from datetime import datetime
import matplotlib.pyplot as plt
# Parse the enrolment dates (strings in YYYY/MM/DD form) and extract the year.
ffp = data['FFP_DATE'].apply(lambda x: datetime.strptime(x, '%Y/%m/%d'))
ffp_year = ffp.map(lambda x: x.year)

# Histogram of the number of members who enrolled in each year.
fig = plt.figure(figsize=(8, 5))
# Use the SimHei font so the Chinese axis labels and titles can be rendered.
plt.rcParams['font.sans-serif'] = 'SimHei'
# Boolean False (not the string 'False'): draw minus signs as ASCII hyphens.
plt.rcParams['axes.unicode_minus'] = False
plt.hist(ffp_year, bins='auto', color='#0504aa')
plt.xlabel('年份')
plt.ylabel('入会人数')
plt.title('各年份会员入会人数(3134)', fontsize=15)
plt.show()
# close must actually be called; a bare `plt.close` is a no-op expression.
plt.close()

# Count members by gender. The counts are computed once with
# Series.value_counts(), which skips missing values by default, rather than
# calling the deprecated top-level pd.value_counts() once per category.
gender_counts = data['GENDER'].value_counts()
male = gender_counts['男']
female = gender_counts['女']

# Pie chart of the male/female share of members.
fig = plt.figure(figsize=(10, 6))
plt.pie(
    [male, female],
    labels=['男', '女'],
    colors=['lightskyblue', 'lightcoral'],
    autopct='%1.1f%%',
)
# NOTE(review): the other plots in this file tag their titles with "(3134)";
# this one uses "(3135)" - confirm which is intended.
plt.title('会员性别比例(3135)', fontsize=15)
plt.show()
plt.close()

# Count members in each membership tier (4, 5 and 6). The counts are computed
# once with Series.value_counts() instead of calling the deprecated top-level
# pd.value_counts() separately for every tier.
tier_counts = data['FFP_TIER'].value_counts()
lv_four = tier_counts[4]
lv_five = tier_counts[5]
lv_six = tier_counts[6]

# Bar chart of the number of members per tier.
fig = plt.figure(figsize=(8, 5))
plt.bar(
    x=range(3),
    height=[lv_four, lv_five, lv_six],
    width=0.4,
    alpha=0.8,
    color='skyblue',
)
# Label the three bar positions with their tier numbers.
plt.xticks(list(range(3)), ['4', '5', '6'])
plt.xlabel('会员等级')
plt.ylabel('会员人数')
plt.title('会员各级别人数(3134)', fontsize=15)
plt.show()
# close must actually be called; a bare `plt.close` is a no-op expression.
plt.close()

# Member ages: drop the missing entries, then convert the rest to integers.
age = data['AGE'].dropna().astype('int64')

# Box plot of the member age distribution.
fig = plt.figure(figsize=(5, 10))
box_style = dict(facecolor='lightblue')
plt.boxplot(
    age,
    labels=['会员年龄'],
    boxprops=box_style,
    # patch_artist is needed so the box is a filled patch that accepts facecolor.
    patch_artist=True,
)
plt.title('会员年龄分布箱型图(3134)', fontsize=15)
plt.grid(axis='y')
plt.show()
plt.close()




posted @ 2023-03-12 21:38  迂幵  阅读(48)  评论(0)    收藏  举报