Python电子游戏销售分析

一、摘要

本次分析的数据来源于Kaggle的Video Game Sales(电子游戏销售)数据集,包含销售日期截至2016年10月26日的16,500个游戏。

二、选题背景

通过销量和销售金额两个维度分析近几年来电子游戏行业在全球的发展概况,并形成可视化分析报告。

三、实施过程及代码

导入包

from pyecharts import options as opts
import pandas as pd
import numpy as np
from pyecharts.charts import Bar,Pie
import seaborn as sns
import matplotlib.pyplot as plt

数据的观察与清理

# Load the raw sales table and take a first look at it.
data = pd.read_csv('/home/kesci/input/Datasets6073/vgsales.csv')
data.head()

data.info()

# The dataset is large, so rows with missing values are simply dropped;
# the index is then rebuilt so it is contiguous again.
data = data.dropna().reset_index(drop=True)
data.head(10)

# Summary statistics: default (numeric) columns first, then object-dtype columns.
data.describe().T

data.describe(include='object').T

从用户方面

# Year is stored as text from here on; the labels below carry the
# float-style ".0" suffix (e.g. '2016.0') that the filters match against.
data['Year'] = data['Year'].astype(str)
recent_years = ['2020.0', '2019.0', '2018.0', '2017.0', '2016.0']
earlier_years = ['2015.0', '2014.0', '2013.0', '2012.0', '2011.0']
# Rows released in 2016-2020 and in 2011-2015 respectively.
x_first_5 = data[data['Year'].isin(recent_years)]
x_next_5 = data[data['Year'].isin(earlier_years)]
# Bar chart: number of rows per genre among the 2011-2015 releases.
genre_counts = x_next_5['Genre'].value_counts()
bar1 = (
    Bar(init_opts=opts.InitOpts(width='1450px', height='350px'))
    .add_xaxis(genre_counts.index.tolist())
    .add_yaxis("", genre_counts.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="2015-2011年用户喜欢的游戏类型"),
        visualmap_opts=opts.VisualMapOpts(max_=100),
    )
)
bar1.render_notebook()

# Bar chart: number of rows per genre among the 2016-2020 releases.
# x_first_5 only holds the years 2016-2020 (2015 belongs to x_next_5),
# so the title states 2020-2016 rather than 2020-2015.
recent_genre_counts = x_first_5['Genre'].value_counts()
bar2 = Bar(init_opts=opts.InitOpts(width='1350px', height='350px'))
bar2.add_xaxis(recent_genre_counts.index.tolist())
bar2.add_yaxis("", recent_genre_counts.tolist())
bar2.set_global_opts(
    title_opts=opts.TitleOpts(title="2020-2016年用户喜欢的类型"),
    visualmap_opts=opts.VisualMapOpts(max_=100),
)
bar2.render_notebook()

最近十年内的变化还是挺大的,体育竞技和射击类地位互换,动作类仍旧是老大的位置

# Bar chart: number of rows per platform among the 2011-2015 releases.
platform_counts = x_next_5['Platform'].value_counts()
bar1 = (
    Bar(init_opts=opts.InitOpts(width='1450px', height='350px'))
    .add_xaxis(platform_counts.index.tolist())
    .add_yaxis("", platform_counts.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="2015-2011年游戏平台用户量"),
        visualmap_opts=opts.VisualMapOpts(max_=100),
    )
)
bar1.render_notebook()

# Bar chart: number of rows per platform among the 2016-2020 releases.
recent_platform_counts = x_first_5['Platform'].value_counts()
bar1 = (
    Bar(init_opts=opts.InitOpts(width='1450px', height='350px'))
    .add_xaxis(recent_platform_counts.index.tolist())
    .add_yaxis("", recent_platform_counts.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="2020-2016年游戏平台用户量"),
        visualmap_opts=opts.VisualMapOpts(max_=100),
    )
)
bar1.render_notebook()

由于技术的进步,大部分跟不上时代的平台被新平台排挤,PS4替代PS2的地位,PS2不见踪影

发行商方面

# Ring chart: the ten publishers with the most rows in the 2011-2015 data.
top_publishers = x_next_5['Publisher'].value_counts().head(10)
publisher_names = top_publishers.index.tolist()
publisher_counts = top_publishers.tolist()
# pyecharts expects [name, value] pairs.
pie_pairs = [[name, count] for name, count in zip(publisher_names, publisher_counts)]

pie1 = Pie(init_opts=opts.InitOpts(width='1350px', height='550px'))
pie1.add('', pie_pairs, radius=['35%', '60%'])
pie1.set_global_opts(
    title_opts=opts.TitleOpts(title='2015-2011全球前十发行商'),
    legend_opts=opts.LegendOpts(orient='vertical', pos_top='15%', pos_left='2%'),
)
# Label each slice with its name and percentage share.
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie1.render_notebook()

# Ring chart: the ten publishers with the most rows in the 2016-2020 data.
# x_first_5 only holds the years 2016-2020 (2015 belongs to x_next_5),
# so the title states 2020-2016 rather than 2020-2015.
recent_top_publishers = x_first_5['Publisher'].value_counts().head(10)
# pyecharts expects [name, value] pairs.
recent_pie_pairs = [
    list(pair)
    for pair in zip(recent_top_publishers.index.tolist(), recent_top_publishers.tolist())
]

pie1 = Pie(init_opts=opts.InitOpts(width='1350px', height='550px'))
pie1.add('', recent_pie_pairs, radius=['35%', '60%'])
pie1.set_global_opts(
    title_opts=opts.TitleOpts(title='2020-2016全球前十发行商'),
    legend_opts=opts.LegendOpts(orient='vertical', pos_top='15%', pos_left='2%'),
)
# Label each slice with its name and percentage share.
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie1.render_notebook()

暴雪近十年变化巨大,由前三巨头变成第八位

Square Enix(史克威尔艾尼克斯),由于并入了Eidos工作室,并运营了工作室旗下的古墓丽影系列从而杀出重围,

同时其最终幻想系列是电子游戏史最畅销的系列。

Tecmo Koei(光荣特库摩)则抓住了女性玩家市场

任天堂、世嘉跌出前十

市场方面

# Regional and global sales columns that are summed per year.
Market_5 = ['NA_Sales','EU_Sales','JP_Sales','Other_Sales','Global_Sales']
# aggfunc is given as the string 'sum': passing np.sum is deprecated by pandas.
data_market_5 = pd.pivot_table(data, index='Year', values=Market_5, aggfunc='sum')
fig = plt.figure(figsize=(30, 6))
# One line per sales column. The former `size_order = 5` argument was dropped:
# size_order takes a list of size levels and has no effect without a `size` variable.
sns.lineplot(data=data_market_5)
plt.title('Market_progress')

# The ten publishers that appear in the most rows of the dataset.
Publisher_10 = data['Publisher'].value_counts().head(10).index.tolist()
Publisher_10

# Keep only the rows of those publishers, then total Global_Sales per year and publisher.
data_Publisher_10 = data[data['Publisher'].isin(Publisher_10)]
# aggfunc is given as the string 'sum': passing np.sum is deprecated by pandas.
data_Publisher_10_S = pd.pivot_table(
    data=data_Publisher_10,
    index='Year',
    columns='Publisher',
    values='Global_Sales',
    aggfunc='sum',
)
data_Publisher_10_S.plot(title='Top 10 publishers sales', figsize=(12, 6))

 

data.describe().T

# Display at most 120 rows when a DataFrame is shown.
pd.options.display.max_rows = 120
# Summed sales columns for every (Genre, Publisher) pair among the top-10 publishers.
# aggfunc is given as the string 'sum': passing np.sum is deprecated by pandas.
Data_10PBL_G_M_p = pd.pivot_table(
    data=data_Publisher_10,
    index=['Genre', 'Publisher'],
    values=Market_5,
    aggfunc='sum',
)
# Sort by the Genre index level, then by Global_Sales, both descending.
Data_10PBL_G_M_p.sort_values(by=['Genre', 'Global_Sales'], ascending=False)

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

从1995年开始游戏销量暴涨,于2004-2012年抵达峰值,之后逐渐下跌。其中销售最高的是北美,其次是欧洲;日本地区虽然销售不比其余市场,但日本一个国家的销售额就占据全球的14%,而且日本的两大游戏公司都是全球前十。2015年销量惨淡。任天堂在2006年迎来一次大爆发,而其余发行商却萎靡;在2015年所有的发行商销量都下滑。由于数据集中没有2018年的销售数据,所以无法下更准确的结论。当时全球经济不景气,投入到娱乐项目的资金自然会减少。

四、结论

游戏的发展已经出现回落,具有竞争性和刺激性的游戏更加能够吸引玩家的喜欢,同时地区经济是否发达与游戏的销量高低具有直接的关系,经济发达地区的销量更高。

五、代码汇总

from pyecharts import options as opts


import pandas as pd


import numpy as np


from pyecharts.charts import Bar,Pie


import seaborn as sns


import matplotlib.pyplot as plt

# Load the raw sales table and take a first look at it.
data = pd.read_csv('/home/kesci/input/Datasets6073/vgsales.csv')
data.head()

data.info()

# The dataset is large, so rows with missing values are simply dropped;
# the index is then rebuilt so it is contiguous again.
data = data.dropna().reset_index(drop=True)
data.head(10)

# Summary statistics: default (numeric) columns first, then object-dtype columns.
data.describe().T

data.describe(include='object').T

# Year is stored as text from here on; the labels below carry the
# float-style ".0" suffix (e.g. '2016.0') that the filters match against.
data['Year'] = data['Year'].astype(str)
recent_years = ['2020.0', '2019.0', '2018.0', '2017.0', '2016.0']
earlier_years = ['2015.0', '2014.0', '2013.0', '2012.0', '2011.0']
# Rows released in 2016-2020 and in 2011-2015 respectively.
x_first_5 = data[data['Year'].isin(recent_years)]
x_next_5 = data[data['Year'].isin(earlier_years)]

def _count_bar(frame, column, title, width='1450px'):
    """Return a bar chart of how many rows of ``frame`` carry each ``column`` value.

    Args:
        frame: DataFrame holding the rows to count.
        column: Name of the column whose values become the x-axis categories.
        title: Chart title.
        width: CSS width of the chart canvas.

    Returns:
        The configured pyecharts ``Bar`` chart.
    """
    counts = frame[column].value_counts()
    chart = Bar(init_opts=opts.InitOpts(width=width, height='350px'))
    chart.add_xaxis(counts.index.tolist())
    chart.add_yaxis("", counts.tolist())
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=title),
        visualmap_opts=opts.VisualMapOpts(max_=100),
    )
    return chart


# Genre counts for the 2011-2015 releases.
bar1 = _count_bar(x_next_5, 'Genre', "2015-2011年用户喜欢的游戏类型")
bar1.render_notebook()

# Genre counts for the 2016-2020 releases. x_first_5 only holds 2016-2020,
# so the title states 2020-2016 rather than 2020-2015.
bar2 = _count_bar(x_first_5, 'Genre', "2020-2016年用户喜欢的类型", width='1350px')
bar2.render_notebook()

# Platform counts for the 2011-2015 releases.
bar1 = _count_bar(x_next_5, 'Platform', "2015-2011年游戏平台用户量")
bar1.render_notebook()

# Platform counts for the 2016-2020 releases.
bar1 = _count_bar(x_first_5, 'Platform', "2020-2016年游戏平台用户量")
bar1.render_notebook()

def _top_publishers_pie(frame, title):
    """Return a ring chart of the ten publishers with the most rows in ``frame``.

    Args:
        frame: DataFrame with a 'Publisher' column.
        title: Chart title.

    Returns:
        The configured pyecharts ``Pie`` chart.
    """
    top_ten = frame['Publisher'].value_counts().head(10)
    # pyecharts expects [name, value] pairs.
    pairs = [list(pair) for pair in zip(top_ten.index.tolist(), top_ten.tolist())]
    chart = Pie(init_opts=opts.InitOpts(width='1350px', height='550px'))
    chart.add('', pairs, radius=['35%', '60%'])
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=title),
        legend_opts=opts.LegendOpts(orient='vertical', pos_top='15%', pos_left='2%'),
    )
    # Label each slice with its name and percentage share.
    chart.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
    return chart


# Top publishers among the 2011-2015 releases.
pie1 = _top_publishers_pie(x_next_5, '2015-2011全球前十发行商')
pie1.render_notebook()

# Top publishers among the 2016-2020 releases. x_first_5 only holds 2016-2020,
# so the title states 2020-2016 rather than 2020-2015.
pie1 = _top_publishers_pie(x_first_5, '2020-2016全球前十发行商')
pie1.render_notebook()

# Regional and global sales columns that are summed per year.
Market_5 = ['NA_Sales','EU_Sales','JP_Sales','Other_Sales','Global_Sales']
# aggfunc is given as the string 'sum' throughout: passing np.sum is deprecated by pandas.
data_market_5 = pd.pivot_table(data, index='Year', values=Market_5, aggfunc='sum')
fig = plt.figure(figsize=(30, 6))
# One line per sales column. The former `size_order = 5` argument was dropped:
# size_order takes a list of size levels and has no effect without a `size` variable.
sns.lineplot(data=data_market_5)
plt.title('Market_progress')

# The ten publishers that appear in the most rows of the dataset.
Publisher_10 = data['Publisher'].value_counts().head(10).index.tolist()
Publisher_10

# Keep only the rows of those publishers, then total Global_Sales per year and publisher.
data_Publisher_10 = data[data['Publisher'].isin(Publisher_10)]
data_Publisher_10_S = pd.pivot_table(
    data=data_Publisher_10,
    index='Year',
    columns='Publisher',
    values='Global_Sales',
    aggfunc='sum',
)
data_Publisher_10_S.plot(title='Top 10 publishers sales', figsize=(12, 6))

# Display at most 120 rows when a DataFrame is shown.
pd.options.display.max_rows = 120


# Summed sales columns for every (Genre, Publisher) pair among the top-10 publishers.
Data_10PBL_G_M_p = pd.pivot_table(
    data=data_Publisher_10,
    index=['Genre', 'Publisher'],
    values=Market_5,
    aggfunc='sum',
)
# Sort by the Genre index level, then by Global_Sales, both descending.
Data_10PBL_G_M_p.sort_values(by=['Genre', 'Global_Sales'], ascending=False)

  

posted @ 2021-06-24 00:40  JINTAI1  阅读(314)  评论(0编辑  收藏  举报