# 日期类型转换
# tips2: drop every review written before 2010.
# Parse review_date into pandas datetimes so it can be compared,
# subtracted and resampled further down.
data_hair1['review_date'] = pd.to_datetime(data_hair1['review_date'])
# Keep reviews from 2010-01-01 onward. The bound is inclusive on purpose:
# a strict `>` against '2010' (i.e. 2010-01-01 00:00:00) would also discard
# reviews dated exactly 2010-01-01, which are not "before 2010".
data_hair1 = data_hair1[data_hair1['review_date'] >= '2010-01-01']
# 找出最大最小时间差距
# Measure how long each product has been receiving reviews:
# latest review date minus earliest review date, per product_id.
review_dates_by_product = data_hair1.groupby('product_id')[['review_date']]
# One-column frames (review_date) indexed by product_id.
data_max_date = review_dates_by_product.max()
data_min_date = review_dates_by_product.min()
# NOTE(review): earlier drafts left-merged these frames back onto data_hair1
# on ['product_id', 'review_date'] to recover the full review rows; that
# step is intentionally not performed here.
review_span = data_max_date['review_date'] - data_min_date['review_date']
# Dividing a timedelta Series by a one-day Timedelta yields the span as a
# plain float number of days.
data_date_cha = review_span / pd.Timedelta(days=1)
# 汇总每个季度的和
# Sum the star ratings per calendar quarter.
# Pull the two columns of interest into a new DataFrame indexed by
# review_date (resample needs a datetime-like index). Selecting the columns
# directly keeps every row paired with its own date and preserves each
# column's dtype; the previous DataFrame([...]).stack().unstack(0) round-trip
# built a frame with one column per review, failed on duplicate index labels
# and could drop rows in which both values were missing.
data_hair2 = data_hair1.set_index('review_date')[['star_rating', 'helpful_votes']]
# Quarterly sum of star_rating. The alias letter picks the bin size
# (year / month / week / day variants exist as well).
# NOTE(review): pandas >= 2.2 deprecates 'Q' in favour of 'QE'; switch once
# the minimum supported pandas version allows it.
data_hair2_Q = data_hair2['star_rating'].resample('Q').sum()