用 pandas 的 pivot_table 或 groupby 实现 SQL 中的 COUNT(DISTINCT) 功能

import pandas as pd
import numpy as np

# Load the CSV into a DataFrame. The path is relative, so the file must be in
# the current working directory.
data = pd.read_csv('活跃买家分析初稿.csv')

# Preview the first rows.
data.head()

	recycler_key	date 周	date 年	date 月	记录数
0	1694	周 1	2018	一月	6
1	1693	周 1	2018	一月	14
2	1686	周 1	2018	一月	20
3	1677	周 1	2018	一月	62
4	1676	周 1	2018	一月	25

# Rename the columns positionally to short English names. This assignment
# requires the DataFrame to have exactly five columns and maps them in their
# existing order (see the preview output above).
data.columns=['merchant','week','year','month','records']

# Preview again to check the new headers.
data.head()

	merchant	week	year	month	records
0	1694	周 1	2018	一月	6
1	1693	周 1	2018	一月	14
2	1686	周 1	2018	一月	20
3	1677	周 1	2018	一月	62
4	1676	周 1	2018	一月	25

# Count the rows for each (month, merchant) pair. The result is a Series with
# a (month, merchant) MultiIndex holding one entry per distinct pair.
data1 =data.groupby(['month','merchant']).size()

# Preview the first few pairs.
data1.head()

month  merchant
一月     1           2
       240         1
       241         1
       256         9
       277         2
dtype: int64

# Calendar order of the Chinese month labels. By default groupby/pivot_table
# sort the labels as plain strings, which is not chronological, so every
# per-month result is reindexed against this list before display. A month
# that does not occur in the data shows up as a NaN row after reindexing.
MONTH_ORDER = ['一月', '二月', '三月', '四月', '五月', '六月',
               '七月', '八月', '九月', '十月', '十一月', '十二月']

# Approach 1: two-step groupby. data1 has one entry per distinct
# (month, merchant) pair, so counting entries per month gives the number of
# distinct merchants in that month.
data1.reset_index().head()

data1.reset_index().groupby('month')['merchant'].size().reindex(MONTH_ORDER).reset_index()

# Approach 2: pivot_table with a custom aggregator. nunique() skips NaN, the
# same way SQL COUNT(DISTINCT ...) skips NULL and the same way the other
# approaches here behave; len(x.unique()) would count NaN as an extra value.
data2 = data.pivot_table(index='month', values='merchant', aggfunc=lambda x: x.nunique())

data2.reindex(MONTH_ORDER).reset_index()

# Approach 3: pivot_table with the built-in distinct counter.
data3 = data.pivot_table(index='month', values='merchant', aggfunc=pd.Series.nunique)

data3.reindex(MONTH_ORDER).reset_index()

# Approach 4: plain groupby + agg with the built-in distinct counter.
data4 = data.groupby(['month']).agg({'merchant': pd.Series.nunique})

data4.reindex(MONTH_ORDER).reset_index()

posted on 2019-03-28 11:47 多一点阅读(4056) 评论(0) 收藏举报

刷新页面返回顶部

多一点