Python 衍生特征
1.衍生方式一
# Derive per-id features from a long-format (id, cur) table:
#   cur1_count    - number of rows of that id whose cur is 'cur1'
#   cur_count_all - total number of rows of that id
#   cur_cate_num  - number of distinct cur categories seen for that id
import pandas as pd

df = pd.DataFrame({
    'id': [2, 2, 2, 3, 3, 5],
    'cur': ['cur1', 'cur2', 'cur3', 'cur1', 'cur1', 'cur2'],
})
df

# Contingency table: one row per id, one column per cur category, cells are
# occurrence counts. The index holds the sorted unique ids.
df_tmp = pd.crosstab(df['id'], df['cur'])
df_tmp

# Build every feature column-wise from the crosstab instead of looping over
# ids and writing cell by cell. Aggregating over all crosstab columns (rather
# than a hard-coded 'cur1':'cur3' label slice) keeps this working when the set
# of categories changes; for the sample data the result is the same.
# .to_numpy() drops the id index so df_new gets a plain 0..n-1 index.
df_new = pd.DataFrame({
    'id': df_tmp.index.to_numpy(),
    'cur1_count': df_tmp['cur1'].to_numpy(),
    'cur_count_all': df_tmp.sum(axis=1).to_numpy(),
    'cur_cate_num': (df_tmp > 0).sum(axis=1).to_numpy(),
})
df_new.head()


浙公网安备 33010602011771号