1.读入数据

#-*- coding:utf-8 -*-
import numpy as np
import pandas as pd
# Load a tab-separated file that has no header row. header=None tells pandas
# to take the column labels from `names`; a bool such as header=False is
# rejected by read_csv with a TypeError.
df_data = pd.read_csv(
    'filename',
    sep='\t',
    header=None,
    names=['distance', 'pid', 'time', 'fee'],
)
# Reference: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html

2.ix / loc(.ix 已在 pandas 1.0 中移除,请改用 .loc)

# The .ix indexer was removed in pandas 1.0; .loc performs the same
# boolean-mask row selection with a column label.
# Only rows where carpool == 1 are assigned; the right-hand side is aligned
# to those rows by index, and other rows of 'discount' are left untouched.
df_data.loc[df_data['carpool'] == 1, 'discount'] = (
    1 - df_data['discount_fee'] / df_data['total_fee']
)

# Compute a cell key per row from its start latitude/longitude.
# With axis=1 each row arrives as a Series indexed by column name, so the
# values are read by label; integer keys such as row[0] depend on the
# deprecated positional fallback of Series.__getitem__.
# NOTE(review): 'start_gird' looks like a typo for 'start_grid'; the column
# name is kept as-is because other code may read it -- confirm before renaming.
df_sofa['start_gird'] = df_sofa[['start_lat', 'start_lng']].apply(
    lambda row: get_HexCellKey(row['start_lat'], row['start_lng']),
    axis=1,
)

3.DataFrame

# Split every bubble_time string on a single space and collect the pieces
# into a two-column frame ('day', 'h') that shares data's index.
# Presumably bubble_time looks like '<day> <hour...>' -- verify against the data.
time_split = pd.DataFrame(
    [stamp.split(' ') for stamp in data.bubble_time],
    index=data.index,
    columns=['day', 'h'],
)

4.cut

# Fee bin edges (six edges -> five intervals) and one label per interval.
bins_p = [0, 30, 40, 60, 80, 400]
group_names_p = ['10', '30', '40', '60', '80']
# pd.cut uses right-closed intervals by default: (0, 30], (30, 40], ...,
# (80, 400]. Fees outside (0, 400] become NaN.
data['price'] = pd.cut(
    data['total_fee'],
    bins=bins_p,
    labels=group_names_p,
)

5.sort_values

# Order rows by pid, then by time within each pid (ascending is the default).
data = data.sort_values(['pid', 'time'])
 posted on 2017-05-11 17:52  星心soul  阅读(176)  评论(0)    收藏  举报