日期型数据处理
import pandas as pd
#先创建一个数据框(包含缺失值)
df = pd.DataFrame({'auth_date':['2017-01-02','2017-02-02','2017-12-23','NaN'],
'sply_date':['2018-01-02','2018-02-02','2018-12-23','NaN'],
'rgst_time':['2018-02-03 17:12:42','2018-10-02 12:14:43','2018-03-23 16:23:24','NaN'],
'name':['zhangsan','lisi','xiaohua','xiaomei']})
feature = df.columns.tolist()
#当日期型数据比较多时,可以写一个封装好的代码,如下:
def datetime_processing(df):
"""
argumenr:df:数据框
goal: 对日期型数据转数值型数据
return: df:日期型数据处理完之后的数据
"""
#日期数据精确到日
date_feature=['auth_date','sply_date']
for feature in date_feature:
df[feature] = pd.to_datetime(df[feature])
df[feature] = df[feature] - pd.to_datetime("2000-01-01")
df[feature] = df[feature].astype("str")
df[feature] = df[feature].apply(lambda x:x.replace("days 00:00:00.000000000","").replace("NaT","0"))
df[feature] = df[feature].astype("int")
#日期型数据精确到秒
datetime_feature = ['rgst_time']
for feature in datetime_feature:
df[feature] = pd.to_datetime(df[feature])
df[feature] = (df[feature] - pd.to_datetime("2000-01-01")).dt.seconds
df[feature] = df[feature] .fillna(0)
return df
#看一下处理之后的数据 df = datetime_processing(df) df.info()
处理前:
df
Out[79]:
auth_date sply_date rgst_time name
0 2017-01-02 2018-01-02 2018-02-03 17:12:42 zhangsan
1 2017-02-02 2018-02-02 2018-10-02 12:14:43 lisi
2 2017-12-23 2018-12-23 2018-03-23 16:23:24 xiaohua
3 NaN NaN NaN xiaomei
处理后:
df Out[81]: auth_date sply_date rgst_time name 0 6211 6576 61962.0 zhangsan 1 6242 6607 44083.0 lisi 2 6566 6931 59004.0 xiaohua 3 0 0 0.0 xiaomei

浙公网安备 33010602011771号