Python读取csv格式文件

示例数据文件 d:/train.csv 的内容如下:

datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23

 

# -*- coding:utf-8 -*-
import calendar
import pandas as pd
from datetime import datetime
import warnings
#pd.options.mode.chained_assignment = None
# Silence DeprecationWarning messages so they do not clutter the printed output.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Original author's intent: keep head() from abbreviating its output with
# ellipses.  With display.width set to None pandas auto-detects the terminal
# width instead of using its fixed default.
pd.set_option('display.width',None)

# Load the sample CSV.  The script below requires at least the columns
# datetime, season, weather, holiday and workingday to be present.
dailyData = pd.read_csv("d:/train.csv", encoding='gbk')

# Quick look at the raw data: dimensions, first rows, column names.
print(dailyData.shape)
print(dailyData.head())
print(dailyData.columns.tolist())

# Sample raw row:
# 2011-01-01 00:00:00       1        0           0        1  9.84  14.395        81        0.0       3          13     16

# Parse the timestamp column once (strict format, so malformed rows fail
# loudly here) and derive every calendar feature from the parsed values
# instead of re-splitting / re-parsing the string for each row.
timestamps = pd.to_datetime(dailyData["datetime"], format="%Y-%m-%d %H:%M:%S")

dailyData["date"] = timestamps.dt.strftime("%Y-%m-%d")    # e.g. "2011-01-01"
dailyData["hour"] = timestamps.dt.strftime("%H")          # zero-padded string, e.g. "00"
dailyData["weekday"] = timestamps.dt.day_name()           # e.g. "Saturday"
dailyData["month"] = timestamps.dt.month_name()           # e.g. "January"

# Replace the numeric season code with a readable label; codes outside 1-4
# become NaN because they have no entry in the mapping.
dailyData["season"] = dailyData.season.map({1: "Spring", 2 : "Summer", 3 : "Fall", 4 :"Winter" })

# These columns hold discrete labels/codes, so store them as pandas
# categoricals rather than plain objects/integers.
categoryVariableList = ["hour","weekday","month","season","weather","holiday","workingday"]
dailyData = dailyData.astype({name: "category" for name in categoryVariableList})

# The raw timestamp is now fully represented by date/hour/weekday/month.
dailyData  = dailyData.drop(["datetime"], axis=1)
print(dailyData.head())

(7, 12)
datetime season holiday workingday weather temp atemp humidity windspeed casual registered count
0 2011-01-01 00:00:00 1 0 0 1 9.84 14.395 81 0.0 3 13 16
1 2011-01-02 00:00:00 2 0 0 1 1.84 12.391 55 0.0 1 19 23
2 2011-01-02 00:00:00 2 0 0 1 1.84 12.391 55 0.0 1 19 23
3 2011-01-02 00:00:00 2 0 0 1 1.84 12.391 55 0.0 1 19 23
4 2011-01-02 00:00:00 2 0 0 1 1.84 12.391 55 0.0 1 19 23
['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp', 'atemp', 'humidity', 'windspeed', 'casual', 'registered', 'count']
season holiday workingday weather temp atemp humidity windspeed casual registered count date hour weekday month
0 Spring 0 0 1 9.84 14.395 81 0.0 3 13 16 2011-01-01 00 Saturday January
1 Summer 0 0 1 1.84 12.391 55 0.0 1 19 23 2011-01-02 00 Sunday January
2 Summer 0 0 1 1.84 12.391 55 0.0 1 19 23 2011-01-02 00 Sunday January
3 Summer 0 0 1 1.84 12.391 55 0.0 1 19 23 2011-01-02 00 Sunday January
4 Summer 0 0 1 1.84 12.391 55 0.0 1 19 23 2011-01-02 00 Sunday January

posted @ 2019-05-19 08:41  牧 天  阅读(196)  评论(0)  编辑  收藏  举报