Python读取csv格式文件
d:/train.csv
datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
2011-01-02 00:00:00,2,0,0,1,1.84,12.391,55,0.0,1,19,23
# -*- coding:utf-8 -*-
"""Read the bike-sharing CSV and derive calendar features from its timestamps.

Run as a script, this loads ``d:/train.csv``, prints its shape, first rows and
column names, then replaces the raw ``datetime`` column with ``date``,
``hour``, ``weekday`` and ``month`` columns, maps the numeric ``season`` codes
to names, converts the categorical columns to the pandas ``category`` dtype
and prints the first rows of the result.
"""
import calendar
import warnings
from datetime import datetime  # kept: part of the original import list

import pandas as pd

warnings.filterwarnings("ignore", category=DeprecationWarning)
# Set the display width so that head() output is not truncated with ellipses.
pd.set_option('display.width', None)

CSV_PATH = "d:/train.csv"
SEASON_NAMES = {1: "Spring", 2: "Summer", 3: "Fall", 4: "Winter"}
CATEGORY_COLUMNS = [
    "hour",
    "weekday",
    "month",
    "season",
    "weather",
    "holiday",
    "workingday",
]


def add_calendar_features(dailyData):
    """Return a copy of *dailyData* with calendar features replacing ``datetime``.

    Args:
        dailyData: DataFrame holding a ``datetime`` column of strings shaped
            like ``"2011-01-01 00:00:00"`` plus ``season``, ``weather``,
            ``holiday`` and ``workingday`` columns.

    Returns:
        A new DataFrame without ``datetime`` and with ``date``, ``hour``,
        ``weekday`` and ``month`` appended. ``hour`` keeps the text before
        the first ``":"`` of the time token (e.g. ``"00"``). Every column in
        ``CATEGORY_COLUMNS`` has the ``category`` dtype. Season codes missing
        from ``SEASON_NAMES`` become NaN.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if a date token does not match ``%Y-%m-%d``.
    """
    # Work on a copy so the caller's frame is left untouched.
    frame = dailyData.copy()

    # Split "YYYY-MM-DD HH:MM:SS" on whitespace once for the whole column.
    tokens = frame["datetime"].str.split()
    date_text = tokens.str[0]
    frame["date"] = date_text
    frame["hour"] = tokens.str[1].str.split(":").str[0]

    # Parse each date a single time and reuse it for weekday and month.
    parsed = pd.to_datetime(date_text, format="%Y-%m-%d")
    # Names still come from the calendar module, as in the row-wise version.
    frame["weekday"] = parsed.dt.weekday.map(dict(enumerate(calendar.day_name)))
    frame["month"] = parsed.dt.month.map(dict(enumerate(calendar.month_name)))

    frame["season"] = frame["season"].map(SEASON_NAMES)

    frame = frame.astype({name: "category" for name in CATEGORY_COLUMNS})
    return frame.drop(columns=["datetime"])


if __name__ == "__main__":
    dailyData = pd.read_csv(CSV_PATH, encoding='gbk')
    print(dailyData.shape)
    print(dailyData.head())
    print(dailyData.columns.tolist())
    # Sample row: 2011-01-01 00:00:00 1 0 0 1 9.84 14.395 81 0.0 3 13 16
    dailyData = add_calendar_features(dailyData)
    print(dailyData.head())
(7, 12)
datetime season holiday workingday weather temp atemp humidity windspeed casual registered count
0 2011-01-01 00:00:00 1 0 0 1 9.84 14.395 81 0.0 3 13 16
1 2011-01-02 00:00:00 2 0 0 1 1.84 12.391 55 0.0 1 19 23
2 2011-01-02 00:00:00 2 0 0 1 1.84 12.391 55 0.0 1 19 23
3 2011-01-02 00:00:00 2 0 0 1 1.84 12.391 55 0.0 1 19 23
4 2011-01-02 00:00:00 2 0 0 1 1.84 12.391 55 0.0 1 19 23
['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp', 'atemp', 'humidity', 'windspeed', 'casual', 'registered', 'count']
season holiday workingday weather temp atemp humidity windspeed casual registered count date hour weekday month
0 Spring 0 0 1 9.84 14.395 81 0.0 3 13 16 2011-01-01 00 Saturday January
1 Summer 0 0 1 1.84 12.391 55 0.0 1 19 23 2011-01-02 00 Sunday January
2 Summer 0 0 1 1.84 12.391 55 0.0 1 19 23 2011-01-02 00 Sunday January
3 Summer 0 0 1 1.84 12.391 55 0.0 1 19 23 2011-01-02 00 Sunday January
4 Summer 0 0 1 1.84 12.391 55 0.0 1 19 23 2011-01-02 00 Sunday January