二手车价格预测Baseline(一)函数导入与数据读取

引用阿里云天池:视频 Baseline 赛题理解 https://tianchi.aliyun.com/course/video?spm=5176.12586971.1001.79.62f9593aP87K0Z&liveId=41143
文档 https://tianchi.aliyun.com/notebook-ai/detail?spm=5176.12281978.0.0.6802593ax58pa6&postId=95422

赛题理解

  • 用pandas读取数据

import pandas as pd
import numpy as np
# NOTE(review): `path` looks like a placeholder for the data directory prefix — confirm before running.
path = '  '

# Both CSV files are parsed with a single space as the field separator.
Train_data = pd.read_csv(path + 'train.csv', sep=' ')
Test_data = pd.read_csv(path + 'test.csv', sep=' ')

# Print the (rows, columns) shape of each loaded frame.
print("Train data shape:", Train_data.shape)
print("Test data shape:", Test_data.shape)

  • 分类指标评价计算示例

# Accuracy example: compare a small list of predictions against the true labels.
import numpy as np
from sklearn.metrics import accuracy_score

y_true = [0, 1, 1, 1]
y_pred = [0, 1, 0, 1]
print("ACC:", accuracy_score(y_true=y_true, y_pred=y_pred))

# Precision, recall and F1-score example on binary labels.
from sklearn import metrics

y_true = [0, 1, 0, 1]
y_pred = [0, 1, 0, 0]

print('Precision', metrics.precision_score(y_true, y_pred))
print('Recall', metrics.recall_score(y_true, y_pred))
print('F1-score:', metrics.f1_score(y_true, y_pred))

# AUC example: score predicted probabilities/scores against binary true labels.
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])
# roc_auc_score takes the true labels first, then the predicted scores.
print('AUC score:', roc_auc_score(y_true, y_scores))

BaseLine

  • Step1 导入函数工具箱

# Basic tools: numerics, data frames, plotting, and notebook display helpers
import numpy as np
import pandas as pd
import warnings
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.special import jn
from IPython.display import display,clear_output
import time

# Suppress all warnings from here on
warnings.filterwarnings('ignore')
# IPython/Jupyter magic (not plain Python): show matplotlib figures inline in the notebook
%matplotlib inline

# Models used for prediction
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor

#数据降维处理
from sklearn.decomposition import PCA,FastICA,FactorAnalysis,SparsePCA
import lightgbm as lgb
import xgboost as xgb

# Hyper-parameter search and evaluation metrics
from sklearn.model_selection import GridSearchCV,cross_val_score,StratifiedKFold
from sklearn.metrics import mean_squared_error,mean_absolute_error

  • Step2 数据读取与 EDA

import pandas as pd
import numpy as np
# NOTE(review): `path` looks like a placeholder for the data directory prefix — confirm before running.
path = '  '

# Both CSV files are parsed with a single space as the field separator.
Train_data = pd.read_csv(path + 'train.csv', sep=' ')
Test_data = pd.read_csv(path + 'test.csv', sep=' ')

# Print the (rows, columns) shape of each loaded frame.
print("Train data shape:", Train_data.shape)
print("Test data shape:", Test_data.shape)

# Preview the first 10 rows of the training data
Train_data.head(10)

# Browse summary statistics:
# .describe() reports statistics for the numeric feature columns
Train_data.describe()
# The test set is bound to the name Test_data when it is read, so use that name here.
Test_data.describe()

posted on 2020-06-06 16:52  heroy1  阅读(174)  评论(0)    收藏  举报