二手车价格预测Baseline(一)函数导入与数据读取
引用阿里云天池:视频 Baseline 赛题理解 https://tianchi.aliyun.com/course/video?spm=5176.12586971.1001.79.62f9593aP87K0Z&liveId=41143
文档 https://tianchi.aliyun.com/notebook-ai/detail?spm=5176.12281978.0.0.6802593ax58pa6&postId=95422
赛题理解

- 用pandas读取数据
# Load the training and test sets into DataFrames with pandas.
import pandas as pd
import numpy as np

# Directory prefix for the data files. It is joined to the file names by plain
# string concatenation, so it must be empty or end with a path separator; a
# bare space here would yield the file name ' train.csv' (leading space).
path = './'
# Both files are parsed with a single space as the column delimiter.
Train_data = pd.read_csv(path + 'train.csv', sep=' ')
Test_data = pd.read_csv(path + 'test.csv', sep=' ')
# Report the (rows, columns) shape of each frame.
print("Train data shape:", Train_data.shape)
print("Test data shape:", Test_data.shape)
- 分类指标评价计算示例
# Accuracy: the share of predictions that equal the ground-truth label.
import numpy as np
from sklearn.metrics import accuracy_score

# Three of the four predictions below match their label.
y_pred = [0, 1, 0, 1]
y_true = [0, 1, 1, 1]
print(
    "ACC:",
    accuracy_score(y_true=y_true, y_pred=y_pred),
)
# Precision, recall and F1-score on a small binary example.
from sklearn import metrics

# Against y_true, y_pred has one true positive, no false positives and one
# false negative (the last sample).
y_pred = [0, 1, 0, 0]
y_true = [0, 1, 0, 1]
print('Precision', metrics.precision_score(y_true=y_true, y_pred=y_pred))
print('Recall', metrics.recall_score(y_true=y_true, y_pred=y_pred))
print('F1-score:', metrics.f1_score(y_true=y_true, y_pred=y_pred))
# AUC: area under the ROC curve, computed from true labels and predicted scores.
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])
# Three of the four (negative, positive) pairs are ranked correctly by these
# scores; only the 0.4-vs-0.35 pair is inverted.
print('AUC score:', roc_auc_score(y_true, y_scores))
Baseline
- Step1 导入函数工具箱
#基础工具
import numpy as np
import pandas as pd
import warnings
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.special import jn
from IPython.display import display,clear_output
import time
warnings.filterwarnings('ignore')
%matplotlib inline
# Models used for prediction
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
#数据降维处理
from sklearn.decomposition import PCA,FastICA,FactorAnalysis,SparsePCA
import lightgbm as lgb
import xgboost as xgb
# Hyper-parameter search and evaluation
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
- Step2 数据读取与 EDA
# Step 2: load the training and test sets into DataFrames with pandas.
import pandas as pd
import numpy as np

# Directory prefix for the data files. It is joined to the file names by plain
# string concatenation, so it must be empty or end with a path separator; a
# bare space here would yield the file name ' train.csv' (leading space).
path = './'
# Both files are parsed with a single space as the column delimiter.
Train_data = pd.read_csv(path + 'train.csv', sep=' ')
Test_data = pd.read_csv(path + 'test.csv', sep=' ')
# Report the (rows, columns) shape of each frame.
print("Train data shape:", Train_data.shape)
print("Test data shape:", Test_data.shape)
# Preview the first 10 rows of the training data.
Train_data.head(10)
# Browse summary statistics:
# .describe() reports statistics for the numeric feature columns.
Train_data.describe()
# The test frame is loaded under the name Test_data, so describe that frame.
Test_data.describe()
浙公网安备 33010602011771号