Machine Learning Competitions

Start with sklearn's random forest and the xgboost library.
Run these two algorithms first; they usually give a presentable baseline, and both are fast, typically finishing within 30 seconds.

Then move on to feature engineering, deep learning, and so on.
Template:

# xgboost native interface
import xgboost as xgb
# scikit-learn style interface
from xgboost import XGBClassifier
from xgboost import XGBRegressor

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold

from sklearn.datasets import load_breast_cancer, load_wine
import warnings
warnings.simplefilter("ignore")

cancer = load_breast_cancer()
wine = load_wine()

# Binary-classification example on breast_cancer (commented out; the
# multiclass wine example below is the one that runs):

# data_train, data_test, target_train, target_test = train_test_split(cancer.data, cancer.target, test_size=0.2, random_state=0)

# params = {
#     'eta': 0.02,  # learning rate
#     'max_depth': 6,
#     'min_child_weight': 3,  # minimum sum of instance weights needed in a child
#     'gamma': 0,  # minimum loss reduction required to split a node
#     'subsample': 0.7,  # fraction of rows sampled for each tree
#     'colsample_bytree': 0.3,  # fraction of columns (features) sampled per tree
#     'lambda': 2,  # L2 regularization
#     'objective': 'binary:logistic',
#     'eval_metric': 'auc',
#     'verbosity': 0,  # 'silent' is deprecated in current xgboost
#     'nthread': -1
# }

# xgb_train = xgb.DMatrix(data_train, target_train)
# xgb_test = xgb.DMatrix(data_test, target_test)
# xgb_model = xgb.train(params, xgb_train)
# xgb_predict = xgb_model.predict(xgb_test)  # predict on the test set, not the training data
# xgb_predict[xgb_predict > .5] = 1
# xgb_predict[xgb_predict <= .5] = 0
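The commented block above thresholds its predictions but never scores them. If you enable it, one line closes the loop (accuracy_score is imported above; kept commented to match the block):

# print('binary accuracy', accuracy_score(target_test, xgb_predict))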




data_train, data_test, target_train, target_test = train_test_split(wine.data, wine.target, test_size=0.2, random_state=0)
 
params = {
    'eta': 0.02,  # learning rate
    'num_class': 3,
    'max_depth': 5,
    'min_child_weight': 1,  # minimum sum of instance weights needed in a child
    'gamma': 0,  # minimum loss reduction required to split a node
    'subsample': 0.7,  # fraction of rows sampled for each tree
    'colsample_bytree': 0.3,  # fraction of columns (features) sampled per tree
    'lambda': 2,  # L2 regularization
    'objective': 'multi:softmax',
    'eval_metric': 'mlogloss',
    'verbosity': 0,  # 'silent' is deprecated in current xgboost
    'nthread': -1
}
 
xgb_train = xgb.DMatrix(data_train, target_train)
xgb_test = xgb.DMatrix(data_test, target_test)
# n_estimators belongs to the sklearn wrapper; the native API takes num_boost_round
xgb_model = xgb.train(params, xgb_train, num_boost_round=100)
xgb_predict = xgb_model.predict(xgb_train)    # class labels on the training set
xgb_test_pred = xgb_model.predict(xgb_test)   # class labels on the test set
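In a competition you usually want many boosting rounds with early stopping rather than a fixed count. A minimal sketch, assuming a held-out slice of the training data serves as validation (the split ratio and the patience of 50 rounds are arbitrary choices here, not from the original):

X_tr, X_val, y_tr, y_val = train_test_split(data_train, target_train, test_size=0.2, random_state=0)
dtr, dval = xgb.DMatrix(X_tr, y_tr), xgb.DMatrix(X_val, y_val)
booster = xgb.train(params, dtr, num_boost_round=1000,
                    evals=[(dval, 'val')],      # watch validation mlogloss
                    early_stopping_rounds=50,   # stop after 50 rounds without improvement
                    verbose_eval=False)
print('best iteration', booster.best_iteration)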








acc = accuracy_score(target_test, xgb_test_pred)  # multi:softmax returns class labels directly
print('xgboost accuracy', acc)
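KFold and XGBClassifier are imported above but never used. A minimal cross-validation sketch with the sklearn-style wrapper; note the parameter names differ from the native API (learning_rate instead of eta, reg_lambda instead of lambda), and the values below just mirror the dict above:

scores = []
for tr_idx, va_idx in KFold(n_splits=5, shuffle=True, random_state=0).split(wine.data):
    clf = XGBClassifier(n_estimators=100, learning_rate=0.02, max_depth=5,
                        subsample=0.7, colsample_bytree=0.3, reg_lambda=2)
    clf.fit(wine.data[tr_idx], wine.target[tr_idx])
    scores.append(accuracy_score(wine.target[va_idx], clf.predict(wine.data[va_idx])))
print('xgboost 5-fold accuracy', np.mean(scores))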





from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
# train the model
model_tree = RandomForestClassifier(n_estimators=102, random_state=42)  # classification; use RandomForestRegressor for regression
model_tree.fit(data_train, target_train)
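Random forests also give a free validation estimate via out-of-bag samples, handy when you don't want to sacrifice data to a validation split. A minimal sketch using the same hyperparameters (oob_score is a standard RandomForestClassifier option):

rf_oob = RandomForestClassifier(n_estimators=102, random_state=42, oob_score=True)
rf_oob.fit(data_train, target_train)
print('OOB accuracy', rf_oob.oob_score_)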





print('random forest accuracy', accuracy_score(target_test, model_tree.predict(data_test)))
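As a bridge to the feature-engineering step mentioned at the top, the fitted forest's importances are a cheap first signal for which features matter. A minimal sketch (wine.feature_names comes from the sklearn dataset object):

for name, imp in sorted(zip(wine.feature_names, model_tree.feature_importances_),
                        key=lambda t: -t[1])[:5]:
    print(f'{name}: {imp:.3f}')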
