#-*- coding: utf-8 -*-

from sklearn.ensemble import GradientBoostingClassifier

import numpy as np

from sklearn.preprocessing import OneHotEncoder

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix

from data_deal import data_deal

import xgboost as xgb

from sklearn.externals import joblib

from sklearn.model_selection import train_test_split

 

#data = np.random.rand(5000,10)

 

#label = np.random.randint(2, size=5000)

 

# Load the data set through the project helper; it returns a (data, label) pair.
data, label = data_deal()

# Hold out 30% of the rows as a test split; the fixed random_state keeps the
# partition identical from run to run.
(x_train, x_test,
 y_train, y_test) = train_test_split(
    data,
    label,
    test_size=0.3,
    random_state=0)

 

#data = data[:100000]

#label = label[:100000]

# Disabled alternative, kept as a no-op string literal: fit scikit-learn's
# GradientBoostingClassifier (100 estimators) on the full data/label arrays and
# compute p with gbdt_model.apply(data) instead of using xgboost.
'''

gbdt_model = GradientBoostingClassifier(n_estimators = 100)

 

gbdt_model.fit(data, label)

 

p = gbdt_model.apply(data)

'''

 

# Labelled training matrix: passed to xgb.train both as the training set and
# as the only entry of the evaluation watch list.
dtrain = xgb.DMatrix(data=x_train, label=y_train)

#dtest = xgb.DMatrix(data)

# The same training rows without labels; used further down to query the
# leaf indices from the fitted booster.
dtrain_x = xgb.DMatrix(data=x_train)

 

# xgboost booster configuration, assembled group by group.
param = {}

# Learning task: tree booster, binary logistic objective, AUC as eval metric.
param.update({
    'booster': 'gbtree',
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
})

# Tree size and L2 regularisation weight.
param['max_depth'] = 5
param['lambda'] = 10

# Row and per-tree column subsampling ratios.
param['subsample'] = 0.8
param['colsample_bytree'] = 0.8

# Minimum child weight required for a split, and the learning rate.
param['min_child_weight'] = 10
param['eta'] = 0.1

# Runtime settings: RNG seed, worker threads, quiet logging.
param.update([
    ('seed', 0),
    ('nthread', 8),
    ('silent', 1),
])

 

# Watch list: the training matrix is the only evaluation set handed to
# xgb.train, labelled 'train' in its output.
evallist = [(dtrain, 'train')]

# Number of boosting rounds.
num_round = 300

bst = xgb.train(
    params=param,
    dtrain=dtrain,
    num_boost_round=num_round,
    evals=evallist)

# Persist the fitted booster next to the script.
bst.save_model('xgb_test.model')

#bst.load_model('xgb_test.model')

# pred_leaf=True asks for leaf indices instead of probabilities: per the
# xgboost docs, one row per sample and one column per tree.
p = bst.predict(dtrain_x, pred_leaf=True)

 

# One-hot encode the leaf indices in p (one categorical column per tree) so
# they can be used as features for the logistic regression below.
#
# handle_unknown='ignore': the encoder is persisted and applied to other data
# later; a leaf index that never occurred in the rows it was fitted on is then
# encoded as all zeros for that tree instead of raising inside transform().
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
one_hot_encoder.fit(p)

# Persist the fitted encoder so inference can reuse the exact column layout.
joblib.dump(one_hot_encoder, "one_hot_encoder.model")

# Keep the encoder's sparse output: each row has one non-zero per tree, so a
# dense copy costs far more memory for no benefit. LogisticRegression's
# fit/predict accept the sparse matrix directly.
one_hot_encoder_feature = one_hot_encoder.transform(p)

 

# Fit the logistic regression on the one-hot encoded leaf features and
# persist it for later inference.
lr_model = LogisticRegression()
lr_model.fit(one_hot_encoder_feature, y_train)

joblib.dump(lr_model, "lr_test.model")

# Hard class labels: used for the classification report and confusion matrix.
predict_label = lr_model.predict(one_hot_encoder_feature)

# AUC is a ranking metric and must be computed from continuous scores.
# Feeding it hard 0/1 labels collapses the ROC curve to a single threshold and
# understates the model, so use the positive-class probability (column 1 of
# predict_proba for a binary problem).
predict_score = lr_model.predict_proba(one_hot_encoder_feature)[:, 1]

# Single-argument print(...) emits the same text under Python 2 and also
# parses under Python 3.
print('train: %s' % (roc_auc_score(y_train, predict_score),))
print('train: %s' % (classification_report(y_train, predict_label),))
print('train: %s' % (confusion_matrix(y_train, predict_label),))

 

# Disabled hold-out evaluation, kept as a no-op string literal: it reloads the
# saved booster, one-hot encoder and logistic regression from disk, pushes
# x_test through the same leaf-index -> one-hot -> LR pipeline and prints the
# AUC, classification report and confusion matrix against y_test.
# NOTE(review): roc_auc_score receives hard labels from predict() here; AUC is
# normally computed from scores such as predict_proba(...)[:, 1].
# NOTE(review): transform() may raise on leaf indices that were not present
# when the encoder was fitted -- confirm before re-enabling.
'''

 

dtest_x = xgb.DMatrix(x_test)

 

bst_load = xgb.Booster({'nthread': 8})  # init model

bst_load.load_model('xgb_test.model')

 

p = bst_load.predict(dtest_x, pred_leaf=True)

 

one_hot_encoder_load = joblib.load('one_hot_encoder.model')

 

one_hot_encoder_feature = one_hot_encoder_load.transform(p).toarray()

 

lr_model_load = joblib.load('lr_test.model')

 

predict_label = lr_model_load.predict(one_hot_encoder_feature)

 

print 'test:', roc_auc_score(y_test, predict_label)

print 'test:', classification_report(y_test, predict_label)

print 'test:', confusion_matrix(y_test, predict_label)

'''