# 1. 划分训练集与测试集 ----

library(ggplot2)

library(caret)

# Split `credit` into a training set (xunlianji) and a test set (ceshiji).

# Fix the RNG seed so the random partition is reproducible.
set.seed(123456)

# Class proportions of the target in the full data set.
# NOTE(review): the original notes used the placeholder `xxx字段` here; `y` is
# assumed to be the target column because the model formulas in this file use
# `y ~ .` — confirm.
prop.table(table(credit$y))

# Row indices for the training set, sampled within each level of the target.
# NOTE(review): the original notes gave no arguments; p = 0.7 is an assumed
# split ratio — adjust as needed.
index <- createDataPartition(credit$y, p = 0.7, list = FALSE)

xunlianji <- credit[index, ]
# The trailing comma matters: credit[-index] would drop columns, not rows.
ceshiji <- credit[-index, ]

# Compare the class balance of the target in both subsets.
prop.table(table(xunlianji$y))
prop.table(table(ceshiji$y))

dim(xunlianji)
dim(ceshiji)

# 2. 建模分析 ----

# Fit a regression of y on every other column of the training set.
# NOTE(review): the original notes left the family as the placeholder "xxx";
# binomial() is used here to match the scorecard model fitted later in this
# file — confirm.
fit <- glm(y ~ ., data = xunlianji, family = binomial())
summary(fit)

# Review the summary above, then refit with only the predictors worth keeping.
# `xxx字段` is a placeholder carried over from the original notes — replace it
# with the remaining predictor names before running.
fit1 <- glm(y ~ age + xxx字段, data = xunlianji, family = binomial())
summary(fit1)

# 模型修正 ----

# Rebalance the training set with SMOTE.
# NOTE(review): SMOTE() with perc.over / perc.under looks like DMwR::SMOTE,
# which is not loaded anywhere in the visible file — confirm the package.

# Convert the target column to a factor and keep the result. The original
# called as.factor() on the whole data frame and discarded the value.
xunlianji$y <- as.factor(xunlianji$y)

# Keep the resampled data; the original call discarded its return value, so
# the proportion check below could never reflect the resampling.
xunlianji <- SMOTE(y ~ ., xunlianji, perc.over = 500, perc.under = 100)

# Class proportions of the target after resampling.
prop.table(table(xunlianji$y))

 

# 模型评估 ----

# Evaluate the refitted model on the held-out test set with a ROC curve.

# Install pROC only when it is missing, instead of on every run.
# NOTE(review): the original notes had a placeholder mirror
# (" tsinghua.eduxxx"); the URL below is the TUNA (Tsinghua) CRAN mirror —
# confirm before use.
if (!requireNamespace("pROC", quietly = TRUE)) {
  install.packages(
    "pROC",
    repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"
  )
}
library(pROC)

# Predictions for the test rows. type = "response" puts them on the response
# scale, so the threshold printed on the plot reads as a probability.
pre <- predict(fit1, newdata = ceshiji, type = "response")

# NOTE(review): the original used the placeholder `xxx测试字段` for the
# observed outcome; the test-set target `ceshiji$y` is assumed — confirm.
modelroc <- roc(ceshiji$y, pre)

# Print the ROC object.
modelroc

plot(
  modelroc,
  print.auc = TRUE,
  auc.polygon = TRUE,
  grid = c(0.1, 0.2),
  grid.col = c("green", "red"),
  max.auc.polygon = TRUE,
  auc.polygon.col = "skyblue",
  print.thres = TRUE
)

# 建立评分卡 ----

library(sqldf)

library(gsubfn)

library(smbinning)

# 对数据分箱 ----

# Bin one predictor with smbinning and plot the result.

# Plot margins (bottom, left, top, right). R is case-sensitive: the vector
# constructor is c(), not C().
par(mar = c(5, 4, 2, 3))

# Example: bin the age column against the target y.
# NOTE(review): the original notes passed the placeholder `table` (which is
# the base R function) as the data frame and "xxx字段" as the column name;
# `credit` and "age" are assumed — confirm.
age <- smbinning(credit, "y", "age")

# Information value of the binned column.
age$iv

# Draw the four plot types on a 2 x 2 grid.
par(mfrow = c(2, 2))
smbinning.plot(age, option = "dist", sub = "名称")
# The option string is "WoE"; the original's "WOE" is not one of the
# accepted values.
smbinning.plot(age, option = "WoE", sub = "名称")
smbinning.plot(age, option = "goodrate", sub = "名称")
smbinning.plot(age, option = "badrate", sub = "名称")
# Back to a single-panel layout.
par(mfrow = c(1, 1))

# Add the binned columns to a working copy of the data and fit the scorecard
# model on them.

# NOTE(review): the original notes assigned the placeholder `table` (the base
# R function) here; `credit` is assumed to be the source data — confirm.
xxx <- credit

# One smbinning.gen() call per binned predictor. `字段` and "名称" are
# placeholders carried over from the original notes: replace `字段` with the
# smbinning() result for each predictor and "名称" with a distinct name for
# each generated column.
xxx <- smbinning.gen(xxx, 字段, "名称")
xxx <- smbinning.gen(xxx, 字段, "名称")
xxx <- smbinning.gen(xxx, 字段, "名称")
xxx <- smbinning.gen(xxx, 字段, "名称")

head(xxx)

# Keep column 1 plus the generated columns.
# NOTE(review): assumes column 1 is the target and 11:18 are the generated
# columns — verify against head(xxx).
xxx_new <- xxx[, c(1, 11:18)]
head(xxx_new)

xxx_mod <- glm(y ~ ., data = xxx_new, family = binomial())
# Summarise the fitted model; the original summarised the data frame xxx_new.
summary(xxx_mod)

# 打分 ----

# Scale the scorecard model into points and score every row.

# The original called the misspelled smbinning.scaliing() on an undefined
# `cred_mod`; the model fitted in this file is `xxx_mod`.
cre_scal <- smbinning.scaling(xxx_mod, pdo = 45, score = 800, odds = 50)
cre_scal$minmaxscore
cre_scal$logitscaled

# Generate the score for every row. The original passed an undefined
# `xxx_scal`; the scaled object created above is `cre_scal`.
xxx4 <- smbinning.scoring.gen(smbscaled = cre_scal, dataset = xxx_new)
# The data viewer is View(); R is case-sensitive.
View(xxx4)

# Score distribution per class of y.
# NOTE(review): smbinning.scoring.gen() may name the total column `Score`
# (capital S); confirm the column name with names(xxx4) before running.
boxplot(
  score ~ y,
  data = xxx4,
  horizontal = TRUE,
  frame.plot = FALSE,
  col = "lightgray",
  main = "distribution"
)

# 分类器的性能和比较 ----

# Performance metrics for the score column against the observed y, with the
# "auc" plot requested.
smbinning.metrics(
  dataset = xxx4,
  prediction = "score",
  actualclass = "y",
  plot = "auc"
)

 

# 一般银行会根据评分设定放贷阈值，以判断申请是否符合放贷条件，并权衡相应的业务量给企业带来的风险大小。