逻辑回归前向选择的R代码
逻辑回归见前述随笔
1
#计算pi2

pi_fun<-function(x,Beta)
{3
Beta<-matrix(as.vector(Beta),ncol=1)4
x<-matrix(as.vector(x),ncol=nrow(Beta))5
g_fun<-x%*%Beta 6
exp(g_fun)/(1+exp(g_fun)) 7
} 8
#计算信息函数9

funs<-function(x,y,Beta)
{10
x<-matrix(as.vector(x),nrow=nrow(y))11
Beta<-matrix(as.vector(Beta),ncol=1)12
13
pi_value<- pi_fun(x,Beta)14
U<-t(x)%*%(y-pi_value);15
uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));16
H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x17
list(U=U,H=H)18
}19
#牛顿迭代计算Beta20

Newtons<-function(fun,x,y,ep=1e-8,it_max=100)
{21
x<-matrix(as.vector(x),nrow=nrow(y))22
Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))23
Index<-0;24
k<-125

while(k<=it_max)
{26
x1<-Beta;obj<-fun(x,y,Beta);27
Beta<-Beta+solve(obj$H,obj$U);28
norm<-sqrt(t((Beta-x1))%*%(Beta-x1))29

if(norm<ep)
{30
index<-1;break31
}32
k<-k+133
}34
obj<-fun(x,y,Beta);35
36
list(Beta=Beta,it=k,index=index,U=obj$U,H=obj$H)37
}38
#拟合检验39

ModelFitStat<-function(x,y,Beta)
{40
x<-matrix(as.vector(x),nrow=nrow(y))41
Beta<-matrix(as.vector(Beta),ncol=1)42
43
uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));44
LOGL<--2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x,Beta)))45
AIC<-LOGL+2*(ncol(x))46
SC<-LOGL+(ncol(x))*log(nrow(x))47
list(LOGL=LOGL,AIC=AIC,SC=SC)48
}49

50
#回归方程显著性检验51

GlobalNullTest<-function(x,y,Beta,BetaIntercept)
{52
y<-matrix(as.vector(y),ncol=1)53
x<-matrix(as.vector(x),nrow=nrow(y))54
Beta<-matrix(as.vector(Beta),ncol=1)55
56
pi_value<- pi_fun(x,Beta)57
df<-nrow(Beta)-158
##compute Likelihood ratio59
60
MF<-ModelFitStat(x,y,Beta) 61
n1<-sum(y[y>0])62
n<-nrow(y)63
LR<--2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL64
LR_p_value<-pchisq(LR,df,lower.tail=FALSE)65
LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)66
67
##compute Score68
69
BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1) 70
obj<-funs(x,y,BetaIntercept) 71
Score<-t(obj$U)%*%solve(obj$H)%*%obj$U72
Score_p_value<-pchisq(Score,df,lower.tail=FALSE)73
Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)74
75
##compute Wald test 76
obj<-funs(x,y,Beta)77
I_Diag<-diag((solve(obj$H))) 78
79
Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)80
Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta) 81
82
Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)83
Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)84
85
list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)86
}87
#配合的小函数88

WhichEqual1<-function(x)
{89
a<-NULL90

for(i in 1:length(x))
{91

if(x[i]==1)
{92
a<-c(a,i)93
}94
} 95
a96
} 97

98

CheckOut<-function(source,check)
{99

for(j in 1:length(source))
{100

for(k in 1:length(check))
{101
if(source[j]==check[k])102
source[j]<-0103
} 104
}105
source[source>0]106
}107

108
#前向选择109
##forword selection110

ForwardSel<-function(x,y,check_pvalue=0.05)
{111
##as matrix112
x<-matrix(as.vector(x),nrow=nrow(y))113
##indication of variable114
indict<-rep(0,ncol(x)) ##which column enter115
##intercept entered116
indict[1]<-1117
Beta<-NULL118
print("intercept entered
")119
Result<-Newtons(funs,x[,1],y)120
Beta<-Result$Beta121
BetaIntercept<-Result$Beta 122
uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));123
LOGL<--2*(t(y)%*%log(pi_fun(x[,1],Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x[,1],Beta)))124
print(paste("-2Log=",LOGL)) 125
indexVector<-WhichEqual1(indict) 126
##check other variable127

repeat
{ 128
pvalue<-rep(1,ncol(x))129
130
k<-2:length(indict)131
k<-CheckOut(k,indexVector) 132

for(i in k)
{ 133
obj<-funs(c(x[,indexVector],x[,i]),y,c(as.vector(Beta),0)) 134
Score<-t(obj$U)%*%solve(obj$H,obj$U) 135
Score_pvalue<-pchisq(Score,1,lower.tail=FALSE) 136
pvalue[i]<-Score_pvalue 137
}138
print(pvalue)139
pvalue_min<-min(pvalue)140

if(pvalue_min<check_pvalue)
{141
j<-which.min(pvalue) 142
print(paste(x_name[j]," entered:"))143
##set indication of variable144
indict[j]<-1145
indexVector<-WhichEqual1(indict)146
Result<-Newtons(funs,x[,indexVector],y)147
Beta<-NULL148
Beta<-Result$Beta149
#print("Beta")150
#print(Beta) 151
152
##compute model fit statistics153
print("Model Fit Statistics")154
MFStat<-ModelFitStat(x[,indexVector],y,Beta)155
print(MFStat)156
##test globel null hypothesis:Beta=0 157
print("Testing Global Null Hypothese:Beta=0") 158
GNTest<-GlobalNullTest(x[,indexVector],y,Beta,BetaIntercept)159
print(GNTest) 160
}161

if(pvalue_min>=check_pvalue||all(indict)>0)
{ 162
print("No effects met the 0.05 significance level for entry into the model") 163
break164
}165
}166
167
##Analysis of Maximum Likelihood Estimates168
indexVector<-WhichEqual1(indict)169
print("Analysis of Maximum Likelihood Estimates")170
print(Beta)171
172
##compute variable Wald chi-square173
obj<-funs(x[,indexVector],y,Beta)174
H_Diag<-sqrt(diag(solve(obj$H))) 175
WaldChisq<-(as.vector(Beta)/H_Diag)^2176
WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)177
WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)178
print(WaldChisqTest)179
}使用例子
1
#example32
rs<-read.csv("example2R.csv",head=TRUE)3
x<-matrix(c(rep(1,189),rs[,3],rs[,4],rs[,12],rs[,13],rs[,6],rs[,7],rs[,8],rs[,9],rs[,10]),nrow=189)4
y<-matrix(rs[,2],nrow=189)5
x_name<-c("INTERCEPT","AGE","LWT","RACE2","RECE3","SMOKE","PTL","HT","UI","FTV")6
ForwardSel(x,y)
浙公网安备 33010602011771号