逻辑回归后向选择的R代码
逻辑回归见前述文章。
1
#计算pi 2

pi_fun<-function(x,Beta)
{3
Beta<-matrix(as.vector(Beta),ncol=1)4
x<-matrix(as.vector(x),ncol=nrow(Beta))5
g_fun<-x%*%Beta 6
exp(g_fun)/(1+exp(g_fun)) 7
} 8
#计算信息矩阵9

funs<-function(x,y,Beta)
{10
x<-matrix(as.vector(x),nrow=nrow(y))11
Beta<-matrix(as.vector(Beta),ncol=1)12
13
pi_value<- pi_fun(x,Beta)14
U<-t(x)%*%(y-pi_value);15
uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));16
H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x17
list(U=U,H=H)18
}19
#牛顿迭代法计算Beta20

Newtons<-function(fun,x,y,ep=1e-8,it_max=100)
{21
x<-matrix(as.vector(x),nrow=nrow(y))22
Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))23
24
Index<-0;25
k<-126

while(k<=it_max)
{27
x1<-Beta;obj<-fun(x,y,Beta);28
Beta<-Beta+solve(obj$H,obj$U);29
norm<-sqrt(t((Beta-x1))%*%(Beta-x1))30

if(norm<ep)
{31
index<-1;break32
}33
k<-k+134
}35
obj<-fun(x,y,Beta);36
37
list(Beta=Beta,it=k,index=index,U=obj$U,H=obj$H)38
}39
#拟合检验40

ModelFitStat<-function(x,y,Beta)
{41
x<-matrix(as.vector(x),nrow=nrow(y))42
Beta<-matrix(as.vector(Beta),ncol=1)43
44
uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));45
LOGL<--2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x,Beta)))46
AIC<-LOGL+2*(ncol(x))47
SC<-LOGL+(ncol(x))*log(nrow(x))48
list(LOGL=LOGL,AIC=AIC,SC=SC)49
}50

51
#回归方程显著性检验52

GlobalNullTest<-function(x,y,Beta,BetaIntercept)
{53
y<-matrix(as.vector(y),ncol=1)54
x<-matrix(as.vector(x),nrow=nrow(y))55
Beta<-matrix(as.vector(Beta),ncol=1)56
57
pi_value<- pi_fun(x,Beta)58
df<-nrow(Beta)-159
##compute Likelihood ratio60
61
MF<-ModelFitStat(x,y,Beta) 62
n1<-sum(y[y>0])63
n<-nrow(y)64
LR<--2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL65
LR_p_value<-pchisq(LR,df,lower.tail=FALSE)66
LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)67
68
##compute Score69
70
BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1) 71
obj<-funs(x,y,BetaIntercept) 72
Score<-t(obj$U)%*%solve(obj$H)%*%obj$U73
Score_p_value<-pchisq(Score,df,lower.tail=FALSE)74
Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)75
76
##compute Wald test 77
obj<-funs(x,y,Beta)78
I_Diag<-diag((solve(obj$H))) 79
80
Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)81
Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta) 82
83
Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)84
Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)85
86
list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)87
}88
#小函数89

WhichEqual1<-function(x)
{90
a<-NULL91

for(i in 1:length(x))
{92

if(x[i]==1)
{93
a<-c(a,i)94
}95
} 96
a97
} 98

99

CheckOut<-function(source,check)
{100

for(j in 1:length(source))
{101

for(k in 1:length(check))
{102
if(source[j]==check[k])103
source[j]<-0104
} 105
}106
source[source>0]107
}108

109
#后向删除110

BackwardSel<-function(x,y,check_pvalue=0.05)
{111
##as matrix112
y<-matrix(as.vector(y),ncol=1)113
x<-matrix(as.vector(x),nrow=nrow(y))114
##indication of variable115
indict<-rep(1,ncol(x)) ##which column remove116
## 1:keep 117
## 0:remove118
119

repeat
{120
121
indexVector<-WhichEqual1(indict)122
123
##compute variable Wald chi-square124
Result<-Newtons(funs,x[,indexVector],y)125
obj<-funs(x[,indexVector],y,Result$Beta)126
H_Diag<-sqrt(diag(solve(obj$H))) 127
WaldChisq<-(as.vector(Result$Beta)/H_Diag)^2128
WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)129
WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)130
print(WaldChisqTest)131
132
##Model Fit Statistics133
MFStat<-ModelFitStat(x[,indexVector],y,Result$Beta)134
print("Model Fit Statistics")135
print(MFStat)136
137
##Testing Global Null Hypothesis: BETA=0138
ResultIntercept<-Newtons(funs,x[,1],y) 139
GNTest<-GlobalNullTest(x[,indexVector],y,Result$Beta,ResultIntercept$Beta)140
print("Testing Global Null Hypothesis: BETA=0")141
print(GNTest) 142
143
##check variable144
pvalue_max<-max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept145

if(pvalue_max>check_pvalue)
{146
j<-which.max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept147
print(paste(x_name[indexVector[j+1]]," is removed:"))148
##set indication of variable149
indict[indexVector[j+1]]<-0150
}151

if(pvalue_max<=check_pvalue)
{152
print("No (additional) effects met the 0.05 significance level for removal from the model.")153
print("Analysis of Maximum Likelihood Estimates")154
print(Result$Beta)155
print(WaldChisqTest)156
break157
}158
} 159
160
} 使用方法如下
1
#example3--backward2
rs<-read.csv("example2R.csv",head=TRUE)3
x<-matrix(c(rep(1,189),rs[,3],rs[,4],rs[,12],rs[,13],rs[,6],rs[,7],rs[,8],rs[,9],rs[,10]),nrow=189)#选择列以充作自变量4
y<-matrix(rs[,2],nrow=189)#选择因变量5
x_name<-c("INTERCEPT","AGE","LWT","RACE2","RECE3","SMOKE","PTL","HT","UI","FTV")6
#自变量名字7
BackwardSel(x,y)
浙公网安备 33010602011771号