R语言 Delong检验细节如何实现
#### Delong Statistics calculation 2023-04-14
## Variance calculation function
VarSe <- function(PosProb,NegProb, Auc){
XiA <- PosProb; YiB <- NegProb; m <- length(XiA); n <- length(YiB)
Var10A <- c()
for(i in 1:m){
Tmp <- c()
ValueA <- XiA[i]
for(j in 1:n){
ValueB <- YiB[j]
if(ValueA>ValueB){
Tmp <- c(Tmp,1/n)
}
if(ValueA==ValueB){
Tmp <- c(Tmp,0.5/n)
}
if(ValueA<ValueB){
Tmp <- c(Tmp,0)
}
}
Var10A <- c(Var10A,sum(Tmp))
}
Var01A <- c()
for(j in 1:n){
Tmp <- c()
ValueB <- YiB[j]
for(i in 1:m){
ValueA <- XiA[i]
if(ValueA > ValueB){
Tmp <- c(Tmp,1/m)
}
if(ValueA==ValueB){
Tmp <- c(Tmp,0.5/m)
}
if(ValueA<ValueB){
Tmp <- c(Tmp,0)
}
}
Var01A <- c(Var01A ,sum(Tmp))
}
# variance calicaton
S10 <- c()
for(i in 1:m){
S10 <- c(S10,(Var10A[i]-Auc)^2)
}
S10 <- sum(S10)/(m-1)
S01 <- c()
for(i in 1:n){
S01 <- c(S10,(Var01A[i]-Auc)^2)
}
S01 <- sum(S01)/(m-1)
SA <- S10/m + S01/n
# 结果输出
Output <- list(Var10A, Var01A, SA,S10,S01)
names(Output) <- c("Var_10","Var_01","Viance","Variance1","Variance0")
Output
}
# AUC及其AUC的方差估计
Theta <- function(PosProb,NegProb){
XiA <- PosProb; YiB <- NegProb; m <- length(XiA); n <- length(YiB)
Theta <- c()
for(i in 1:m){
ValueA <- XiA[i]
for(j in 1:n){
ValueB <- YiB[j]
if(ValueA>ValueB){
Theta <- c(Theta,1)
}
if(ValueA==ValueB){
Theta <- c(Theta,0.5)
}
if(ValueA<ValueB){
Theta <- c(Theta,0)
}
}
}
ThetaMat <- sum(Theta)/(n*m)
ThetaMat
}
#### load data, The table consists of four columns, namely Group, Label, Model 1 probability, and Model 2 probability.
# work directory,文件读取和保存的位置设置
setwd("E:\\UAI_Program\\3-ChuanBeiHospital\\qinshize")
Data <- read.csv("DelongTest.csv")
# 正类样本数和负类样本数
m <- length(which(Data[,1]==1))
n <- length(which(Data[,1]==0))
# 模型1中,正类样本和负类样本被识别为正类的概率
XiA <- Data[which(Data[,1]==1),2]
XiB <- Data[which(Data[,1]==0),2]
# 模型2中,正类样本和负类样本被识别为负类的概率
YiA <- Data[which(Data[,1]==1),3]
YiB <- Data[which(Data[,1]==0),3]
ThetaA <- Theta(XiA,XiB)
ThetaB <- Theta(YiA,YiB)
# 不同模型的方差估计
VarSeA <- VarSe(XiA,XiB,Auc1)
VarSeB <- VarSe(YiA,YiB,Auc2)
VarA <- VarSeA[[3]]
VarB <- VarSeB[[3]]
## 整体协方差估计
S10AB <- c()
for(i in 1:m){
Tmp <- (VarSeA[[1]][m]-Auc1)^2*(VarSeB[[1]][m]-Auc2)^2
S10AB <- c(S10AB,Tmp)
}
S10AB <- sum(S10AB)/(m-1)
S01AB <- c()
for(i in 1:n){
Tmp <- (VarSeA[[2]][n]-Auc1)^2*(VarSeB[[2]][n]-Auc2)^2
S01AB <- c(S01AB,Tmp)
}
S01AB <- sum(S01AB)/(n-1)
# 加权获得两个模型的协方差
SAB <- S10AB/m + S01AB/n
###### Delong Z statistics calculation
Z <- (ThetaA-ThetaB)/sqrt(VarA + VarB - 2*SAB); Z
# p值计算
2*pnorm(q=Z, lower.tail= F)