R语言 Delong检验细节如何实现

#### Delong  Statistics calculation 2023-04-14

## Variance calculation function

VarSe <- function(PosProb,NegProb, Auc){
  XiA <- PosProb; YiB <- NegProb; m <- length(XiA); n <- length(YiB)
  Var10A <- c()
  for(i in 1:m){
    Tmp <- c()
    ValueA <- XiA[i]
     for(j in 1:n){
      ValueB <- YiB[j]
      if(ValueA>ValueB){
        Tmp <- c(Tmp,1/n)
      }
      if(ValueA==ValueB){
        Tmp <- c(Tmp,0.5/n)
      }
      if(ValueA<ValueB){
        Tmp <- c(Tmp,0)
      }
    }
  Var10A <- c(Var10A,sum(Tmp))
  }

  Var01A <- c()
  for(j in 1:n){
  Tmp <- c()
  ValueB <- YiB[j]
  for(i in 1:m){
    ValueA <- XiA[i]
    if(ValueA > ValueB){
      Tmp <- c(Tmp,1/m)
      }
    if(ValueA==ValueB){
      Tmp <- c(Tmp,0.5/m)
    }
    if(ValueA<ValueB){
      Tmp <- c(Tmp,0)
    }
   }
  Var01A <- c(Var01A ,sum(Tmp))
  }
  # variance calicaton
  S10 <- c()
  for(i in 1:m){
    S10 <- c(S10,(Var10A[i]-Auc)^2)
  }
  S10 <- sum(S10)/(m-1)

  S01 <- c()
  for(i in 1:n){
    S01 <- c(S10,(Var01A[i]-Auc)^2)
  }
  S01 <- sum(S01)/(m-1)
  SA <- S10/m + S01/n

  # 结果输出
  Output <- list(Var10A, Var01A, SA,S10,S01)
  names(Output) <- c("Var_10","Var_01","Viance","Variance1","Variance0")
  Output
}

# AUC及其AUC的方差估计
Theta <- function(PosProb,NegProb){
  XiA <- PosProb; YiB <- NegProb; m <- length(XiA); n <- length(YiB)
  Theta <- c()
  for(i in 1:m){
    ValueA <- XiA[i]
    for(j in 1:n){
      ValueB <- YiB[j]
      if(ValueA>ValueB){
        Theta <- c(Theta,1)
      }
      if(ValueA==ValueB){
        Theta <- c(Theta,0.5)
      }
      if(ValueA<ValueB){
        Theta <- c(Theta,0)
      }
    }
  }
  ThetaMat <- sum(Theta)/(n*m)
  ThetaMat
}

 

####  load data, The table consists of four columns, namely Group, Label, Model 1 probability, and Model 2 probability.

# work directory,文件读取和保存的位置设置
setwd("E:\\UAI_Program\\3-ChuanBeiHospital\\qinshize")

Data <- read.csv("DelongTest.csv")

# 正类样本数和负类样本数

m <- length(which(Data[,1]==1))

n <- length(which(Data[,1]==0))

 # 模型1中,正类样本和负类样本被识别为正类的概率

XiA <- Data[which(Data[,1]==1),2]
XiB <- Data[which(Data[,1]==0),2]

# 模型2中,正类样本和负类样本被识别为负类的概率

YiA <- Data[which(Data[,1]==1),3]

YiB <- Data[which(Data[,1]==0),3]

 # 不同模型AUC的估计,通过和roc函数计算两个模型的AUC值相比,估计正确。

ThetaA <- Theta(XiA,XiB)
ThetaB <- Theta(YiA,YiB)

# 不同模型的方差估计
VarSeA <- VarSe(XiA,XiB,Auc1)
VarSeB <- VarSe(YiA,YiB,Auc2)

VarA <- VarSeA[[3]]
VarB <- VarSeB[[3]]

## 整体协方差估计

S10AB <- c()
for(i in 1:m){
  Tmp <- (VarSeA[[1]][m]-Auc1)^2*(VarSeB[[1]][m]-Auc2)^2
  S10AB <- c(S10AB,Tmp)
}
S10AB <- sum(S10AB)/(m-1)

S01AB <- c()
for(i in 1:n){
  Tmp <- (VarSeA[[2]][n]-Auc1)^2*(VarSeB[[2]][n]-Auc2)^2
  S01AB <- c(S01AB,Tmp)
}
S01AB <- sum(S01AB)/(n-1)

# 加权获得两个模型的协方差

SAB <- S10AB/m + S01AB/n

###### Delong Z statistics calculation

Z <- (ThetaA-ThetaB)/sqrt(VarA + VarB - 2*SAB); Z

# p值计算
2*pnorm(q=Z, lower.tail= F)

posted @ 2023-04-14 17:17  王姑娘呀~  阅读(412)  评论(0编辑  收藏  举报