library(gtools)
library(intrval)

library(optrees)
library(cppRouting)

### gROC function: to estimate the right-sided, left-sided and/or gROC curve (with or without the condition (C)) for a univariate marker X

### Input parameters ###
# X: marker values [vector]
# D: population indicator [vector with two levels]
# side: type of ROC curve. One from "right" (right-sided ROC curve),"left" (left-sided ROC curve),"both" (gROC curve)
# restric: if TRUE, condition (C) should be fulfilled by the classification subsets on which the gROC curve is based (only if side is "both")
# optimal: if TRUE, the gROC curve under condition (C) with maximum gAUC is estimated. Computationally intensive
# t0: starting point (m+1)*FPR where m is the negative sample size and FPR the False-Positive Rate (1-Specificity). 
#     If NULL, FPR is selected as that one achieving the Youden index
# t0max: if TRUE, restricted gROC curves for every possible (m+1)*FPR as starting point are computed and the one reporting the maximum gAUC is selected. Computationally intensive

### Output parameters ###
# levels: group/population labels referred to negative and positive, respectively
# controls: marker values in the negative sample
# cases: marker values in the positive sample
# side: type of ROC curve according to shape of the classification subsets (where it is classified as negative). One from "right" (st=(c,infty)),"left" (st=(-infty,c)),"both" (st=[xl,xu])
# t: false-positive rates in the interval [0,1] where the ROC curve is estimated
# roc: ROC curve value for every FPR in t
# c, xl, xu: limits for classification subset for every FPR in t
# auc: area under the estimated ROC curve
# aucfree: if restric is TRUE, area under the estimated ROC curve without imposing condition (C)

gROC <- function(X, D, side=c("right","left","both"), restric=FALSE, optimal=FALSE, t0=NULL, t0max=FALSE, ...){
  
  levels.names <- levels(as.factor(D))
  controls <- split(X,D)[[levels.names[1]]]; cases <- split(X,D)[[levels.names[2]]]
  D <- ifelse(as.factor(D)==levels.names[1], 0, 1); levels <- levels(as.factor(D))
  
  m <- length(controls)
  
  t <- seq(0,1,1/m)
  N <- length(t)
  
  XX <- sort(c(controls,cases))
  e <- ifelse(length(unique(XX))>1, min(unique(XX)[-1]-unique(XX)[-length(unique(XX))])/2, sqrt(.Machine$double.eps))
  
  main <- function(side){
    if(side=='right'){
      c <- as.numeric(quantile(controls,1-t,type=3))
      roc <- 1-ecdf(cases)(c)
      results <- list(roc=roc, c=c)
    }
    if(side=='left'){
      c <- as.numeric(quantile(controls,t,type=3)) - e*as.numeric(ecdf(controls)(quantile(controls,t,type=3)) > t)
      roc <- ecdf(cases)(c)
      results <- list(roc=roc, c=c)
    }
    if(side=='both'){
      A <- sapply(1:N, function(i){
        if(i == N){
          roc <- 1; xl <- xu <- max(controls)
        }else{
          gamma <- seq(1,i,1)
          index.gamma.t <- which.max(ecdf(cases)(sort(controls)[gamma]-e) + 1 - ecdf(cases)(sort(controls)[m-i+gamma]))
          gamma.t <- gamma[index.gamma.t]
          xl <-  sort(controls)[gamma.t]; xu <- sort(controls)[m-i+gamma.t]
          roc <- ecdf(cases)(xl-e) + 1 - ecdf(cases)(xu)
        }
        c(roc, xl, xu)
      })
      results <- list(roc=A[1,], xl=A[2,], xu=A[3,])
    }
    return(results)
  }
  
  side <- match.arg(side)
  mainres <- main(side)
  
  roc <- mainres$roc
  if(side=='right' || side=='left') c <- mainres$c
  if(side=='both') xl <- mainres$xl; xu <- mainres$xu
  
  # auc <- mean(roc[-1] + roc[-N])/2
  auc <- sum(roc[-N]*(t[-1] - t[-N]))
  
  if((side!='both') & restric==TRUE){
    warning('The non-parametric estimation with restriction is just computed for the generalization (side="both").')
    restric <- FALSE
  }
  
  if(restric & optimal){
    
    aucfree <- auc
    
    X0 <- sort(controls); X1 <- sort(cases)
    m <- length(X0); n <- length(X1)
    
    index.pairs <- combinations(m,2)
    pair.points <- matrix(X0[index.pairs], ncol = 2)
    N <- nrow(pair.points)
    
    if(side=='both'){
      
      A <- sapply(1:N, function(i){
        xl <- pair.points[i,1]; xu <- pair.points[i,2]
        c(sum(X0 %[]% c(xl,xu)), sum(X1 %[]% c(xl,xu)))
      })
      info <- cbind(1:N, pair.points, A[1,]/m, (n-A[2,])/n, index.pairs[,2] - index.pairs[,1])
      info
      
      index.base <- NULL
      cat("Computing matrix of weights...\n")
      bar <- txtProgressBar(min = 1, max = (m-1), style = 3)
      for(k in 1:(m-1)){
        index.base1 <- expand.grid(which(info[,6]==k), which(info[,6]==(k+1)))
        i <- index.base1[,1]; j <- index.base1[,2]
        index.val <- which(apply(pair.points[i,] %[]% pair.points[j,],1,sum)==2)
        I <- i[index.val]; J <- j[index.val]
        index.base <- rbind(index.base, cbind(I,J,abs((1-info[J,5])*(info[J,4]-info[I,4]))))
        setTxtProgressBar(bar,k)
      }
      close(bar)
      
      MAX <- max(index.base[index.base[,3]!=Inf,3], na.rm = TRUE) + 1
      index.extra <- which(info[,6]<=1)
      index.base <- rbind(index.base, cbind(rep(N+1,length(index.extra)), index.extra, MAX))
      
      
      #final.point <- which(info[,4] == 1)
      final.point <- which.max(info[,3] - info[,2])
      
      arcs.Cmat <- index.base
      colnames(arcs.Cmat) <- c("head", "tail", "weight")
      
      edges <- data.frame(from_vertex = arcs.Cmat[,1], o_vertex = arcs.Cmat[,2], cost = arcs.Cmat[,3])
      nodes <- unique(c(edges$from_vertex,edges$to_vertex))
      Graph <- makegraph(edges, directed=TRUE)
      
      Dijsktra.output <- get_path_pair(Graph, from = N+1, to = final.point, algorithm = "Dijkstra")
      output.complete <- list(walk.nodes = rev(as.numeric(Dijsktra.output[[1]])))
      
      output.complete$walk.arcs <- t(sapply(1:(length(output.complete$walk.nodes)-1), function(i){
        node.s <- output.complete$walk.nodes[i];
        node.f <- output.complete$walk.nodes[i+1];
        c(node.s, node.f, arcs.Cmat[which(arcs.Cmat[,1]==node.s & arcs.Cmat[,2]==node.f), "weight"])
      }))
      output.complete$walk.arcs
      
      output.optimal <- info[output.complete$walk.nodes[-1],]
      output.optimal
      
      ROC <- rev(output.optimal[,5])
      TT <- rev(1-output.optimal[,4])
      
      results <- list(roc=ROC, t=TT, xl=rev(output.optimal[,2]), xu=rev(output.optimal[,3]), auc=sum(ROC[-length(ROC)]*(TT[-1] - TT[-length(ROC)])) + ROC[length(ROC)]*(1-TT[length(ROC)]))
      
    }
    
    
    roc <- results$roc; t <- results$t
    xl <- results$xl; xu <- results$xu
    auc <- results$auc
    
  }
  
  if(restric & !optimal){
    
    aucfree <- auc
    
    if(side=='both'){
      
      gamma.t <- function(i){
        gamma <- seq(1,i,1)
        index.gamma.t <- which.max(ecdf(cases)(sort(controls)[gamma]-e) + 1 - ecdf(cases)(sort(controls)[m-i+gamma]))
        gamma[index.gamma.t]
      }
      
      auc.rest.i0 <- function(i0){
        TT <- rep(0,N); TT[i0] <- t[i0]
        ROC <- rep(0,N); ROC[i0] <- roc[i0]
        XL <- rep(0,N); XL[i0] <- xl[i0]
        XU <- rep(0,N); XU[i0] <- xu[i0]
        GAMMA <- rep(0,N); GAMMA[i0] <- gamma.t(min(i0,m))
        
        for(i in seq(max(i0-1,1),1,-1)){
          TT[i] <- (i-1)/m
          if(ROC[i+1]==0 || i==1){
            XL[i] <- min(controls); XU[i] <- max(controls)
          }else{
            gamma <- c(max(GAMMA[i+1]-1,1), GAMMA[i+1])
            index.gamma.t <- which.max(ecdf(cases)(sort(controls)[gamma]-e) + 1 - ecdf(cases)(sort(controls)[m-i+gamma]))
            GAMMA[i] <- gamma[index.gamma.t]
            XL[i] <-  sort(controls)[GAMMA[i]]; XU[i] <- sort(controls)[m-i+GAMMA[i]]
          }
          ROC[i] <- ecdf(cases)(XL[i]-e) + 1 - ecdf(cases)(XU[i])
        }
        
        for(i in seq(min(i0+1,m+1),m+1,1)){
          TT[i] <- (i-1)/m
          if(ROC[i-1]==1 || i==m+1){
            XL[i] <- XL[i-1]; XU[i] <- XU[i-1]
          }else{
            gamma <- c(GAMMA[i-1], GAMMA[i-1]+1)
            index.gamma.t <- which.max(ecdf(cases)(sort(controls)[gamma]-e) + 1 - ecdf(cases)(sort(controls)[m-i+gamma]))
            GAMMA[i] <- gamma[index.gamma.t]
            XL[i] <-  sort(controls)[GAMMA[i]]; XU[i] <- sort(controls)[m-i+GAMMA[i]]
          }
          ROC[i] <- ecdf(cases)(XL[i]-e) + 1 - ecdf(cases)(XU[i])
        }
        
        results <- list(roc=ROC, t=TT, xl=XL, xu=XU, auc=sum(ROC[-N]*(TT[-1] - TT[-N])))
        return(results)
      }
      
    }
    
    if(t0max){
      cat("Progress bar: Estimation of the optimal initial point of Sp\n"); flush.console()
      bar <- txtProgressBar(min = 0, max = N, style = 3)
      aucsi0 <- sapply(1:N, function(i0){
        setTxtProgressBar(bar, i0)
        auc.rest.i0(i0)$auc
      })
      close(bar)
      i0max <- which.max(aucsi0)
      results <- auc.rest.i0(i0max)
    }else{
      if(is.null(t0)){
        i0 <- which.max(roc - t)
      }else{
        if(t0<1 | t0>N | t0%%1!=0){
          stop("t0 should be an integer number between 1 and control size + 1")
        }else{
          i0 <- t0
        }
      }
      results <- auc.rest.i0(i0)
    }
    
    roc <- results$roc; t <- results$t
    xl <- results$xl; xu <- results$xu
    auc <- results$auc
    
  }
  
  if(side=='right' || side=='left'){
    results <- list(levels=levels.names, controls=controls, cases=cases, side=side, t=t, roc=roc, auc=auc, c=c, param=FALSE)
  }
  if(side=='both'){
    if(restric){
      if(t0max){
        results <- list(levels=levels.names, controls=controls, cases=cases, side=side, t=t, roc=roc, auc=auc, aucfree=aucfree, aucs=aucsi0, xl=xl, xu=xu, param=FALSE)
      }else{
        results <- list(levels=levels.names, controls=controls, cases=cases, side=side, t=t, roc=roc, auc=auc, aucfree=aucfree, xl=xl, xu=xu, param=FALSE)
      }
    }else{
      results <- list(levels=levels.names, controls=controls, cases=cases, side=side, t=t, roc=roc, auc=auc, xl=xl, xu=xu, param=FALSE)
    }
  }
  
  attr(results, 'class') <- 'groc'
  
  return(results)
  
}




### gAUC.test function: to perform hypothesis testing for comparing gAUCs (under the restriction (C))

### Input parameters ###
# groc1: 'groc' object (output from function gROC) referred to marker 1
# groc2: 'groc' object (output from function gROC) referred to marker 2. If groc2 is NULL, one sample hypothesis testing is performed
# null: gAUC value (if groc2 is NULL) or gAUC difference value (if groc2 is provided) for the null hypothesis. Default: 0.5 for the first case and 0 for the second one
# alpha: significance level for confidence interval (C.I.)
# weights: if groc2 is provided, the hypothesis testing for the null H0: weights[1]*gauc1 + weights[2]*gauc2 = null is performed

### Output parameters ###
## If groc2 is NULL
# gauc: gAUC estimate
# conf.int: 100*(1-alpha)% confidence interval for gAUC
# p.value: p-value for testing the null H0: gAUC = 0.5
## If groc2 is provided
# gauc1, gauc2: gAUC estimate for each marker
# statistic.value: weights[1] * gauc1 + weights[2] * gauc2
# conf.int: 100*(1-alpha)% confidence interval for weights[1] * gAUC1 + weights[2] * gAUC2
# p.value: p-value for testing the null H0: weights[1] * gAUC1 + weights[2] * gAUC2 = 0

gAUC.test <- function(groc1, groc2 = NULL, null = 0, alpha = 0.05, weights = c(-1,1)){
  
  m <- length(groc1$controls); n <- length(groc1$cases)
  X1 <- c(groc1$controls, groc1$cases)
  D <- c(rep(0,m), rep(1,n))
  
  gauc1 <- groc1$auc
  
  if(groc1$side == "both"){
  	  Lt <-  groc1$xl; Ut <- groc1$xu
  }else if(groc1$side == "right"){
  	  Lt <- min(X1); Ut <- groc1$c
  }else{
  	  Lt <- groc1$c; Ut <- max(X1)
  }  	
  	
  TX1 <- 1 - sapply(X1, function(x){mean(x %[]% cbind(Lt, Ut))})
  Ty1 <- TX1[D==0]; Tx1 <- TX1[D==1]
  
  Ai.1 <- ecdf(Ty1)(Tx1); Bj.1 <- ecdf(Tx1)(Ty1)
  var.gauc1 <- 1/n * (mean(Ai.1^2) - mean(Ai.1)^2) + 1/m * (mean(Bj.1^2) - mean(Bj.1)^2)
  
  alpha <- 0.05
  z.alpha <- qnorm(1-alpha/2)
  
  if(is.null(groc2)){
    
    sd.gauc1 <- sqrt(var.gauc1)
    CI <- c(gauc1 - z.alpha*sd.gauc1, gauc1 + z.alpha*sd.gauc1)
    
    if(null == 0){null <- 0.5}
    p.value <- 2*(1 - pnorm(abs(gauc1 - null)/sd.gauc1))
    
    results <- list(groc = groc1, gauc = gauc1, null = 0, alpha = 0.05, p.value = p.value, conf.int = CI)
    
  }else{
    
    X2 <- c(groc2$controls, groc2$cases)
    D <- c(rep(0,m), rep(1,n))
    
    gauc2 <- groc2$auc
    
    if(groc2$side == "both"){
  	  	Lt <-  groc2$xl; Ut <- groc2$xu
  	}else if(groc2$side == "right"){
  	  	Lt <- min(X2); Ut <- groc2$c
  	}else{
  	  	Lt <- groc2$c; Ut <- max(X2)
  	}  
    
    TX2 <- 1 - sapply(X2, function(x){mean(x %[]% cbind(Lt, Ut))})
    Ty2 <- TX2[D==0]; Tx2 <- TX2[D==1]
    
    Ai.2 <- ecdf(Ty2)(Tx2); Bj.2 <- ecdf(Tx2)(Ty2)
    var.gauc2 <- 1/n * (mean(Ai.2^2) - mean(Ai.2)^2) + 1/m * (mean(Bj.2^2) - mean(Bj.2)^2)
    
    cov.gauc1.gauc2 <- 1/n * (mean(Ai.1*Ai.2) - mean(Ai.1)*mean(Ai.2)) + 1/m * (mean(Bj.1*Bj.2) - mean(Bj.1)*mean(Bj.2))
    
    statistic.value <- weights[1] * gauc1 + weights[2] * gauc2
    sd.statistic <- sqrt(weights[1]^2 * var.gauc1 + weights[2]^2 * var.gauc2 + 2*weights[1]*weights[2] * cov.gauc1.gauc2) 
    
    CI <- c(statistic.value - z.alpha*sd.statistic, statistic.value + z.alpha*sd.statistic)
    
    p.value <- 2*(1 - pnorm(abs(statistic.value - null)/sd.statistic))
    
    results <- list(groc1 = groc1, groc2 = groc2, gauc1 = gauc1, gauc2 = gauc2, weights = c(-1,1), statistic.value = statistic.value, null = 0, alpha = 0.05, p.value = p.value, conf.int = CI)
    
  }
  
  return(results)
  
}
