# # The following code reproduces the simulation study in the paper “More than one way: Exploring the capabilities of
# # different estimation approaches to joint models for longitudinal and time-to-event outcomes.”
# # It is structured as follows:
# #   0. Load packages and functions
# #   1. Estimation
# #   2. Evaluation
# #    2.1 Estimation Accuracy: Generation of results and respective predictions + evaluation
# #    2.2 Variable Selection and Variable Tolerance: Generation of results and respective predictions + evaluation
# #    2.3 Prediction Precision: Evaluation

## ######################################################################################### ##
## 0. Load packages and functions ----------------------------------------------------------
## ######################################################################################### ##
rm(list = ls())
RNGversion("3.5.1")

## ######################################################################################### ##
## Please set me to the directory this source code lies in --------------------------------- ##
                                                                                      
path <- ""

SAVE <- F

## ----------------------------------------------------------------------------------------- ##
## ######################################################################################### ##

library(JM)
library(joineRML)
library(JMbayes)
library(assertthat)
library(parallel)
library(ggplot2)
library(stargazer)
library(ggpubr)
library(reshape2)
library(dplyr)
cbbPalette <- c('#e66101','#fdb863','#b2abd2','#5e3c99')

source(paste0(path, "01_Code/", "f_cv.R"))
source(paste0(path, "01_Code/", "f_JMboost.R"))
source(paste0(path, "01_Code/", "f_simJME_A.R"))
source(paste0(path, "01_Code/", "f_predFun.R"))
source(paste0(path, "01_Code/", "f_estFun.R"))
source(paste0(path, "01_Code/", "f_read_out_functions.R"))
source(paste0(path, "01_Code/", "f_JMpredSurv.R"))


n = 100
n_i = 5
alpha = 0.1
lambda = 0.4
int <- 1.5
betal <- c(-0.5, 0.7, 1.3, 0.3, 0.5)
betat <-  0.4
betals <- c(0.9, 0.3, -1, 0.2, -0.4)
betas <-  0.1
sigma2 <- 0.5

npred <- 1000

methods <- c("JM", "JMbayes", "joineRML", "JMboost")

# Storage vectors for Results
## Prediction
plist <- vector("list", 12)
names(plist) <- mods <- c(paste0("AM", 1:3), paste0(rep(paste0("V", 1:3), each = 3), paste0("M", 1:3)))

## Estimation
objnam <- c(paste0("resAM", 1:3), paste0(rep(paste0("resV", 1:3), each = 3), paste0("M", 1:3)))
for(i in 1:length(objnam)){
  assign(objnam[i], vector("list", 4))
}
names(resAM1) <- names(resAM2) <- names(resAM3) <- methods
names(resV1M1) <- names(resV1M2) <- names(resV1M3) <- methods
names(resV2M1) <- names(resV2M2) <- names(resV2M3) <- methods
names(resV3M1) <- names(resV3M2) <- names(resV3M3) <- methods

## ######################################################################################### ##
## 1. Estimation ---------------------------------------------------------------------------
## ######################################################################################### ##
# A ---------------------------------------------------------------------------------------------
noninfl <- noninfs <- noninfls <- 0

# generate results for Accuracy setting A/cdot and Predictions P/cdot and reading out necessary results to save memory
modelname <- "AM1"
for (mm in 1:4) {
  resAM1[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                  n = n, n_i = n_i, int = int, betal = 0, betas = betas, betals = c(betal, betals), betat = betat, alpha = alpha, lambda = lambda,
                  sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resAM1, file= paste0(path, "02_Results/02_Estimation/resAM1.RData"))
plist$AM1 <- Reduce("rbind",
              lapply(1:100, predFun, modelname = modelname,
                     n = npred, n_i = n_i, int = int, betal = 0, betas = betas, betals = c(betal, betals), betat = betat, alpha = alpha,
                     lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
              )
# read out information necessary for evaluation
AM1 <- readOutA(res = resAM1, modelname = "AM1")
if(SAVE) save(AM1, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resAM1)

modelname <- "AM2"
for (mm in 1:4) {
  resAM2[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                  n = n, n_i = n_i, int = int, betal = c(betal, betat), betas = betas, betals = 0, betat = 0, alpha = alpha, lambda = lambda,
                  sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resAM2, file= paste0(path, "02_Results/02_Estimation/resAM2.RData"))
plist$AM2 <- Reduce("rbind",
                    lapply(1:100, predFun, modelname = modelname,
                           n = npred, n_i = n_i, int = int, betal = c(betal, betat), betas = betas, betals = 0, betat = 0, alpha = alpha,
                           lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
                    )
# read out information necessary for evaluation
AM2 <- readOutA(res = resAM2, modelname = "AM2")
if(SAVE) save(AM2, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resAM2)

modelname <- "AM3"
for (mm in 1:4) {
  resAM3[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                  n = n, n_i = n_i, int = int, betal = betal, betas = betas, betals = betals, betat = betat, alpha = alpha, lambda = lambda,
                  sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resAM3, file= paste0(path, "02_Results/02_Estimation/resAM3.RData"))
plist$AM3 <- Reduce("rbind",
                    lapply(1:100, predFun, modelname = modelname,
                           n = npred, n_i = n_i, int = int, betal = betal, betas = betas, betals = betals, betat = betat, alpha = alpha,
                           lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
                    )
# read out information necessary for evaluation
AM3 <- readOutA(res = resAM3, modelname = "AM3")
if(SAVE) save(AM3, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resAM3)

# V1 ---------------------------------------------------------------------------------------------
noninfl <- noninfs <- noninfls <- 5

# Generate results for Variable Selection accross three different levels of complexity V/cdot and
# prediction results P/cdot and reading out necessary results to save memory
modelname <- "V1M1"
for (mm in 1:4) {
  resV1M1[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = 0, betas = betas, betals = c(betal, betals), betat = betat, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV1M1, file= paste0(path, "02_Results/02_Estimation/resV1M1.RData"))
plist$V1M1 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = 0, betas = betas, betals = c(betal, betals), betat = betat, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V1M1 <- readOutV(res = resV1M1, modelname = "V1M1")
if(SAVE) save(V1M1, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV1M1)

modelname <- "V1M2"
for (mm in 1:4) {
  resV1M2[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = c(betal, betat), betas = betas, betals = 0, betat = 0, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV1M2, file= paste0(path, "02_Results/02_Estimation/resV1M2.RData"))
plist$V1M2 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = c(betal, betat), betas = betas, betals = 0, betat = 0, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V1M2 <- readOutV(res = resV1M2, modelname = "V1M2")
if(SAVE) save(V1M2, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV1M2)

modelname <- "V1M3"
for (mm in 1:4) {
  resV1M3[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = betal, betas = betas, betals = betals, betat = betat, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV1M3, file= paste0(path, "02_Results/02_Estimation/resV1M3.RData"))
plist$V1M3 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = betal, betas = betas, betals = betals, betat = betat, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V1M3 <- readOutV(res = resV1M3, modelname = "V1M3")
if(SAVE) save(V1M3, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV1M3)

# V2 ---------------------------------------------------------------------------------------------
noninfl <- noninfs <- noninfls <- 50

modelname <- "V2M1"
for (mm in 1:4) {
  resV2M1[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = 0, betas = betas, betals = c(betal, betals), betat = betat, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV2M1, file= paste0(path, "02_Results/02_Estimation/resV2M1.RData"))
plist$V2M1 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = 0, betas = betas, betals = c(betal, betals), betat = betat, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V2M1 <- readOutV(res = resV2M1, modelname = "V2M1")
if(SAVE) save(V2M1, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV2M1)

modelname <- "V2M2"
for (mm in 1:4) {
  resV2M2[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = c(betal, betat), betas = betas, betals = 0, betat = 0, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV2M2, file= paste0(path, "02_Results/02_Estimation/resV2M2.RData"))
plist$V2M2 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = c(betal, betat), betas = betas, betals = 0, betat = 0, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V2M2 <- readOutV(res = resV2M2, modelname = "V2M2")
if(SAVE) save(V2M2, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV2M2)

modelname <- "V2M3"
for (mm in 1:4) {
  resV2M3[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = betal, betas = betas, betals = betals, betat = betat, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV2M3, file= paste0(path, "02_Results/02_Estimation/resV2M3.RData"))
plist$V2M3 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = betal, betas = betas, betals = betals, betat = betat, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V2M3 <- readOutV(res = resV2M3, modelname = "V2M3")
if(SAVE) save(V2M3, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV2M3)

# V3 -----------------------------------------------------------------------------------------------
noninfl <- noninfs <- noninfls <- 250

modelname <- "V3M1"
for (mm in 1:4) {
  resV3M1[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = 0, betas = betas, betals = c(betal, betals), betat = betat, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV3M1, file= paste0(path, "02_Results/02_Estimation/resV3M1.RData"))
plist$V3M1 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = 0, betas = betas, betals = c(betal, betals), betat = betat, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V3M1 <- readOutV(res = resV3M1, modelname = "V3M1")
if(SAVE) save(V3M1, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV3M1)

modelname <- "V3M2"
for (mm in 1:4) {
  resV3M2[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = c(betal, betat), betas = betas, betals = 0, betat = 0, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV3M2, file= paste0(path, "02_Results/02_Estimation/resV3M2.RData"))
plist$V3M2 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = c(betal, betat), betas = betas, betals = 0, betat = 0, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V3M2 <- readOutV(res = resV3M2, modelname = "V3M2")
if(SAVE) save(V3M2, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV3M2)

modelname <- "V3M3"
for (mm in 1:4) {
  resV3M3[[mm]] <- lapply(1:100, estFun, modelname = modelname, method = methods[mm],
                          n = n, n_i = n_i, int = int, betal = betal, betas = betas, betals = betals, betat = betat, alpha = alpha, lambda = lambda,
                          sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
}
if(SAVE) save(resV3M3, file= paste0(path, "02_Results/02_Estimation/resV3M3.RData"))
plist$V3M3 <- Reduce("rbind",
                     lapply(1:100, predFun, modelname = modelname,
                            n = npred, n_i = n_i, int = int, betal = betal, betas = betas, betals = betals, betat = betat, alpha = alpha,
                            lambda = lambda, sigma2 = sigma2, noninfl = noninfl, noninfs = noninfs, noninfls = noninfls)
)
# read out information necessary for evaluation
V3M3 <- readOutV(res = resV3M3, modelname = "V3M3")
if(SAVE) save(V3M3, file= paste0(path, "02_Results/02_Estimation/", modelname, ".RData"))
if(SAVE) save(plist, file= paste0(path, "02_Results/02_Estimation/Pred_Dat_all.RData"))
rm(resV3M3)

## ######################################################################################### ##
## 2. Evaluation ---------------------------------------------------------------------------
## ######################################################################################### ##
# ## Following code lines only relevant when SAVE <- T. Otherwise files to load do not exits
# for (i in mods) {load(paste0(path, "02_Results/02_Estimation/", i, ".RData"))}
# load(paste0(path, "02_Results/02_Estimation/", "Pred_Dat_all.RData"))


## ######################################################################################### ##
## 2.1 Estimation Accuracy -----------------------------------------------------------------
## ######################################################################################### ##
for(j in 1:4){
  if(j < 4) {
    colnames(AM1[[j]]$vars) <- c("sigma", "B00", "B10", "B01", "B11")
    colnames(AM2[[j]]$vars) <- c("sigma", "B00", "B10", "B01", "B11")
    colnames(AM3[[j]]$vars) <- c("sigma", "B00", "B10", "B01", "B11")
  } else {
    colnames(AM1[[j]]$vars) <- c("sigma")
    colnames(AM2[[j]]$vars) <- c("sigma")
    colnames(AM3[[j]]$vars) <- c("sigma")
  }
}

packnamdat <- data.frame("pack" = c("JM", "JRML", "JMbayes", "JMb"), "pack2" = c("JM", "joineRML", "JMbayes", "JMboost"))
packnamdat$pack2 <- factor(packnamdat$pack2, levels =  c("JM", "joineRML", "JMbayes", "JMboost"))

plotdat1 <- ToDataFrame("AM1")
plotdat2 <- ToDataFrame("AM2")
plotdat3 <- ToDataFrame("AM3")

t.alpha = 0.1
t.lambda = 0.4
t.int <- 1.5
t.betal <- c(-0.5, 0.7, 1.3, 0.3, 0.5)
t.betat <-  0.4
t.betals <- c(0.9, 0.3, -1, 0.2, -0.4)
t.betas <-  0.1
t.sigma2 <- 0.5
t.B00 <- 2
t.B01 <- 0
t.B11 <- .1
t.AM1 <- data.frame("true.values" = c(t.alpha, t.lambda, t.int, t.betat, t.betal, t.betals, t.betas, t.sigma2, t.B00, t.B01, t.B11),
                    "ind" = c("alpha", "lambda", "Intercept", "time", "Xls", paste0("Xls.", 1:9), "Xs", "sigma", "B00", "B01", "B11"),
                    "model" = "AM1")
plotdat1 <- merge(plotdat1, t.AM1, by = c("ind", "model"), all.x = T)

plotdat1 <- subset(plotdat1, !ind %in% c("V2", "B10"))
plotdat1$ind <- gsub("B01", "B01/ B10", plotdat1$ind)
lev <- levels(factor(plotdat1$ind))
plotdat1$ind <- factor(plotdat1$ind,
                       levels = lev[c(5, 8:18, 7, 1, 2:4, 6)])
                       # levels = c('Intercept','X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','time','Xs','alpha','B00','B01/ B10','B11','sigma'))
plotdat1 <- merge(plotdat1, packnamdat, all.x = T)

# plot coefficients
M1 <- ggplot(subset(plotdat1, pred != "vars"), aes(x = ind, y = values, fill = pack2)) +
  geom_errorbar(aes(x = ind, ymax = true.values, ymin = true.values, colour="#AA0000")) +
  geom_boxplot(outlier.shape = NA) +
  # facet_grid(model ~ pred)
  facet_grid(model~pred, scales = "free", space = "free") +
  # scale_fill_manual(labels = c("JM", "joineRML", "JMbayes", "JMboost"), values = cbbPalette) +
  scale_fill_manual(values = cbbPalette) +
  scale_x_discrete(labels = c("Intercept" = expression(beta[l0]),
                              "Xls" = expression(beta[ls1]),
                              "Xls.1" = expression(beta[ls2]),
                              "Xls.2" = expression(beta[ls3]),
                              "Xls.3" = expression(beta[ls4]),
                              "Xls.4" = expression(beta[ls5]),
                              "Xls.5" = expression(beta[ls6]),
                              "Xls.6" = expression(beta[ls7]),
                              "Xls.7" = expression(beta[ls8]),
                              "Xls.8" = expression(beta[ls9]),
                              "Xls.9" = expression(beta[ls10]),
                              "time" = expression(beta[t]),
                              "Xs" = expression(beta[s]),
                              "alpha" = expression(alpha))) +
  coord_cartesian(ylim = c(-1.5, 2)) +
  # geom_errorbar(aes(x = ind, ymax = true.values, ymin = true.values, colour="#AA0000")) +
  # geom_hline(aes(yintercept = 0, colour = "grey"), colour = "darkgrey") +
  geom_hline(aes(yintercept = 0), colour = "darkgrey") +
  labs(x = "",
       y = "",
       title = "Estimation results of simulation study for Estimation Accuracy by package: Coefficients") +
  scale_colour_discrete(labels = c("True value")) +
  theme(legend.title = element_blank(), axis.text.x = element_text(size = 12)) 

# plot variances
V1 <- ggplot(subset(plotdat1, pred == "vars"), aes(x = ind, y = values, fill = pack2)) +
  geom_errorbar(aes(x = ind, ymax = true.values, ymin = true.values, colour="#AA0000")) +
  geom_boxplot(outlier.shape = NA) +
  # facet_grid(model ~ pred)
  facet_grid(model~pred, scales = "free", space = "free") +
  scale_fill_manual(values = cbbPalette) +
  scale_x_discrete(labels = c("B00" = expression(B["00"]),
                              "B01/ B10" = expression(B["01"] *"/ "* B["10"]),
                              "B11" = expression(B["11"]),
                              "sigma" = expression(sigma))) +
  coord_cartesian(ylim = c(-1, 5)) +
  labs(x = "",
       y = "",
       title = "Estimation results of simulation study for Estimation Accuracy by package: Variances")+
  scale_colour_discrete(labels = "True value") +
  theme(legend.title = element_blank(), axis.text.x = element_text(size = 12)) 

# add true values for AM2
t.AM2 <- data.frame("true.values" = c(t.alpha, t.lambda, t.int, 0, t.betal, t.betat, t.betas, t.sigma2, t.B00, t.B01, t.B11),
                    "ind" = c("alpha", "lambda", "Intercept", "time", "Xl", paste0("Xl.", 1:5), "Xs", "sigma", "B00", "B01", "B11"),
                    "model" = "AM2")
plotdat2 <- merge(plotdat2, t.AM2, by = c("ind", "model"), all.x = T)
test <- c(which(plotdat2$pred == "shared"), which(plotdat2$ind == "Intercept"))
test <- test[duplicated(test)]
plotdat2 <- plotdat2[-test, ]

plotdat2 <- subset(plotdat2, ind != "B10")
plotdat2$ind <- gsub("B01", "B01/ B10", plotdat2$ind)
plotdat2$ind <- factor(plotdat2$ind,
                       levels = c('Intercept','Xl', 'Xl.1','Xl.2','Xl.3','Xl.4','Xl.5', 'time','Xs','alpha','B00','B01/ B10','B11','sigma'))
plotdat2 <- merge(plotdat2, packnamdat, all.x = T)

# plot coefficients
M2 <- ggplot(subset(plotdat2, pred != "vars"), aes(x = ind, y = values, fill = pack2)) +
  geom_errorbar(aes(x = ind, ymax = true.values, ymin = true.values, colour="#AA0000")) +
  geom_boxplot(outlier.shape = NA) +
  # facet_grid(model ~ pred)
  facet_grid(model~pred, scales = "free", space = "free") +
  scale_fill_manual(values = cbbPalette) +
  scale_x_discrete(labels = c("Intercept" = expression(beta[l0]),
                              "Xl" = expression(beta[l1]),
                              "Xl.1" = expression(beta[l2]),
                              "Xl.2" = expression(beta[l3]),
                              "Xl.3" = expression(beta[l4]),
                              "Xl.4" = expression(beta[l5]),
                              "Xl.5" = expression(beta[l6]),
                              "time" = expression(beta[t]),
                              "Xs" = expression(beta[s]),
                              "alpha" = expression(alpha))) +
  coord_cartesian(ylim = c(-1.5, 2)) +
  geom_hline(aes(yintercept = 0), colour = "darkgrey") +
  labs(x = "", y = "") +
  scale_colour_discrete(labels = "True value") +
  theme(legend.title = element_blank(), axis.text.x = element_text(size = 12))  


# plot variances
V2 <- ggplot(subset(plotdat2, pred == "vars"), aes(x = ind, y = values, fill = pack2)) +
  geom_errorbar(aes(x = ind, ymax = true.values, ymin = true.values, colour="#AA0000")) +
  geom_boxplot(outlier.shape = NA) +
  # facet_grid(model ~ pred)
  facet_grid(model~pred, scales = "free", space = "free") +
  scale_fill_manual(values = cbbPalette) +
  scale_x_discrete(labels = c("B00" = expression(B["00"]),
                              "B01/ B10" = expression(B["01"] *"/ "* B["10"]),
                              "B11" = expression(B["11"]),
                              "sigma" = expression(sigma))) +
  coord_cartesian(ylim = c(-1, 5)) +
  labs(x = "", y = "") +
  scale_colour_discrete(labels = "True value") +
  theme(legend.title = element_blank(), axis.text.x = element_text(size = 12)) 


# add true values for AM3
t.AM3 <- data.frame("true.values" = c(t.alpha, t.lambda, t.int, t.betat, t.betal, t.betals, t.betas, t.sigma2, t.B00, t.B01, t.B11),
                    "ind" = c("alpha", "lambda", "Intercept", "time", "Xl", paste0("Xl.", 1:4), "Xls", paste0("Xls.", 1:4), "Xs", "sigma", "B00", "B01", "B11"),
                    "model" = "AM3")
plotdat3 <- merge(plotdat3, t.AM3, by = c("ind", "model"), all.x = T)

plotdat3 <- subset(plotdat3, ind !="B10")
plotdat3$ind <- gsub("B01", "B01/ B10", plotdat3$ind)
plotdat3$ind <- factor(plotdat3$ind,
                       levels = c('Intercept','Xl', 'Xl.1','Xl.2','Xl.3','Xl.4', 'Xls', 'Xls.1','Xls.2','Xls.3','Xls.4','time','Xs','alpha','B00','B01/ B10','B11','sigma'))
plotdat3 <- merge(plotdat3, packnamdat, all.x = T)

# plot coefficients
M3 <- ggplot(subset(plotdat3, pred != "vars"), aes(x = ind, y = values, fill = pack2)) +
  geom_errorbar(aes(x = ind, ymax = true.values, ymin = true.values, colour="#AA0000")) +
  geom_boxplot(outlier.shape = NA) +
  # facet_grid(model ~ pred)
  facet_grid(model~pred, scales = "free", space = "free") +
  scale_fill_manual(values = cbbPalette) +
  scale_x_discrete(labels = c("Intercept" = expression(beta[l0]),
                              "Xl" = expression(beta[l1]),
                              "Xl.1" = expression(beta[l2]),
                              "Xl.2" = expression(beta[l3]),
                              "Xl.3" = expression(beta[l4]),
                              "Xl.4" = expression(beta[l5]),
                              "Xls" = expression(beta[ls1]),
                              "Xls.1" = expression(beta[ls2]),
                              "Xls.2" = expression(beta[ls3]),
                              "Xls.3" = expression(beta[ls4]),
                              "Xls.4" = expression(beta[ls5]),
                              "time" = expression(beta[t]),
                              "Xs" = expression(beta[s]),
                              "alpha" = expression(alpha))) +
  coord_cartesian(ylim = c(-1.5, 2)) +
  geom_hline(aes(yintercept = 0), colour = "darkgrey") +
  labs(x = "", y = "") + 
  scale_colour_discrete(labels = "True value") +
  theme(legend.title = element_blank(), axis.text.x = element_text(size = 12)) 

# plot variances
V3 <- ggplot(subset(plotdat3, pred == "vars"), aes(x = ind, y = values, fill = pack2)) +
  geom_errorbar(aes(x = ind, ymax = true.values, ymin = true.values, colour="#AA0000")) +
  geom_boxplot(outlier.shape = NA) +
  # facet_grid(model ~ pred)
  facet_grid(model~pred, scales = "free", space = "free") +
  scale_fill_manual(labels = c("JM", "joineRML", "JMbayes", "JMboost"), values = cbbPalette) +
  scale_x_discrete(labels = c("B00" = expression(B["00"]),
                              "B01/ B10" = expression(B["01"] *"/ "* B["10"]),
                              "B11" = expression(B["11"]),
                              "sigma" = expression(sigma))) +
  coord_cartesian(ylim = c(-1, 5)) +
  labs(x = "", y = "") + 
  scale_colour_discrete(labels = "True value") +
  theme(legend.title = element_blank(), axis.text.x = element_text(size = 12)) 

tmp <- list(plotdat1, plotdat2, plotdat3)
save(tmp, file = paste0(path, "03_Output/", "AM_Data_Est_Acc_plot.RData"))
rm(tmp)

# combine plots
ggarrange(M1, M2, M3, nrow = 3, common.legend = T, legend = "bottom")
ggsave(paste0(path, "03_Output/", "AM_eta_vert.jpeg"), width = 210, height = 225, units = "mm")
ggarrange(V1, V2, V3, nrow = 3, common.legend = T, legend = "bottom")
ggsave(paste0(path, "03_Output/", "AM_var_vert.jpeg"), width = 210, height = 225, units = "mm")

# calculate MSE
plotdat1 <- cbind(plotdat1, "MSE" = with(plotdat1, (values - true.values)^2))
plotdat2 <- cbind(plotdat2, "MSE" = with(plotdat2, (values - true.values)^2))
plotdat3 <- cbind(plotdat3, "MSE" = with(plotdat3, (values - true.values)^2))

dat1 <- aggregate(MSE ~ pack2 + ind, data = plotdat1, mean)
dat2 <- aggregate(MSE ~ pack2 + ind, data = plotdat2, mean)
dat3 <- aggregate(MSE ~ pack2 + ind, data = plotdat3, mean)
dat1 <- dcast(dat1, pack2~ind)
dat2 <- dcast(dat2, pack2~ind)
dat3 <- dcast(dat3, pack2~ind)
row.names(dat1) <- dat1$pack2
row.names(dat2) <- dat2$pack2
row.names(dat3) <- dat3$pack2
dat1 <- dat1[, -grep("pack", names(dat1))]
dat2 <- dat2[, -grep("pack", names(dat2))]
dat3 <- dat3[, -grep("pack", names(dat3))]
dat1 <- dat1[, c(1:14, 18, 15:17)]
dat2 <- dat2[, c(1:10, 14, 11:13)]
dat3 <- dat3[, c(1:14, 18, 15:17)]

MSEres <- rbind(data.frame("Model" = "AM1", "parameter" = rownames(t(dat1)), t(dat1)),
                data.frame("Model" = "AM2", "parameter" = rownames(t(dat2)), t(dat2)),
                data.frame("Model" = "AM3", "parameter" = rownames(t(dat3)), t(dat3)))
MSEres[, c("JM", "joineRML", "JMbayes", "JMboost")] <- round(MSEres[,c("JM", "joineRML", "JMbayes", "JMboost")], 4)
options(scipen = 999)

stargazer(MSEres, title = "MSE-values by model", summary = FALSE, rownames = FALSE,
          type = "html", out = paste0(path, "03_Output/", "AM_Table_MSE.html"))
options(scipen = 0)

## ######################################################################################### ##
## 2.2 Variable Selection and Variable Tolerance -------------------------------------------
## ######################################################################################### ##
## Evaluate Variable Selection ######################### ##
# harmonize labels and add model indicator
names(V1M1$JM$dat)[names(V1M1$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")
names(V1M2$JM$dat)[names(V1M2$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")
names(V1M3$JM$dat)[names(V1M3$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")
names(V2M1$JM$dat)[names(V2M1$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")
names(V2M2$JM$dat)[names(V2M2$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")
names(V2M3$JM$dat)[names(V2M3$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")
names(V3M1$JM$dat)[names(V3M1$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")
names(V3M2$JM$dat)[names(V3M2$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")
names(V3M3$JM$dat)[names(V3M3$JM$dat) %in% c("X2.5..", "X97.5..", "est.")]<- c("X2.5.","est", "X97.5.")

mod <- paste0(rep(paste0("V", 1:3), each = 3), paste0("M", 1:3))
res <- lapply(mod, function(x) {print(x); data.frame(Reduce(rbind, lapply(get(x), function(x) x[[1]])),
                                          "model" = x)}
              )
res <- Reduce(rbind, res)
rm(mod)

res$ind <- sub("_1", "", res$ind)
res$ind[grep("T.Assoc|T.gamma", res$ind)] <- "alpha"
res$ind[grep("time_effect|T_long", res$ind)] <- "time"
res$ind <- sub("(Intercept)", "Intercept", res$ind, fixed = T)

res$pack[grep("JMBayes", res$pack, fixed = T)] <- "JMbayes"
res$pack[grep("joneRML", res$pack, fixed = T)] <- "joineRML"
res$pack[grep("JRML", res$pack, fixed = T)] <- "joineRML"

# adding true values
res$model2 <- substr(res$model, 3,4)
t.alpha = 0.1
t.lambda = 0.4
t.int <- 1.5
t.betal <- c(-0.5, 0.7, 1.3, 0.3, 0.5)
t.betat <-  0.4
t.betals <- c(0.9, 0.3, -1, 0.2, -0.4)
t.betas <-  0.1
t.sigma2 <- 0.5
t.B00 <- 2
t.B01 <- 0
t.B11 <- .1

t.AM1 <- data.frame("true.values" = c(t.alpha, t.lambda, t.int, t.betat, t.betal, t.betals, t.betas, t.sigma2, t.B00, t.B01, t.B11),
                    "ind" = c("alpha", "lambda", "Intercept", "time", "Xls", paste0("Xls.", 1:9), "Xs", "sigma", "B00", "B01", "B11"),
                    "model" = "M1")

t.AM2 <- data.frame("true.values" = c(t.alpha, t.lambda, t.int, 0, t.betal, t.betat, t.betas, t.sigma2, t.B00, t.B01, t.B11),
                    "ind" = c("alpha", "lambda", "Intercept", "time", "Xl", paste0("Xl.", 1:5), "Xs", "sigma", "B00", "B01", "B11"),
                    "model" = "M2")

t.AM3 <- data.frame("true.values" = c(t.alpha, t.lambda, t.int, t.betat, t.betal, t.betals, t.betas, t.sigma2, t.B00, t.B01, t.B11),
                    "ind" = c("alpha", "lambda", "Intercept", "time", "Xl", paste0("Xl.", 1:4), "Xls", paste0("Xls.", 1:4), "Xs", "sigma", "B00", "B01", "B11"),
                    "model" = "M3")
t.dat <- rbind(t.AM1, t.AM2, t.AM3)
rm(t.AM1, t.AM2, t.AM3)

# merge true values with simulation results
res <- merge(res, t.dat, all.x = T, by.x =c("ind", "model2"), by.y = c("ind", "model") , sort = FALSE)
res$true.values[grep("n.", res$ind)] <- 0

# calculate False-positive rates
res$FP <- ifelse(with(res, X2.5. <= true.values & true.values <= X97.5.), 1, 0)
ident <- which(res$pack == "JMboost")
res$FP[ident] <- ifelse(res$est[ident] != 0, 1, 0)
res$ninf <- FALSE
res$ninf[grep("n.", res$ind)] <- TRUE
res$FP[res$ninf == FALSE] <- NA

# calculate False-negative rates
res$FN <- ifelse(res$true.values < res$X2.5. | res$X97.5. < res$true.values, 1, 0)
ident <- which(res$pack == "JMboost")
res$FN[ident] <- ifelse(res$est[ident] == 0, 1, 0)
res$FN[res$ninf] <- NA

# calculate True-positive rates
res$TP <- ifelse(with(res, X2.5. <= true.values & true.values <= X97.5.), 0, 1)
ident <- which(res$pack == "JMboost")
res$TP[ident] <- ifelse(res$est[ident] != 0, 0, 1)
res$ninf <- FALSE
res$ninf[grep("n.", res$ind)] <- TRUE
res$TP[res$ninf == FALSE] <- NA

# calculate True-negative rates
res$TN <- ifelse(res$true.values < res$X2.5. | res$X97.5. < res$true.values, 0, 1)
ident <- which(res$pack == "JMboost")
res$TN[ident] <- ifelse(res$est[ident] == 0, 0, 1)
res$TN[res$ninf] <- NA

res$R <- as.numeric(res$R)

# Calculate mean FP-/FN-rates per pack, model and simulation iteration
sum10 <- aggregate(FP ~ model + pack + R, data = res, mean)
sum20 <- aggregate(FN ~ model + pack + R, data = res, mean)
sum30 <- aggregate(TP ~ model + pack + R, data = res, mean)
sum40 <- aggregate(TN ~ model + pack + R, data = res, mean)

# Calculate average of mean FP-/FN-rates, std. deviation, min and max
sum1 <- aggregate(FP ~ model + pack, data = sum10, FUN = function(x) c(n = sum(!is.na(x)), mean = mean(x), median = median(x), se = sd(x), min = min(x), max = max(x)))
sum2 <- aggregate(FN ~ model + pack, data = sum20, FUN = function(x) c(n = sum(!is.na(x)), mean = mean(x), median = median(x), se = sd(x), min = min(x), max = max(x)))
sum3 <- aggregate(TP ~ model + pack, data = sum30, FUN = function(x) c(n = sum(!is.na(x)), mean = mean(x), median = median(x), se = sd(x), min = min(x), max = max(x)))
sum4 <- aggregate(TN ~ model + pack, data = sum40, FUN = function(x) c(n = sum(!is.na(x)), mean = mean(x), median = median(x), se = sd(x), min = min(x), max = max(x)))
plotdat0 <- merge(sum1, sum2, by = c("model", "pack"), all = T)
plotdat0 <- data.frame(as.matrix(plotdat0))
plotdat0[, 3:ncol(plotdat0)] <- apply(plotdat0[, 3:ncol(plotdat0)], 2, as.numeric)

# data for table including aggregated TP/TF rates for
tabdat <- merge(sum1, sum2, by = c("model", "pack"), all = T)
tabdat <- merge(tabdat, sum3, by = c("model", "pack"), all = T)
tabdat <- merge(tabdat, sum4, by = c("model", "pack"), all = T)
tabdat <- data.frame(as.matrix(tabdat))
tabdat[, 3:ncol(tabdat)] <- apply(tabdat[, 3:ncol(plotdat0)], 2, as.numeric)

sdat0 <- data.frame("model" = rep(paste0(rep(paste0("V", 1:3), each = 3), paste0("M", 1:3)), each = 4),
                    "pack" = rep(c("JM", "joineRML", "JMbayes", "JMboost"), 9))
sdat <- tabdat[, c(1:2, grep("FN|FP", names(tabdat)))]
sdat <- sdat0 %>% left_join(sdat)
sdat <- sdat[, -grep("mean|se|FN.n", names(sdat))]
sdat[, 4:9] <- round(sdat[, 4:9], 3)
sdat$FP.range <- paste0("[", format(sdat$FP.min, nsmall = 3), "; ", format(sdat$FP.max, nsmall = 3), "]")
sdat$FN.range <- paste0("[", format(sdat$FN.min, nsmall = 3), "; ", format(sdat$FN.max, nsmall = 3), "]")
# sdat$FP.range <- paste0("[", sdat$FP.min, "; ", sdat$FP.max, "]")
# sdat$FN.range <- paste0("[", sdat$FN.min, "; ", sdat$FN.max, "]")
sdat <- sdat[, -grep("min|max", names(sdat))]
sdat <- sdat[, c(1:4, 6, 5, 7)]
sdat$FP <- paste(format(sdat$FP.median, nsmall = 3), sdat$FP.range)
sdat$FN <- paste(format(sdat$FN.median, nsmall = 3), sdat$FN.range)
sdat <- sdat[, -grep("median|range", names(sdat))]
R <- max(unique((res$R)))
sdat$fails <- R - sdat$FP.n
# sdat$fails <- 100 - sdat$FP.n
sdat <- sdat[, c(1:3, 6, 4:5)]
sdat$FP.n[is.na(sdat$FP.n)] <- 0
sdat$fails[is.na(sdat$fails)] <- 100
sdat$FP[grep("NA", sdat$FP)] <- ""
sdat$FN[grep("NA", sdat$FN)] <- ""
names(sdat) <- c("Model", "Software", "No. of available", "No. of", 
                 "Median false-positive", "Median false-negative" )
sdat <- rbind(sdat, c("", "", "simulations", "failures", "rate [min; max]", "rate [min; max]"))
sdat <- sdat[c(37, 1:36),]

# # Table with FP and TN rates
stargazer(sdat,
          title = "Median of false-positive and -negative rates of packages by model and dimensionality",
          # type = "latex", 
          type = "html", out = paste0(path, "03_Output/", "VM_Table_FP_FN_rates.html"),
          # type = "text",
          summary = FALSE,
          rownames=FALSE)
rm(sum10, sum20, sum1, sum2, sum3, sum30, sum4, sum40)

# Add Graphic for TN/FP-rates
plotdat0$pack <- factor(plotdat0$pack, levels = c("JM", "joineRML", "JMbayes", "JMboost"))
# save(plotdat0, file = paste0(path, "03_Output/", "Var_selection_0_plot_data.RData"))

plotdat1 <- data.frame(plotdat0[, c(1:2, grep("FP", names(plotdat0)))], "ind" = "False Positive Rate")
names(plotdat1) <- gsub("FP.", "", names(plotdat1))
plotdat2 <- data.frame(plotdat0[, c(1:2, grep("FN", names(plotdat0)))], "ind" = "False Negative Rate")
names(plotdat2) <- gsub("FN.", "", names(plotdat2))
plotdat <- rbind(plotdat1, plotdat2)
plotdat$ind <- factor(plotdat$ind, levels = c("False Positive Rate", "False Negative Rate"))
save(plotdat, file = paste0(path, "03_Output/", "VM_Var_selection_plot_data.RData"))

VS <- ggplot(plotdat, aes(x = pack, y = median, fill = ind)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = min, ymax = max, col = "#636363"), #col = "#636363", 
                width = .1,
                position = position_dodge(.9)) +
  facet_wrap(model~.) +
  labs(title = "Median False Positive and False Negative Rates",
       subtitle = "by package and model",
       x = "",
       y = "") +
  theme(legend.title = element_blank(),
        legend.position = "bottom",
        axis.text.x = element_text(angle = -45, vjust = 0.3, hjust = 0)) +
  scale_fill_manual(values = c("#2c7fb8", "#7fcdbb")) +
  scale_color_manual(labels = c("Range (min, max)"), values = "#636363")
VS
ggsave(plot = VS, filename = paste0(path, "03_Output/", "VM_Variable_selection.jpeg"), width = 210, height = 225, units = "mm")


## Evaluate Variable Tolerance ######################### ##
## Add AM failure rates
tmp1 <- lapply(AM1, function(x) table(is.na(x$vars[,1])))
tmp2 <- lapply(AM2, function(x) table(is.na(x$vars[,1])))
tmp3 <- lapply(AM3, function(x) table(is.na(x$vars[,1])))

failresA <- c()
for(j in 1:3) {
  failresA <- rbind(failresA,
                    data.frame(Reduce(rbind,
                                      lapply(get(paste0("tmp", j)), function(x) {
                                        dummy <- c("FALSE" = NA, "TRUE" = NA)
                                        dummy[match(names(x),  names(dummy))] <- x
                                        return(dummy)
                                      })
                    ),
                    "pack" = names(get(paste0("AM", j))),
                    "model" = paste0("AM", j)
                    )
  )
}
failresA$pack <- sub("JRML", "joineRML", failresA$pack, fixed = T)
failresA$pack <- sub("JMb", "JMboost", failresA$pack, fixed = T)
failresA$pack <- sub("JMboostayes", "JMbayes", failresA$pack, fixed = T)

# Extract V/cdotM failure rates
failres <- c()
for(i in 1:3){
  for(j in 1:3) {
    failres <- rbind(failres,
                     data.frame(Reduce(rbind,
                                       lapply(get(paste0("V", i, "M", j)), function(x) {
                                         dummy <- c("FALSE" = NA, "TRUE" = NA)
                                         dummy[match(names(x[[2]]),  names(dummy))] <- x[[2]]
                                         return(dummy)
                                       })
                     ),
                     "pack" = names(get(paste0("V", i, "M", j))),
                     "model" = paste0("V", i, "M", j)
                     )
    )
  }
}
failres$pack <- sub("JRML", "joineRML", failres$pack, fixed = T)
failres$pack <- sub("JMb", "JMboost", failres$pack, fixed = T)
failres$pack <- sub("JMboostayes", "JMbayes", failres$pack, fixed = T)

failres <- rbind(failresA, failres)
failres <- failres[!is.na(failres$model), ]
failres$TRUE.[is.na(failres$TRUE.)] <- 0
failres[!failres$TRUE. %in% c(0, 100), ]
with(failres[grep("V2", failres$model), ], sort(TRUE.))
failres$pack <- factor(failres$pack, levels = c("JM", "joineRML", "JMbayes", "JMboost"))
failres$label <- ifelse(failres$TRUE. == 0, NA, paste(failres$TRUE., "%"))
failres$vjust <- ifelse(failres$TRUE. >= 25, 1.5, -0.5)
save(failres, file = paste0(path, "03_Output/", "VM_Var_failure_plot_data.RData"))

# Plot failure rates
VF <- ggplot(failres, aes(y = TRUE., x= model, fill = pack)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = label, vjust = vjust)) +
  facet_grid(pack ~ .) +
  labs(title = "Failure rates by package routine and model",
       x = "",
       y = "") +
  scale_fill_manual(name = "", values = cbbPalette) +
  theme(legend.position = "none",
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        # panel.grid.major.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
VF
ggsave(plot = VF, filename = paste0(path, "03_Output/", "VM_Variable_failure.jpeg"), width = 200, height = 200, units = "mm")


## ######################################################################################### ##
## 2.3 Prediction Precision ----------------------------------------------------------------
## ######################################################################################### ##
rm(list = ls()[grep("tmp|resV|fail|PM|dat", ls())])

polish <-  function(x) paste0(format(round(median(x, na.rm = T), 2), nsmall = 2), " [", 
                              format(round(min(x, na.rm = T), 2), nsmall = 2), "; ", 
                              format(round(max(x, na.rm = T), 2), nsmall = 2), "]")

resP <- lapply(plist, function(z) {
  tapply(z$MSPE, z[, c("pack", "type")], polish)
})

resP <- lapply(1:length(resP), function(x) {
  data.frame(resP[[x]], "pack" = rownames(resP[[x]]), "model" = paste0("P", names(resP)[x]))
})

resP <- Reduce("rbind", resP)
resP$model <- as.character(resP$model)
dim <- substr(unique(resP$model), 1, nchar(unique(resP$model))-2)
model <- substr(unique(resP$model), nchar(unique(resP$model))-1, nchar(unique(resP$model)))
MSPElong <- data.frame("Dimension" = dim, "Model" = model, unstack(resP[,c(1,3)]))
MSPElong <- MSPElong[, c(1,2,3,6,4,5)]
for(x in 3:6) {print(MSPElong[grep("NA", MSPElong[, x]),x] <- "")}
MSPEsurv <- data.frame("Dimension" = dim, "Model" = model, unstack(resP[,c(2,3)]))
MSPEsurv <- MSPEsurv[, c(1,2,3,6,4,5)]
for(x in 3:6) {print(MSPEsurv[grep("NA", MSPEsurv[, x]),x] <- "")}

# # Create table with MSPE values
options(scipen = 999)  
stargazer(MSPElong,
          title = "MSPE-values for marginal prediction of longitudinal outcome by model, dimensionality and package", summary = FALSE,
          # type = "latex",
          type = "html", out = paste0(path, "03_Output/", "PM_Table_MSPE_long.html"),
          # type = "text",
          rownames = FALSE)
options(scipen = 0)

options(scipen = 999)  
stargazer(MSPEsurv,
          title = "MSPE-values for subject-specific prediction of the survival probability by model, dimensionality and package", summary = FALSE,
          # type = "latex",
          type = "html", out = paste0(path, "03_Output/", "PM_Table_MSPE_surv.html"),
          # type = "text",
          rownames = FALSE)
options(scipen = 0)

# ## plots
pdat <- Reduce("rbind", plist)
pdat$modelname <- as.character(pdat$modelname)
pdat$dim <- paste0("P", substr(pdat$modelname, 1, nchar(pdat$modelname)-2))
pdat$model <- substr(pdat$modelname, nchar(pdat$modelname)-1, nchar(pdat$modelname))
pdat$pack <- factor(pdat$pack)
pdat$pack <- factor(pdat$pack, levels = c("JM", "joineRML", "JMbayes", "JMboost"))
save(pdat, file = paste0(path, "03_Output/", "PM_Pred_plot_data.RData"))

pdatLong <- subset(pdat, type == "Long")
pdatSurv <- subset(pdat, type == "Surv")

PPL <- ggplot(pdatLong, aes(x = dim, y = MSPE, fill = pack)) +
  geom_boxplot(varwidth = T, outlier.shape = NA) +
  facet_grid(pack ~ model) +
  coord_cartesian(ylim = c(0,10)) +
  theme(legend.position = "bottom") +
  guides(fill = FALSE) +
  scale_fill_manual(values = cbbPalette) +
  labs(x= "", 
       y = "",
       title = "Precision of marginal prediction of longitudinal outcome",
       subtitle = "as Mean Squared Prediction Error (MSPE) by model, complexity and package")
PPL
ggsave(plot = PPL, filename = paste0(path, "03_Output/", "PM_MSPE_Long.jpeg"), width = 200, height = 200, units = "mm")

PPS <- ggplot(pdatSurv, aes(x = dim, y = MSPE, fill = pack)) +
  geom_boxplot(varwidth = T, outlier.shape = NA) +
  facet_grid(pack ~ model) +
  coord_cartesian(ylim = c(0.0,.45)) +
  theme(legend.position = "bottom") +
  guides(fill = FALSE) +
  scale_fill_manual(values = cbbPalette) +
  labs(x= "",
       y = "",
       title = "Precision of individual-specific prediction of survival probabilities",
       subtitle = "as Mean Squared Prediction Error (MSPE) by model, complexity and package")
PPS
ggsave(plot = PPS, filename = paste0(path, "03_Output/", "PM_MSPE_Surv.jpeg"), width = 200, height = 200, units = "mm")



