############### Load required libraries ###############
library(rio) # data importing
library(writexl) # Data exporting
library(tidyverse) # Data wrangling
library(pdftools) # PDF scraping
library(stringr) # working with strings
library(ggpubr) # creating graphs
library(gtsummary) # creating publication ready tables
library(shiny) # Shiny Interface
library(shinythemes)


############### Creating a User Interface #################
# User interface: a sidebar with the folder selection, the parameter selection
# and the two download buttons, plus a main panel that shows the regression
# plot and the table of extracted values.
ui <- fluidPage(
  theme = shinytheme("paper"), # Using shinythemes for a better visual theme

  titlePanel("Welcome to the Quality Control Dashboard for Assessing Inter-Assay Precision in Immunophenotyping"),
  br(),

  sidebarLayout(
    sidebarPanel(
      h3("Folder Selection"),
      # Starts with only the placeholder entry; the server adds the subfolders
      # of the "Data" directory
      selectInput("folder_name", "Please select a subfolder from the dropdown list:", choices = c("Choose a folder" = "")),
      actionButton("goButton", "Start process", icon("paper-plane"),
                   style = "color: #fff; background-color: #337ab7; border-color: #2e6da4"), # Inline CSS for the button colours

      br(),
      br(),
      br(),

      h3("Parameter Selection"),
      selectInput(
        "parameter",
        "Please select a parameter from the dropdown list:",
        choices = c(
          # Displayed as "Sum_HLA", but the value has to equal the
          # "Summe_HLA" label that the server assigns to the summed row;
          # otherwise filtering by the selected parameter never matches.
          "Sum_HLA" = "Summe_HLA",
          "CD3+",
          "CD19+",
          "CD16+CD56+",
          "CD3+CD16+CD56+ %Lymphs",
          "CD3+CD16+CD56+ Abs Cnt",
          "CD3+CD4-CD8-",
          # NOTE(review): this looks like two labels merged into one
          # ("CD3+CD4+" and "CD3+CD4+ (Excl. dual pos.)") - confirm against
          # the labels actually extracted from the PDFs.
          "CD3+CD4+ CD3+CD4+ (Excl. dual pos.)",
          "CD3+CD4+CD8+",
          "CD3+CD8-Anti-HLA-DR+ %CD8-",
          "CD3+CD8-CD38-Anti-HLA-DR-",
          "CD3+CD8-CD38-Anti-HLA-DR+",
          "CD3+CD8-CD38+ %CD8-",
          "CD3+CD8-CD38+Anti-HLA-DR-",
          "CD3+CD8-CD38+Anti-HLA-DR+",
          "CD3+CD8+",
          "CD3+CD8+ (Excl. dual pos.)",
          "CD3+CD8+Anti-HLA-DR+",
          "CD3+CD8+CD38-Anti-HLA-DR-",
          "CD3+CD8+CD38-Anti-HLA-DR+",
          "CD3+CD8+CD38+",
          "CD3+CD8+CD38+Anti-HLA-DR-",
          "CD3+CD8+CD38+Anti-HLA-DR+"
        )
      ),
      downloadButton("downloadPlot", "Download Plot (PDF)"),
      p("Click to download the regression plot for the selected parameter including the Pearson correlation coefficient in PDF format."),
      downloadButton("downloadData", "Download Data (CSV)"),
      p("Click to download the table with the extracted values for the selected parameter and the corresponding PDF file names in CSV format.")
    ),

    mainPanel(
      plotOutput("plot"), # The main panel displays the generated plot
      tableOutput("dataDisplay") # Display for the data
    )
  )
)




############### Create the Server ############### 
server <- function(input, output, session) {
  
  # Build the inter-assay regression plot for the currently selected parameter.
  #
  # parameter_name: label of the selected parameter; only used in the plot
  #   title. The data itself comes from selected_data_reactive().
  #
  # Returns a ggplot object, or NULL when fewer than three observations are
  # available (cor.test() stops with an error below three observations).
  analyze_data <- function(parameter_name) {
    selected_data <- selected_data_reactive()

    if (is.null(selected_data) || nrow(selected_data) < 3) {
      return(NULL)
    }

    # All results stay local to this call: writing them into the global
    # environment would share them between every session of the app.
    cor_test_result <- cor.test(selected_data$T_Lymphs_GV, selected_data$T_Lymphs_013)
    correlation <- unname(cor_test_result$estimate)
    p_value <- cor_test_result$p.value

    # Slope of the linear regression of the 013 values on the GV values
    lm_model <- lm(T_Lymphs_013 ~ T_Lymphs_GV, data = selected_data)
    slope <- unname(coef(lm_model)[2])

    # Number of observations
    n_obs <- nrow(selected_data)

    ggplot(data = selected_data, aes(x = T_Lymphs_GV, y = T_Lymphs_013)) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE, color = "red") +
      ggtitle(paste("Inter-assay regression for", parameter_name)) +
      xlab("% T lymphocytes") +
      ylab("% T lymphocytes") +
      # Statistics are printed in the top-left corner of the data range
      annotate("text", x = min(selected_data$T_Lymphs_GV), y = max(selected_data$T_Lymphs_013),
               label = paste("Correlation = ", correlation,
                             "\np-value = ", p_value,
                             "\nSlope = ", slope,
                             "\nN = ", n_obs),
               hjust = 0, vjust = 1) +
      theme_classic()
  }
  
  # Reactive storage for the combined data set produced by the processing
  # step; it stays NULL until the "goButton" handler has stored a result.
  combined_df <- reactiveVal(NULL)
  
  # Fill the folder dropdown with the subfolders found directly below the
  # data directory (only when that directory exists).
  observe({
    data_root <- "Data"

    if (dir.exists(data_root)) {
      # Folder names only, without the leading path to the data directory
      folder_choices <- c(
        "Choose a folder" = "",
        list.dirs(path = data_root, full.names = FALSE, recursive = FALSE)
      )
      updateSelectInput(session, "folder_name", choices = folder_choices)
    }
  })
  
  
  # Runs the PDF processing pipeline each time the "goButton" is clicked and
  # stores the resulting table in `combined_df`:
  #   1. list the PDFs of the selected folder,
  #   2. drop PDFs without usable values,
  #   3. pair GV and 013 measurements of the same sample,
  #   4. extract and merge the values of each pair.
  observeEvent(input$goButton, {

    # The placeholder entry of the dropdown has the value "", which would
    # point at the data directory itself instead of a subfolder.
    if (!isTruthy(input$folder_name)) {
      showNotification("Please select a subfolder first.", type = "warning")
      req(FALSE)
    }

    dir_path <- file.path("Data", input$folder_name) # Selected folder for data processing
    # Escaped and anchored so that only file names ending in ".pdf" match
    pdf_files <- list.files(path = dir_path, pattern = "\\.pdf$", full.names = TRUE, ignore.case = TRUE)


    ########### Keep only PDFs with usable values ##############
    # TRUE when the PDF has at least one line for the basic lymphocyte subsets
    # and none of those lines contains "No Value".
    check_patterns_in_pdf <- function(file_path) {
      text_content <- pdf_text(file_path)
      lines <- unlist(strsplit(text_content, split = "\n"))
      relevant_lines <- lines[grepl("CD3\\+|CD19\\+|CD16\\+CD56\\+", lines)]

      length(relevant_lines) > 0 && !any(grepl("No Value", relevant_lines))
    }

    pdf_files_without_NoValue <- Filter(check_patterns_in_pdf, pdf_files)


    ########### Grouping by Measurement of the Same Sample ##############
    # Measurements of the same sample share the first nine characters of the
    # file name.
    get_first_9_chars <- function(path) {
      str_sub(basename(path), 1, 9)
    }

    grouped_pdfs <- split(pdf_files_without_NoValue, get_first_9_chars(pdf_files_without_NoValue))

    paired_pdfs <- grouped_pdfs[lengths(grouped_pdfs) > 1] # Keep groups with more than one file

    ########### Functions for Grouping by the Same Type of Measurement ##############
    # The device tag is searched only in the part of the file name that follows
    # the nine-character sample ID. Searching the full path would let a folder
    # name (e.g. "2013") or a sample ID containing "013" match every file.
    get_device_part <- function(files) {
      str_sub(basename(files), 10)
    }

    get_gv_files <- function(files) {
      files[grepl("GV", get_device_part(files))]
    }

    get_013_files <- function(files) {
      files[grepl("013", get_device_part(files))]
    }

    # Keep groups that contain at least one GV file and one 013 file
    has_both_devices <- vapply(paired_pdfs, function(pair) {
      length(get_gv_files(pair)) >= 1 && length(get_013_files(pair)) >= 1
    }, logical(1))
    valid_paired_pdfs <- paired_pdfs[has_both_devices]

    ########### Function to Extract Data from a PDF ##############
    # Returns a data.frame with the first two whitespace-separated fields
    # (V1, V2) of every relevant line of the PDF.
    extract_data_from_pdf <- function(pdf_path) {
      txt <- pdf_text(pdf_path)
      lines <- unlist(strsplit(txt, "\n"))

      # PDFs that mention "HLA-DR" contribute their HLA-DR/CD38 lines, all
      # others the lines of the basic lymphocyte subsets
      if (any(grepl("HLA-DR", lines))) {
        relevant_lines <- lines[grepl("HLA-DR|CD38", lines)]
      } else {
        relevant_lines <- lines[grepl("CD3\\+|CD19\\+|CD16\\+CD56\\+", lines)]
      }

      # Nothing to extract: return an empty table with the expected columns
      if (length(relevant_lines) == 0) {
        return(data.frame(V1 = character(0), V2 = character(0), stringsAsFactors = FALSE))
      }

      # Fields are separated by runs of two or more whitespace characters
      table_data_list <- strsplit(relevant_lines, "\\s{2,}")

      # Pad every row with NA to the width of the longest one; at least two
      # columns so that V1 and V2 always exist
      max_length <- max(lengths(table_data_list), 2L)
      table_data_list <- lapply(table_data_list, function(fields) {
        length(fields) <- max_length
        fields
      })

      table_data <- do.call(rbind, table_data_list)

      as.data.frame(table_data, stringsAsFactors = FALSE) %>%
        select(V1, V2)
    }

    ########### Merge the Values of a GV/013 Pair ##############
    # Joins the values of one GV file and one 013 file by parameter label and
    # appends a "Summe_HLA" row with the per-device sums of the four
    # Anti-HLA-DR+ subsets.
    process_pdf_pair <- function(gv_file, o13_file) {
      df_GV <- extract_data_from_pdf(gv_file) %>%
        mutate(V2 = as.integer(V2)) %>%
        rename(Parameter_GV = V1, `T_Lymphs_GV` = V2)

      df_013 <- extract_data_from_pdf(o13_file) %>%
        mutate(V2 = as.integer(V2)) %>%
        rename(Parameter_013 = V1, `T_Lymphs_013` = V2)

      merged_df <- left_join(df_GV, df_013, by = c("Parameter_GV" = "Parameter_013"))
      merged_df$PDF_names <- paste(basename(gv_file), basename(o13_file), sep = " & ")

      selected_rows <- merged_df %>%
        filter(Parameter_GV %in% c("CD3+CD8+CD38+Anti-HLA-DR+",
                                   "CD3+CD8+CD38-Anti-HLA-DR+",
                                   "CD3+CD8-CD38+Anti-HLA-DR+",
                                   "CD3+CD8-CD38-Anti-HLA-DR+"))

      summed_values <- selected_rows %>%
        summarise(
          Sum_T_Lymphs_013 = sum(`T_Lymphs_013`, na.rm = TRUE),
          Sum_T_Lymphs_GV = sum(`T_Lymphs_GV`, na.rm = TRUE)
        )

      sum_row <- data.frame(Parameter_GV = "Summe_HLA",
                            `T_Lymphs_013` = summed_values$Sum_T_Lymphs_013,
                            `T_Lymphs_GV` = summed_values$Sum_T_Lymphs_GV,
                            PDF_names = "Summed values")

      rbind(merged_df, sum_row)
    }

    # One table per sample, built from all GV/013 files of that sample
    all_data <- lapply(valid_paired_pdfs, function(pair) {
      all_dfs <- mapply(process_pdf_pair, get_gv_files(pair), get_013_files(pair), SIMPLIFY = FALSE)
      do.call(rbind, all_dfs)
    })

    if (length(all_data) == 0) {
      # Without any pair there are no columns to filter on; store an empty
      # table with the expected columns instead
      showNotification("No matching GV/013 PDF pairs were found in the selected folder.", type = "warning")
      merged_data <- data.frame(Parameter_GV = character(0),
                                T_Lymphs_GV = integer(0),
                                T_Lymphs_013 = integer(0),
                                PDF_names = character(0),
                                stringsAsFactors = FALSE)
    } else {
      # Drop header lines picked up by the line patterns and sum rows
      # where one device contributed nothing
      merged_data <- bind_rows(all_data) %>%
        filter(
          Parameter_GV != "Zusätzlicher Bericht für CD3+CD16+CD56+",
          Parameter_GV != "Leiter:",
          Parameter_GV != "CD8/CD38/CD3/Anti-HLA-DR",
          !(Parameter_GV == "Summe_HLA" & T_Lymphs_GV == 0),
          !(Parameter_GV == "Summe_HLA" & T_Lymphs_013 == 0)
        )
    }

    # Store the result directly in this handler; registering another observer
    # here would add one more observer with every click.
    combined_df(merged_data)
  })
  
  # Rows of the combined data set that belong to the selected parameter, with
  # incomplete rows removed. NULL while no data or no parameter is available.
  selected_data_reactive <- reactive({
    processed_data <- combined_df()

    # combined_df() is NULL until data has been processed. nrow(NULL) is NULL,
    # which cannot be used as an `if` condition, so NULL is tested explicitly.
    if (is.null(processed_data) || is.null(input$parameter) || nrow(processed_data) == 0) {
      NULL
    } else {
      processed_data %>%
        filter(Parameter_GV == input$parameter) %>%
        drop_na()
    }
  })
  
  # Regression plot for the selected parameter; NULL while no data or no
  # parameter is available.
  current_plot <- reactive({
    processed_data <- combined_df()

    # combined_df() is NULL until data has been processed. nrow(NULL) is NULL,
    # which cannot be used as an `if` condition, so NULL is tested explicitly.
    if (is.null(processed_data) || is.null(input$parameter) || nrow(processed_data) == 0) {
      NULL
    } else {
      analyze_data(input$parameter)
    }
  })
  
  # Show the current regression plot in the main panel
  output$plot <- renderPlot(current_plot())

  # Show the values of the selected parameter as a table
  output$dataDisplay <- renderTable(selected_data_reactive())
  
  # CSV download of the values of the selected parameter; the file name
  # contains the current date.
  output$downloadData <- downloadHandler(
    filename = function() paste0("data-", Sys.Date(), ".csv"),
    content = function(file) write.csv(selected_data_reactive(), file)
  )
  
  # PDF download of the current regression plot, named after the selected
  # parameter. Nothing is written when there is no plot.
  output$downloadPlot <- downloadHandler(
    filename = function() paste0(input$parameter, ".pdf"),
    content = function(file) {
      plot_to_save <- current_plot()
      if (is.null(plot_to_save)) {
        return(invisible(NULL))
      }
      ggsave(file, plot_to_save, device = "pdf", dpi = 300)
    }
  )
  
}


########### Run App ##############
# Create the Shiny application from the `ui` definition and the `server`
# function.
shinyApp(ui = ui, server = server)