### R-Script supplied with Pirana to generate reference vs test PRED vs TIME, pluc Cmax and AUC comparisons
###
### Required: - Reference {vbe} with simulations results
###           - One or more test {vbe} with simulation results
### Description: Reads reference + test simulated concentrations, optionally adds log-normal noise, then plots
### mean conc vs time with overlaid observed data (if available), including 95% confidence intervals. Shows side by side
### box-plot comparisons of AUC and CMax.

### <arguments>
###       <custom_sigma_val label="Sigma" type="text">.2991788</custom_sigma_val>
###       <use_sigma_obs label="Compute sigma from obs" type="bool">0</use_sigma_obs>
###       <time_filter_val label="Filter time" type="text">8902</time_filter_val>
### </arguments>

### Arguments supplied from Pirana:
arg <- list (
  custom_sigma_val = ".2991788",
  use_sigma_obs = "0",
  time_filter_val = "8902"
)



# Setup ----
## Import .vbe and associated files ----
models <- list ( 
  "vbe_1" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_1_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_1_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=4.6357",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "D:/Users/james.craig/VBE/PP3/vbe_1_reference_group.csv",
    test_file       = "",
    type            = "reference",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 4.6357

  ),
  "vbe_8" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_8_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_8_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=4.8375",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_8_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 4.8357

  )
)
run_from <- list(software = "pirana", version = "25.7.1")
open_res <- 1

setwd('D:/Users/james.craig/VBE/PP3')

start_time <- Sys.time()
message("Start: ", start_time)

## Process script arguments ----
use_sigma_obs <- as.logical(as.numeric(arg$use_sigma_obs))
custom_sigma_val  <- as.numeric(arg$custom_sigma_val)
time_filter_val <- as.numeric(arg$time_filter_val)



## Validation ----
if (length(models) == 0) {
  stop(
    "Results missing for selected `.vbe`, ensure either {vbe}_reference_group.csv or {vbe}_test_group.csv exist in Pirana directory."
  )
}

reference_model <- models[sapply(models, function(x) x$type == "reference")]
pe_file <- reference_model[[1]]$pe_file
test_models <- models[sapply(models, function(x) x$type == "test")]

# Validate reference models
if (length(reference_model) == 0) {
  stop("Validation error: You must select exactly one 'reference' VBE model, but none were selected.")
} else if (length(reference_model) > 1) {
  stop(sprintf("Validation error: You must select exactly one 'reference' VBE model, but %d were selected.", length(reference_model)))
}

# Validate test models
if (length(test_models) == 0) {
  stop("Validation error: At least one 'test' VBE model must be selected, but none were selected.")
}

# Prepare directories ----
if (!file.exists("pirana_reports")) {
  dir.create ("pirana_reports")
}

if (!file.exists("pirana_temp")) {
  dir.create ("pirana_temp")
}

## Import libraries ----
if(Sys.getenv("R_LIB") != "") { .libPaths(c(Sys.getenv("R_LIB"), .libPaths())) }

# Check required packages
req_pkgs <- c("data.table", "Simcyp", "tidyr", "dplyr", "ggplot2", "cowplot")
missing_pkgs <- req_pkgs[!sapply(req_pkgs, requireNamespace, quietly = TRUE)]
if (length(missing_pkgs)) {
  stop("Package(s) missing: ", paste(missing_pkgs, collapse = ", "),
       "\nInstall with: install.packages(c(",
       paste(shQuote(missing_pkgs), collapse = ", "), "))",
       call. = FALSE)
}

library(ggplot2)
library(dplyr)
library(tidyr)

## Create Functions ----
get.AUC <- function(times, conc){
  # ensure 'conc' is a matrix
  if (is.null(dim(conc))) {
    conc <- matrix(conc, ncol = 1)
  }
  # now do the trapezoidal rule
  dx <- diff(times)
  # force matrix indexing so we never drop to a vector
  top <- conc[-1, , drop = FALSE] + conc[-nrow(conc), , drop = FALSE]
  return(colSums(top * dx) / 2)
}

## Analysis ----
# Import reference file and prepare data
reference_data <- data.table::fread(reference_model[[1]]$reference_file)


if (use_sigma_obs) {
  message("Computing sigma from observed data (pe_file)...")

  stopifnot(file.exists(pe_file))

  pe_data <- Simcyp::ReadPEData(path = pe_file, scale = 1)$Observations
  sim_times <- sort(unique(reference_data$Time))
  subject_ids <- intersect(unique(reference_data$ID), unique(pe_data$SubjectID))

  if (length(subject_ids) == 0) {
    stop("No matching SubjectIDs found between simulated and observed data.")
  }

  all_residuals <- c()

  for (sid in subject_ids) {
    pred <- reference_data[ID == sid, Conc]
    obs <- pe_data[pe_data$SubjectID == sid, ]

    if (nrow(obs) == 0 || length(pred) == 0) next

    pred_interp <- approx(sim_times, pred, obs$Time, rule = 2)$y
    dv_obs <- obs$DV

    if (length(pred_interp) != length(dv_obs)) next

    res <- log(dv_obs) - log(pred_interp)
    all_residuals <- c(all_residuals, res)
  }

  if (length(all_residuals) < 5) {
    warning("Fewer than 5 usable residuals found.")
  }

  sd_residuals <- sd(all_residuals, na.rm = TRUE)
  sigma <- sqrt(log(1 + sd_residuals^2))
  message("Estimated sigma from ",
          length(subject_ids),
          " subject(s): ",
          round(sigma, 4))
} else {
  sigma <- custom_sigma_val
  if (is.na(sigma)) {
    message("No sigma specified. No noise will be added to simulations.")
  } else {
    message("Using user-specified sigma: ", sigma)
  }
}

if (!is.na(sigma)) {
reference_data[, Conc := rlnorm(.N, meanlog = log(Conc), sdlog = sigma)]
}
reference_data[, subject_type := "Reference"]

if (file.exists(pe_file)) {
  obs_data <- Simcyp::ReadPEData(path=pe_file,scale=1)$Observations %>% as.data.frame()
} else {
  obs_data <- NULL
}

reference_data$ID <- as.factor(reference_data$ID)
# Import test files(s)
for (test_name in names(test_models)) {
  # read & noise-perturb test
  test_file <- test_models[[test_name]]$test_file
  test_data <- data.table::fread(test_file)
  if (!is.na(sigma)) test_data[, Conc := rlnorm(.N, log(Conc), sigma)]
  test_data[, subject_type := "Test"]
  test_data[, ID := as.factor(ID)]

  # combine sim data
  conc_data <- bind_rows(
    reference_data %>% select(Time,ID,Conc,subject_type),
    test_data      %>% select(Time,ID,Conc,subject_type)
  ) %>% mutate(Subject = paste(subject_type, ID, sep="_"))

  # if (is.numeric(time_filter_val)) {
  #   conc_data <- filter(Time > time_filter_val)
  # }

  # 1) summary ribbon of sim + overlay obs
  summary_data <- conc_data %>%
    # toss out the log(0) NAs
    filter(!is.na(Conc) & Conc > 0) %>%
    group_by(Time, subject_type) %>%
    summarise(
      mean_Conc = mean(Conc),
      sd_Conc   = sd(Conc),
      n         = n(),
      se_Conc   = sd_Conc/sqrt(n),
      # 95% CI around the mean
      ci_low    = mean_Conc - qt(0.975, df=n-1)*se_Conc,
      ci_high   = mean_Conc + qt(0.975, df=n-1)*se_Conc,
      .groups="drop"
    )

  p1 <- ggplot(summary_data, aes(Time, mean_Conc, color=subject_type, fill=subject_type)) +
    geom_ribbon(aes(ymin=ci_low, ymax=ci_high), alpha=0.3) +
    geom_line(size=1)

  if (!is.null(obs_data)) {
    p1 <- p1 + geom_point(
      data = obs_data,
      aes(Time, DV),
      inherit.aes = FALSE,
      color = "black",
      size = 1,
      alpha = 0.6
    )
  }
  p1 <- p1 +
    scale_y_log10() +
    labs(
      y = "Mean Concentration",
      color = "Type",
      fill  = "Type"
    ) +
    theme_minimal()

  shared_legend <- cowplot::get_legend(p1)
  p1 <- p1 + theme(legend.position="none")

  # 2) Cmax boxplots
  Cmax_data <- conc_data %>%
    group_by(subject_type, Subject) %>%
    summarise(Cmax = max(Conc), .groups="drop")

  p2 <- ggplot(Cmax_data, aes(subject_type, Cmax, fill=subject_type)) +
    geom_boxplot() +
    scale_y_log10() +
    labs(y="Cmax", x = "") +
    theme_minimal() +
    theme(legend.position="none")

  # 3) AUC boxplots (last dosing interval)
  auc_data  <- conc_data %>% filter(Time > time_filter_val)
  deltaT    <- max(auc_data$Time) - min(auc_data$Time)
  AUC_data  <- auc_data %>%
    group_by(subject_type, Subject) %>%
    summarise(AUC = get.AUC(Time, matrix(Conc, ncol=1)), .groups="drop") %>%
    mutate(AUC_per_T = AUC / deltaT)

  p3 <- ggplot(AUC_data, aes(subject_type, AUC_per_T, fill=subject_type)) +
    geom_boxplot() +
    scale_y_log10() +
    labs(y=expression(AUC/Delta[t]), x = "") +
    theme_minimal() +
    theme(legend.position="none")

  # combine into a single row
  combined <- cowplot::plot_grid(
    p1, p2, p3, shared_legend,
    nrow=1,
    rel_widths = c(2, 1, 1, 0.4),
    align="h"
  )

  # write PDF
  fname <- file.path(
    "pirana_reports",
    paste0(names(reference_model), "_compare_pred_", test_name, ".pdf")
  )
  pdf(fname, width=8, height=6)
  print(combined)
  dev.off()

  # open created file
  cat (paste("OUTPUT: ", fname, sep = ""))
  if (file.exists(fname) && open_res) {
    if (Sys.info()['sysname'] == 'Windows') {
      shell.exec(paste(getwd(), "/", fname, sep = ""))
    }  # windows
    else if (Sys.info()['sysname'] == 'Darwin') {
      system(paste ("open ", fname, sep = ""))
    } # mac
    else {
      system(
        paste("xdg-open ", fname, sep = ""),
        ignore.stdout = TRUE,
        ignore.stderr = TRUE,
        wait = FALSE
      )
    } # linux
  }
}

end_time <- Sys.time()
message("Complete: ", end_time)
message(capture.output(end_time - start_time))

