### R-Script supplied with Pirana to perform CQA sensitivity analysis for Simcyp VBE
###
### Required: - Reference group predictions file generated from Simcyp VBE Shiny application e.g., {vbe}_reference.csv
###           - Test group predictions file generated from Simcyp VBE Shiny application
###           - PE data file (Observations data file associated with Simcyp workspace)
### Description:
###   This script evaluates the sensitivity of a CQA (e.g., PSD mean radius) on the GMR of CMax or AUC.
###   It performs the following:
###     - Computes individual-level CMax and AUC values from simulated data
###     - Adds log-normal noise from a user-specified sigma or estimated from PE file
###     - Calculates log-transformed GMRs and associated variances for test/reference pairs
###     - Fits four candidate models to describe the relationship between log GMR and CQA
###     - Automatically selects the best-fitting model using absolute residual error
###     - Estimates the CQA values corresponding to GMR bounds (e.g., 0.8 and 1.25)
###     - Visualizes each candidate model fit and highlights the selected model
###     - Saves intermediate CQA metrics and selected model for use in follow-up scripts (e.g., safe space analysis)

### <arguments>
###       <gmr_type label="GMR" type="choice">CMax;AUC??CMax</gmr_type>
###       <use_sigma_obs label="Compute sigma from obs" type="bool">0</use_sigma_obs>
###       <obs_scale_val label="Obs scale" type="text">1</obs_scale_val>
###       <custom_sigma_val label="Sigma" type="text">.2991788</custom_sigma_val>
###       <time_filter_val label="Filter time" type="text">8902</time_filter_val>
###       <ratio_lower label="Lower GMR" type="text">0.8</ratio_lower>
###       <ratio_upper label="Upper GMR" type="text">1.25</ratio_upper>
### </arguments>

### Arguments supplied from Pirana:
arg <- list (
  gmr_type = "CMax",
  use_sigma_obs = "0",
  obs_scale_val = "1",
  custom_sigma_val = ".2991788",
  time_filter_val = "8902",
  ratio_lower = "0.8",
  ratio_upper = "1.25"
)


# Setup ----
## Import .vbe and associated files ----
models <- list ( 
  "vbe_1" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_1_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_1_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=4.6357",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "D:/Users/james.craig/VBE/PP3/vbe_1_reference_group.csv",
    test_file       = "",
    type            = "reference",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 4.6357

  ),
  "vbe_2" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_2_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_2_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=1.80",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_2_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 1.8

  ),
  "vbe_3" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_3_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_3_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=2.0",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_3_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 2

  ),
  "vbe_4" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_4_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_4_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=2.6",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_4_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 2.6

  ),
  "vbe_5" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_5_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_5_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=2.7762",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_5_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 2.7762

  ),
  "vbe_6" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_6_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_6_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=3.0",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_6_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 3

  ),
  "vbe_7" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_7_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_7_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=4.6357",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_7_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 4.6357

  ),
  "vbe_8" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_8_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_8_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=4.8375",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_8_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 4.8357

  ),
  "vbe_9" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_9_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_9_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=5.0679",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_9_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 5.0679

  ),
  "vbe_10" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_10_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_10_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=5.5",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_10_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 5.5

  ),
  "vbe_11" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_11_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_11_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=6.23",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_11_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 6.23

  ),
  "vbe_12" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_12_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_12_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=6.45",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_12_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 6.45

  ),
  "vbe_13" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_13_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_13_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=6.7",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_13_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 6.7

  ),
  "vbe_14" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_14_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_14_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=6.823",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_14_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 6.8233

  ),
  "vbe_15" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_15_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_15_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=7.1",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_15_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 7.1

  ),
  "vbe_16" = list ( 
    wksz_file       = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_16_PP3M 350mg.wksz",
    pe_file         = "D:/Users/james.craig/VBE/PP3/pirana_temp/vbe_16_magnusson_fig6_panel2_350mg.xml",
    description     = "PSD=7.5",
    working_dir     = "D:/Users/james.craig/VBE/PP3",
    reference_file  = "",
    test_file       = "D:/Users/james.craig/VBE/PP3/vbe_16_test_group.csv",
    type            = "test",
    cqa_name        = "idLogNormalRadiusMean",
    cqa_value       = 7.5

  )
)
run_from <- list(software = "pirana", version = "25.7.1")
open_res <- 1

setwd('D:/Users/james.craig/VBE/PP3')

start_time <- Sys.time()
message("Start: ", start_time)

## Process script arguments ----
use_sigma_obs <- as.logical(as.numeric(arg$use_sigma_obs))
obs_scale_val <- as.numeric(arg$obs_scale_val)
custom_sigma_val  <- as.numeric(arg$custom_sigma_val)
time_filter_val <- as.numeric(arg$time_filter_val)
use_combined <-  as.logical(as.numeric(arg$use_combined))
ratio_lower <- as.numeric(arg$ratio_lower)
ratio_upper <- as.numeric(arg$ratio_upper)
gmr_type <- arg$gmr_type

## Validation ----
if (length(models) == 0) {
  stop(
    "Results missing for selected `.vbe`, ensure either {vbe}_reference_group.csv or {vbe}_test_group.csv exist in Pirana directory."
  )
}

reference_model <- models[sapply(models, function(x) x$type == "reference")]
test_models <- models[sapply(models, function(x) x$type == "test")]

# Validate reference models
if (length(reference_model) == 0) {
  stop("Validation error: You must select exactly one 'reference' VBE model, but none were selected.")
} else if (length(reference_model) > 1) {
  stop(sprintf("Validation error: You must select exactly one 'reference' VBE model, but %d were selected.", length(reference_model)))
}

# Validate test models
if (length(test_models) == 0) {
  stop("Validation error: At least one 'test' VBE model must be selected, but none were selected.")
}

# Prepare directories ----
if (!file.exists("pirana_reports")) {
  dir.create ("pirana_reports")
}

if (!file.exists("pirana_temp")) {
  dir.create ("pirana_temp")
}

## Import libraries ----
if(Sys.getenv("R_LIB") != "") { .libPaths(c(Sys.getenv("R_LIB"), .libPaths())) }

## Import libraries ----
if(Sys.getenv("R_LIB") != "") { .libPaths(c(Sys.getenv("R_LIB"), .libPaths())) }
# Check required packages
req_pkgs <- c("data.table", "Simcyp", "tidyr", "ggplot2")
missing_pkgs <- req_pkgs[!sapply(req_pkgs, requireNamespace, quietly = TRUE)]
if (length(missing_pkgs)) {
  stop("Package(s) missing: ", paste(missing_pkgs, collapse = ", "),
       "\nInstall with: install.packages(c(",
       paste(shQuote(missing_pkgs), collapse = ", "), "))",
       call. = FALSE)
}

library(ggplot2)

## Create Functions ----
get.AUC <- function(times, conc) {
  dx <- diff(times)
  (colSums((conc[-1, ] + conc[-nrow(conc), ]) * dx)) / 2
}

get.Cmax <- function(conc) {
  return(apply(conc, MARGIN=2, FUN=max))
}

# Analysis ----
if (use_sigma_obs) {
  message("Computing sigma from observed data (pe_file)...")

  stopifnot(file.exists(pe_file))

  pe_data <- Simcyp::ReadPEData(path = pe_file, scale = obs_scale_val)$Observations
  sim_times <- sort(unique(reference_data$Time))
  subject_ids <- intersect(unique(reference_data$ID), unique(pe_data$SubjectID))

  if (length(subject_ids) == 0) {
    stop("No matching SubjectIDs found between simulated and observed data.")
  }

  all_residuals <- c()

  for (sid in subject_ids) {
    pred <- reference_data[ID == sid, Conc]
    obs <- pe_data[pe_data$SubjectID == sid, ]

    if (nrow(obs) == 0 || length(pred) == 0) next

    pred_interp <- approx(sim_times, pred, obs$Time, rule = 2)$y
    dv_obs <- obs$DV

    if (length(pred_interp) != length(dv_obs)) next

    res <- log(dv_obs) - log(pred_interp)
    all_residuals <- c(all_residuals, res)
  }

  if (length(all_residuals) < 5) {
    warning("Fewer than 5 usable residuals found.")
  }

  sd_residuals <- sd(all_residuals, na.rm = TRUE)
  sigma <- sqrt(log(1 + sd_residuals^2))
  message("Estimated sigma from ",
          length(subject_ids),
          " subject(s): ",
          round(sigma, 4))
} else {
  sigma <- custom_sigma_val
  if (is.na(sigma)) {
    message("No sigma specified. No noise will be added to simulations.")
  } else {
    message("Using user-specified sigma: ", sigma)
  }
}


group_files <- c(reference_model[[1]]$reference_file,
                 unname(sapply(test_models, function(x) x$test_file))
)

Cmax_logmeans <- Cmax_logvars <- AUC_logmeans  <- AUC_logvars  <- group_sizes <- rep(NA, length(group_files))

# Import test files(s)
model_names <- names(models)


for (i in seq_along(group_files)) {
  ## Read in group results
    group_data <- data.table::fread(file=group_files[i])
    ## Add measurement noise to the reference (using magnusson RSE) if required
    if (nzchar(custom_sigma_val)) {
      group_data[, Conc := rlnorm(.N, meanlog = log(Conc), sdlog = sigma)]
    }
    group_data[, ID_prefixed := paste0("Subj", ID)]

    wide_group <- data.table::dcast(
      data     = group_data,
      formula  = Time ~ ID_prefixed,
      value.var= "Conc"
    )
    ##
    ## Keep only the times in the last dosing period
    last_dosing_time <- time_filter_val
    keep <- which(wide_group[,1] > last_dosing_time)
    wide_group <- wide_group[keep,]
    ## Total number of subjects
    N.subj <- ncol(wide_group) - 1
    ##
    times <- wide_group$Time
    plot.times <- times / (24 * 7) # in weeks

    ## find Cmax for each subject
    Cmax <- get.Cmax(wide_group[, -1])
    ## find AUC for each subject
    AUC  <- get.AUC(plot.times, wide_group[, -1])
    ##
    ## Compute group mean and variance of log AUC and log Cmax values
    Cmax_logmeans[i] = mean(log(Cmax))
    Cmax_logvars[i]  = var(log(Cmax))
    AUC_logmeans[i]  = mean(log(AUC))
    AUC_logvars[i]   = var(log(AUC))
    group_sizes[i]   = ncol(wide_group) - 1
    ##
    message(model_names[i], " completed.")
}

## Compute log GMR means and variances; variances are scaled by group size
CmaxR_logmeans <- CmaxR_logvars <- rep(NA, length(group_files))
AUCR_logmeans  <- AUCR_logvars  <- rep(NA, length(group_files))

for (i in 2:length(group_files)){ # for each test dataset, skipping the reference
  CmaxR_logmeans[i] <- Cmax_logmeans[i] - Cmax_logmeans[1] # test/reference
  AUCR_logmeans[i]  <- AUC_logmeans [i] - AUC_logmeans [1] # test/reference
  ##
  CmaxR_logvars[i]  <- Cmax_logvars[i] / group_sizes[i] +
    Cmax_logvars[1] / group_sizes[1]
  AUCR_logvars[i]   <- AUC_logvars [i] / group_sizes[i] +
    AUC_logvars [1] / group_sizes[1]
}

# Get CQA values
descriptions <- sapply(models, function(x) paste0(x$cqa_name, " = ", x$cqa_value))
cqa_values <- as.numeric(sub(".*?([0-9]+\\.?[0-9]*).*", "\\1", descriptions))

## Format and save GMR results as a dataframe
metric_df_ratios <- data.frame(cqa_values,
                    CmaxR_logmeans, AUCR_logmeans, CmaxR_logvars, AUCR_logvars)

## Remove reference (line 1)
metric_df_ratios <- metric_df_ratios[-1,]

## Format and save log Cmax and AUC means and variances as a dataframe
## Do not remove reference...
metric_df_raw <- data.frame(cqa_values,
                    Cmax_logmeans, AUC_logmeans,
                    Cmax_logvars, AUC_logvars, group_sizes)

## BE limits to plot as the horizontal dotted lines on the graphs
BE.lims <- c(ratio_lower, ratio_upper)

# Prepare models
x <- metric_df_ratios$cqa_values

if (gmr_type == "CMax") {
  y <- metric_df_ratios$CmaxR_logmeans
  w <- metric_df_ratios$CmaxR_logvars
} else {
  y <- metric_df_ratios$AUCR_logmeans
  w <- metric_df_ratios$AUCR_logvars
}

models_lm <- list(
  y_x       = lm(y ~ x, weights = 1/w),
  y_logx    = lm(y ~ log(x), weights = 1/w),
  expy_x    = lm(exp(y) ~ x, weights = 1/w),
  expy_logx = lm(exp(y) ~ log(x), weights = 1/w)
)

# Calculate absolute residual sum for each model
residual_sums <- sapply(models_lm, function(model) {
  y_true <- eval(model$call$formula[[2]])
  if (grepl("exp", deparse(model$call$formula[[2]]))) {
    sum(abs(log(predict(model)) - log(y_true)))
  } else {
    sum(abs(predict(model) - y_true))
  }
})

# Identify the selected model
selected_index <- which.min(residual_sums)
selected_name <- names(models_lm)[selected_index]
selected_model <- models_lm[[selected_index]]

# Calculate x thresholds for GMR = 0.8 and 1.25
intercept <- selected_model$coefficients[[1]]
slope <- selected_model$coefficients[[2]]


# Transform to get normal range
if (grepl("exp\\(y\\)", deparse(selected_model$call$formula[[2]]))) {
  ratio_lower_t <- ratio_lower
  ratio_upper_t <- ratio_upper
} else {
  ratio_lower_t <- log(ratio_lower)
  ratio_upper_t <- log(ratio_upper)
}

if (grepl("log\\(x\\)", deparse(selected_model$call$formula[[3]]))) {
  # x was log-transformed in the model â solve in log-space, then exp() to get x
  cqa_x_lower <- exp((ratio_lower_t - intercept) / slope)
  cqa_x_upper <- exp((ratio_upper_t - intercept) / slope)
} else {
  # x is on original scale
  cqa_x_lower <- (ratio_lower_t - intercept) / slope
  cqa_x_upper <- (ratio_upper_t - intercept) / slope
}

# Prepare file
fname <-
  paste0("pirana_reports/",
         names(reference_model),
         paste0("_CQA_", gmr_type, "_Log_GMR_Ratio_"),
         paste0(names(test_models),collapse = "_"),
         ".pdf")
# Create plot
titles <- c(
  y_x = "Model: y ~ x",
  y_logx = "Model: y ~ log(x)",
  expy_x = "Model: exp(y) ~ x",
  expy_logx = "Model: exp(y) ~ log(x)"
)
# Plotting setup
pdf(fname, width = 8, height = 6)
par(mfrow = c(2, 2), mar = c(4, 4, 4, 1), oma = c(0, 0, 3, 0))  # extra top space per plot

for (i in seq_along(models_lm)) {
  name <- names(models_lm)[i]
  model <- models_lm[[name]]

  # Get x and y values from the model formula
  fmla <- formula(model)
  y_vals <- eval(fmla[[2]])
  x_vals <- eval(fmla[[3]])

  # Circle size scaling
  var_scaled <- sqrt(w)
  var_scaled <- (var_scaled - min(var_scaled)) / (max(var_scaled) - min(var_scaled))
  cex_vals <- 0.5 + var_scaled * 2

  # Plot
  plot(x_vals, y_vals, cex = cex_vals,
       xlab = deparse(fmla[[3]]),
       ylab = deparse(fmla[[2]]),
       main = titles[[name]])

  abline(model)

  # Add BE lines
  if (grepl("exp", name)) {
    abline(h = BE.lims, lty = 3)
  } else {
    abline(h = log(BE.lims), lty = 3)
  }

  legend("topright", legend = paste("Sum|Res|:", format(residual_sums[[name]], dig = 3)), bty = "n")

  # Annotate selected model
  if (i == selected_index) {
    box(col = "blue", lwd = 2)

    # Blue annotation inside the plot
    mtext("Selected Model", side = 3, line = 0.1, col = "blue", cex = 0.8, font = 2)

    # Add vertical lines
    if (grepl("log", name)) {
      abline(v = log(cqa_x_lower), col = "blue", lty = 2)
      abline(v = log(cqa_x_upper), col = "blue", lty = 2)
    } else {
      abline(v = cqa_x_lower, col = "blue", lty = 2)
      abline(v = cqa_x_upper, col = "blue", lty = 2)
    }
  }
}

# Add global title and CQA info
mtext(paste0(gmr_type, " Log GMR Ratio"), outer = TRUE, line = 1.5, cex = 1.2)

mtext(
  sprintf("CQA value (GMR=%.2f): %.4f", ratio_lower, cqa_x_lower),
  side = 3, line = 0.2, outer = TRUE, col = "blue", cex = 0.9, font = 2
)
mtext(
  sprintf("CQA value (GMR=%.2f): %.4f", ratio_upper, cqa_x_upper),
  side = 3, line = -0.7, outer = TRUE, col = "blue", cex = 0.9, font = 2
)

dev.off()

cat (paste("OUTPUT: ", fname, sep=""))

if (file.exists(fname) &&
    open_res) {
  if (Sys.info()['sysname'] == 'Windows') {
    shell.exec(paste(getwd(), "/", fname, sep = ""))
  }  # windows
  else if (Sys.info()['sysname'] == 'Darwin') {
    system(paste ("open ", fname, sep = ""))
  } # mac
  else {
    system(
      paste("xdg-open ", fname, sep = ""),
      ignore.stdout = TRUE,
      ignore.stderr = TRUE,
      wait = FALSE
    )
  } # linux
}

# Save CQA metrics cache

fname <-
  paste0("pirana_temp/",
         names(reference_model), "_CQA_metrics_",
         paste0(names(test_models),collapse = "_"),
         ".Rds")
saveRDS(
  list(
    metric_df_ratios = metric_df_ratios,
    metric_df_raw = metric_df_raw,
    model = selected_model,
    cqa_gmr_lower_val = cqa_x_lower,
    cqa_gmr_upper_val = cqa_x_upper
  ),
  file = fname
)

# Log Variance Plot
# Data
x <- metric_df_ratios$cqa_values
y <- metric_df_raw$Cmax_logvars[-1]   # variance on original scale
w <- metric_df_raw$group_sizes[-1]

# Define models
models_lm <- list(
  logy_x    = lm(log(y) ~ x,       weights = w),
  logy_logx = lm(log(y) ~ log(x),  weights = w),
  y_x       = lm(y       ~ x,      weights = w),
  y_logx    = lm(y       ~ log(x), weights = w)
)

# Calculate residual sum on original scale
residual_sums <- sapply(models_lm, function(model) {
  y_pred <- predict(model)
  if (grepl("log\\(y\\)", deparse(model$call$formula[[2]]))) {
    y_pred <- exp(y_pred)  # back-transform to original variance scale
  }
  sum(abs(y_pred - y))
})

# Identify best model
selected_index <- which.min(residual_sums)
selected_model <- models_lm[[selected_index]]
selected_name  <- names(models_lm)[selected_index]

# Loop through and plot each model
titles <- c(
  logy_x    = "Model: log(y) ~ x",
  logy_logx = "Model: log(y) ~ log(x)",
  y_x       = "Model: y ~ x",
  y_logx    = "Model: y ~ log(x)"
)

fname <-
  paste0("pirana_reports/",
         names(reference_model),
         paste0("_CQA_", gmr_type, "_Log_Variance_"),
         paste0(names(test_models),collapse = "_"),
         ".pdf")

pdf(fname, width = 8, height = 6)

par(mfrow = c(2, 2), mar = c(4, 4, 4, 1), oma = c(0, 0, 3, 0))  # extra top space per plot

for (i in seq_along(models_lm)) {
  name <- names(models_lm)[i]
  model <- models_lm[[name]]

  fmla <- formula(model)
  y_vals <- eval(fmla[[2]])
  x_vals <- eval(fmla[[3]])

  # Weight-based point size
  group_scaled <- sqrt(w)
  group_scaled <- (group_scaled - min(group_scaled)) / (max(group_scaled) - min(group_scaled))
  cex_vals <- 0.5 + group_scaled * 2

  plot(x_vals, y_vals,
       cex = cex_vals,
       xlab = deparse(fmla[[3]]),
       ylab = deparse(fmla[[2]]),
       main = titles[[name]])

  abline(model)

  # Add horizontal lines
  if (grepl("log\\(y\\)", name)) {
    abline(h = log(BE.lims), lty = 3)
  } else {
    abline(h = BE.lims, lty = 3)
  }

  # Residual legend
  resid_sum <- residual_sums[[name]]
  legend("topleft", legend = paste0("Sum|Residuals|: ", format(resid_sum, dig = 3)), bty = "n")

  # Highlight selected model
  if (i == selected_index) {
    box(col = "blue", lwd = 2)
    mtext("Selected Model", side = 3, line = 0.1, col = "blue", cex = 0.8, font = 2)

    # Add slope/intercept annotation
    info <- paste0("Intercept: ", format(coef(model)[1], digits = 3),
                   ", Slope: ", format(coef(model)[2], digits = 3))
  }
}

mtext(paste0(gmr_type, " Log Variance"), outer = TRUE, line = 1.5, cex = 1.2)
mtext(
  paste0("Intercept: ", format(coef(model)[1], digits = 3),
         ", Slope: ", format(coef(model)[2], digits = 3)),
  side = 3, line = 0.3, outer = TRUE, col = "blue", cex = 0.9
)
dev.off()

cat (paste("OUTPUT: ", fname, sep=""))

if (file.exists(fname) &&
    open_res) {
  if (Sys.info()['sysname'] == 'Windows') {
    shell.exec(paste(getwd(), "/", fname, sep = ""))
  }  # windows
  else if (Sys.info()['sysname'] == 'Darwin') {
    system(paste ("open ", fname, sep = ""))
  } # mac
  else {
    system(
      paste("xdg-open ", fname, sep = ""),
      ignore.stdout = TRUE,
      ignore.stderr = TRUE,
      wait = FALSE
    )
  } # linux
}

end_time <- Sys.time()
message("Complete: ", end_time)
message(capture.output(end_time - start_time))

