#' Fit Single Bivariate Hurdle Model
#'
#' Fits a bivariate hurdle negative binomial model with horseshoe
#' priors using Stan/CmdStan.
#'
#' @param DT A data.table with the data.
#' @param k Integer; lag order.
#' @param spec Character; model specification ("A", "B", "C", "D").
#' @param controls Character vector of control variable names.
#' @param model A compiled CmdStan model object. If NULL, the package
#'   default model is loaded.
#' @param output_dir Directory for CmdStan output files. If NULL, uses
#'   a temporary directory.
#' @param iter_warmup Integer; warmup iterations.
#' @param iter_sampling Integer; sampling iterations.
#' @param chains Integer; number of chains.
#' @param seed Integer; random seed.
#' @param adapt_delta Numeric; adaptation target acceptance rate.
#' @param max_treedepth Integer; maximum tree depth.
#' @param threads_per_chain Integer; threads per chain.
#' @param hs_tau0 Numeric; horseshoe tau0 parameter.
#' @param hs_slab_scale Numeric; horseshoe slab scale.
#' @param hs_slab_df Numeric; horseshoe slab degrees of freedom.
#' @param verbose Logical; print progress messages.
#'
#' @return A list with components:
#'   \item{fit}{The CmdStanMCMC fit object.}
#'   \item{des}{The design matrices used.}
#'   \item{spec}{The model specification.}
#'   \item{k}{The lag order.}
#'   \item{hs_tau0, hs_slab_scale, hs_slab_df}{Horseshoe hyperparameters.}
#'   \item{controls}{Control variables used.}
#'   \item{output_dir}{Directory with output files.}
#'
#' @export
fit_one <- function(DT, k, spec = c("A", "B", "C", "D"),
                    controls = character(0),
                    model = NULL,
                    output_dir = NULL,
                    iter_warmup = 1000, iter_sampling = 1200, chains = 4,
                    seed = NULL, adapt_delta = 0.95, max_treedepth = 12,
                    threads_per_chain = 1L,
                    hs_tau0 = 0.5, hs_slab_scale = 5, hs_slab_df = 4,
                    verbose = TRUE) {
  
  spec <- match.arg(spec)
  ctrl_tag <- if (length(controls) == 0) "None" else paste0(controls, collapse = "+")
  
  if (verbose) {
    message(sprintf("Fitting spec=%s, k=%d, controls=[%s]", spec, k, ctrl_tag))
  }
  
  if (is.null(model)) {
    model <- get_hurdle_model()
  }
  
  des <- switch(spec,
                A = build_design(DT, k, include_C_to_I = TRUE,  include_I_to_C = FALSE, controls = controls),
                B = build_design(DT, k, include_C_to_I = FALSE, include_I_to_C = TRUE,  controls = controls),
                C = build_design(DT, k, include_C_to_I = TRUE,  include_I_to_C = TRUE,  controls = controls),
                D = build_design(DT, k, include_C_to_I = FALSE, include_I_to_C = FALSE, controls = controls)
  )
  
  stan_data <- list(
    T = length(des$idx),
    y_I = as.integer(des$y_I),
    y_C = as.integer(des$y_C),
    log_exposure50 = des$log_exposure50,
    P_pi_I = ncol(des$X_pi_I), P_mu_I = ncol(des$X_mu_I),
    P_pi_C = ncol(des$X_pi_C), P_mu_C = ncol(des$X_mu_C),
    X_pi_I = des$X_pi_I, X_mu_I = des$X_mu_I,
    X_pi_C = des$X_pi_C, X_mu_C = des$X_mu_C,
    hs_tau0 = hs_tau0, hs_slab_scale = hs_slab_scale, hs_slab_df = hs_slab_df
  )
  
  if (is.null(output_dir)) {
    output_dir <- file.path(tempdir(), sprintf("bivarhr_%s_k%d_%s",
                                               spec, k, format(Sys.time(), "%Y%m%d%H%M%S")))
  }
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }
  
  fit <- model$sample(
    data = stan_data,
    iter_warmup = iter_warmup, iter_sampling = iter_sampling,
    chains = chains, seed = seed,
    adapt_delta = adapt_delta, max_treedepth = max_treedepth,
    refresh = 0, show_messages = FALSE,
    threads_per_chain = threads_per_chain,
    parallel_chains = chains,
    output_dir = output_dir
  )
  
  list(
    fit = fit,
    des = des,
    spec = spec,
    k = k,
    hs_tau0 = hs_tau0,
    hs_slab_scale = hs_slab_scale,
    hs_slab_df = hs_slab_df,
    controls = controls,
    output_dir = output_dir
  )
}


#' Get Default Hurdle Model
#'
#' Loads and compiles the package's default Stan model.
#'
#' @return A compiled CmdStanModel object.
#'
#' @export
get_hurdle_model <- function() {
  
  if (!requireNamespace("cmdstanr", quietly = TRUE)) {
    stop("Package 'cmdstanr' is required. Install with:\n
         install.packages('cmdstanr', repos = c('https://stan-dev.r-universe.dev', getOption('repos')))")
  }
  
  stan_file <- system.file("stan", "hurdle_nb_bivariate.stan", package = "bivarhr")
  
  if (!nzchar(stan_file) || !file.exists(stan_file)) {
    stop("Stan model file not found. Package may not be installed correctly.")
  }
  
  cmdstanr::cmdstan_model(
    stan_file,
    cpp_options = list(stan_threads = TRUE),
    quiet = TRUE
  )
}

#' Build CmdStan model with custom FLOOR constant
#'
#' Takes a Stan program as a single string and replaces the declaration
#' of the scalar constant \code{FLOOR} with a user supplied numeric
#' value, then compiles it as a CmdStanR model with threading enabled.
#'
#' @param stan_code Character string containing the Stan program. It
#'   must include a line of the form \code{real FLOOR = ...;} that will
#'   be replaced.
#' @param floor_value Numeric scalar used to set the constant
#'   \code{FLOOR} in the generated Stan code.
#'
#' @return A CmdStanModel object (requires 'cmdstanr' package).
#'
#' @details The replacement is performed using a regular expression,
#'   so the Stan code must follow the pattern used in the bivariate
#'   hurdle model templates of this package. The compiled model has
#'   \code{stan_threads} turned on via \code{cpp_options}.
#'
#' @keywords internal

.build_model_with_floor <- function(stan_code, floor_value) {
  
  if (!requireNamespace("cmdstanr", quietly = TRUE)) {
    stop("Package 'cmdstanr' is required. Install with:\n
         install.packages('cmdstanr', repos = c('https://stan-dev.r-universe.dev', getOption('repos')))")
  }
  
  pat <- "real\\s+FLOOR\\s*=\\s*-?[0-9eE\\.]+\\s*;"
  stan_code_floor <- gsub(pat, sprintf("real FLOOR = %.0f;", floor_value),
                          stan_code, perl = TRUE)
  cmdstanr::cmdstan_model(
    cmdstanr::write_stan_file(stan_code_floor),
    cpp_options = list(stan_threads = TRUE),
    quiet = TRUE
  )
}

#' Smoke Test for FLOOR ELPD Invariance
#'
#' Tests that the ELPD ranking is invariant to different FLOOR penalty values
#' in the Stan model.
#'
#' @param DT Data.table with the data.
#' @param stan_code Character; Stan model code.
#' @param floors Numeric vector of FLOOR values to test.
#' @param spec Character; model specification.
#' @param controls Character vector of control variables.
#' @param k_grid Integer vector of lag values to test.
#' @param hs_grid Data.frame with horseshoe hyperparameter grid.
#' @param hs_rows Integer vector; which rows of hs_grid to use.
#' @param iter_warmup Integer; warmup iterations.
#' @param iter_sampling Integer; sampling iterations.
#' @param chains Integer; number of chains.
#' @param seed Integer; random seed.
#' @param verbose Logical; print progress messages.
#'
#' @return A list with components:
#'   \item{same_order}{Logical; TRUE if ranking is identical across all FLOOR values.}
#'   \item{floors}{The tested FLOOR values.}
#'   \item{tables}{List of result tables for each FLOOR.}
#'   \item{combined}{Combined data.frame of all results.}
#'   \item{rank_signatures}{Character vector of ranking signatures.}
#'
#' @export
smoketest_floor_elpd_invariance <- function(
    DT,
    stan_code,
    floors = c(-1e6, -1e8, -1e4),
    spec = "C",
    controls = character(0),
    k_grid = 0:1,
    hs_grid = data.frame(
      hs_tau0 = c(0.1, 0.5),
      hs_slab_scale = c(1, 5),
      hs_slab_df = 4
    ),
    hs_rows = 1:2,
    iter_warmup = 200,
    iter_sampling = 200,
    chains = 2,
    seed = 123,
    verbose = TRUE
) {
  
  if (!requireNamespace("cmdstanr", quietly = TRUE)) {
    stop("Package 'cmdstanr' is required. Install with:\n
         install.packages('cmdstanr', repos = c('https://stan-dev.r-universe.dev', getOption('repos')))")
  }
  
  hs_grid_subset <- hs_grid[hs_rows, , drop = FALSE]
  
  build_model_with_floor <- function(stan_code, floor_value) {
    pat <- "real\\s+FLOOR\\s*=\\s*-?[0-9eE\\.]+\\s*;"
    stan_code_floor <- gsub(pat, sprintf("real FLOOR = %.0f;", floor_value),
                            stan_code, perl = TRUE)
    cmdstanr::cmdstan_model(
      cmdstanr::write_stan_file(stan_code_floor),
      cpp_options = list(stan_threads = TRUE),
      quiet = TRUE
    )
  }
  
  run_bma_with_model <- function(model, DT, spec, controls, k_grid, hs_grid,
                                 iter_warmup, iter_sampling, chains, seed) {
    
    param_grid <- tidyr::expand_grid(
      k = k_grid,
      hs_idx = seq_len(nrow(hs_grid))
    )
    param_grid$hs_tau0 <- hs_grid$hs_tau0[param_grid$hs_idx]
    param_grid$hs_slab_scale <- hs_grid$hs_slab_scale[param_grid$hs_idx]
    param_grid$hs_slab_df <- hs_grid$hs_slab_df[param_grid$hs_idx]
    param_grid$fit_id <- seq_len(nrow(param_grid))
    
    results <- lapply(seq_len(nrow(param_grid)), function(i) {
      row <- param_grid[i, ]
      
      fit_result <- fit_one_internal(
        DT = DT,
        k = as.integer(row$k),
        spec = spec,
        controls = controls,
        model = model,
        iter_warmup = iter_warmup,
        iter_sampling = iter_sampling,
        chains = chains,
        seed = seed + row$fit_id,
        hs_tau0 = row$hs_tau0,
        hs_slab_scale = row$hs_slab_scale,
        hs_slab_df = row$hs_slab_df,
        verbose = FALSE
      )
      
      log_lik <- tryCatch(
        as.matrix(fit_result$fit$draws("log_lik_joint", format = "draws_matrix")),
        error = function(e) {
          as.matrix(fit_result$fit$draws("log_lik", format = "draws_matrix"))
        }
      )
      log_lik[!is.finite(log_lik)] <- -1e10
      
      loo_obj <- tryCatch(
        loo::loo(log_lik, cores = 1),
        error = function(e) {
          T_eff <- ncol(log_lik)
          col_max <- apply(log_lik, 2, max)
          logmeanexp <- col_max + log(colMeans(exp(sweep(log_lik, 2, col_max))))
          elpd_est <- sum(logmeanexp)
          elpd_se <- stats::sd(logmeanexp) * sqrt(T_eff)
          list(estimates = matrix(c(elpd_est, elpd_se), nrow = 1,
                                  dimnames = list("elpd_loo", c("Estimate", "SE"))))
        }
      )
      
      list(
        fit_id = row$fit_id,
        k = row$k,
        hs_tau0 = row$hs_tau0,
        hs_slab_scale = row$hs_slab_scale,
        hs_slab_df = row$hs_slab_df,
        elpd = as.numeric(loo_obj$estimates["elpd_loo", "Estimate"]),
        elpd_se = as.numeric(loo_obj$estimates["elpd_loo", "SE"])
      )
    })
    
    dplyr::bind_rows(results)
  }
  
  out_tabs <- list()
  rank_keys <- character(0)
  
  for (fv in floors) {
    if (verbose) message("Testing FLOOR = ", fv)
    
    model_floor <- build_model_with_floor(stan_code, fv)
    
    tab <- run_bma_with_model(
      model = model_floor,
      DT = DT,
      spec = spec,
      controls = controls,
      k_grid = k_grid,
      hs_grid = hs_grid_subset,
      iter_warmup = iter_warmup,
      iter_sampling = iter_sampling,
      chains = chains,
      seed = seed
    )
    
    tab$FLOOR <- fv
    key <- paste(tab$fit_id[order(-tab$elpd)], collapse = "-")
    rank_keys <- c(rank_keys, key)
    tab$rank_elpd <- rank(-tab$elpd, ties.method = "first")
    out_tabs[[as.character(fv)]] <- tab
  }
  
  same_order <- length(unique(rank_keys)) == 1L
  all_tabs <- dplyr::bind_rows(out_tabs)
  
  list(
    same_order = same_order,
    floors = floors,
    tables = out_tabs,
    combined = all_tabs,
    rank_signatures = rank_keys
  )
}
