#' Matrix inverse
#'
#' @param A Numeric matrix.
#' @return The inverse of A.
#' @keywords internal
inver <- function(A) {
  solve(as.matrix(A))
}


#' Matrix square root
#'
#' @param A Numeric matrix.
#' @return The square root of A as a numeric matrix.
#'   If the result is complex due to rounding, the real part is returned.
#' @importFrom pracma sqrtm
#' @keywords internal
sqrtmat <- function(A) {
  Re(sqrtm(as.matrix(A))$B)
}


#' Randomly generate an innovation covariance matrix \eqn{\Sigma_{\boldsymbol{\nu}}}
#'
#' @description
#' Generates a symmetric \eqn{p \times p} innovation covariance matrix
#' \eqn{\Sigma_{\boldsymbol{\nu}}} for the VAR(1) component in the proposed
#' model. The diagonal elements are fixed at 0.5, and a specified number of
#' off-diagonal elements are randomly assigned nonzero values to introduce
#' cross-correlation between variables.
#'
#' @param p Integer. Dimension of the covariance matrix (\eqn{p} variables).
#' @param num_nonzero Integer. Target number of nonzero off-diagonal entries
#' (counted individually; both upper and lower triangles are included).
#' Since nonzero values are inserted in symmetric pairs, an even value is
#' recommended. The maximum meaningful value is \eqn{p(p-1)}.
#'
#' @details
#' Each nonzero off-diagonal entry is placed in symmetric pairs
#' \eqn{(i,j)} and \eqn{(j,i)} to ensure symmetry of the matrix. The magnitudes
#' of the nonzero entries are randomly drawn from the set
#' \eqn{\{0.1, 0.2\}} with randomly assigned signs. The diagonal entries are
#' fixed at 0.5 to maintain a moderate level of innovation variance.
#'
#' In the full model, \eqn{\Sigma_{\boldsymbol{\nu}}} governs the variability
#' of the VAR(1) innovation term \eqn{\boldsymbol{\nu}_t} in
#' \eqn{\boldsymbol{\epsilon}_t = \Phi \boldsymbol{\epsilon}_{t-1} + \boldsymbol{\nu}_t}.
#'
#' @return
#' A numeric symmetric matrix of dimension \eqn{p \times p} representing
#' \eqn{\Sigma_{\boldsymbol{\nu}}} with diagonal 0.5 and approximately
#' `num_nonzero` nonzero off-diagonal entries.
#'
#'
#' @export
random_Signu <- function(p, num_nonzero){
  Sig_nu <- diag(rep(0.5, p), nrow = p)

  while ((sum(Sig_nu != 0) - p) < num_nonzero) {
    temp_mag <- sample(c(0.1, 0.2), 1)
    temp_sign <- sample(c(1, -1), 1)
    temp <- 1:p
    temp_row <- sample(temp, 1)
    temp <- temp[-temp_row]
    temp_col <- sample(temp, 1)

    Sig_nu[temp_row, temp_col] <- temp_mag*temp_sign
    Sig_nu[temp_col, temp_row] <- temp_mag*temp_sign
  }

  return(Sig_nu)
}


#' Randomly generate an autoregressive coefficient matrix \eqn{\Phi}
#'
#' @description
#' Generates a \eqn{p \times p} autoregressive coefficient matrix
#' \eqn{\Phi} for the VAR(1) component in the proposed model. The diagonal
#' entries are randomly chosen from \{0.5, -0.5\}, and a specified number of
#' off-diagonal elements are randomly assigned nonzero values to introduce
#' cross-dependence among variables.
#'
#' @param p Integer. Dimension of the square matrix (\eqn{p} variables).
#' @param num_nonzero Integer. Target number of nonzero off-diagonal
#' entries in \eqn{\Phi}.
#'
#' @details
#' The diagonal elements are sampled independently from the set
#' \eqn{\{0.5, -0.5\}}. Nonzero off-diagonal entries are then placed at random
#' positions until the total number of nonzero off-diagonal elements reaches
#' at least \code{num_nonzero}. Each nonzero off-diagonal element has magnitude
#' 0.1 or 0.2 with equal probability and a randomly assigned sign. The resulting
#' matrix \eqn{\Phi} governs the temporal dependence of the stationary VAR(1)
#' process
#' \deqn{\boldsymbol{\epsilon}_t = \Phi \boldsymbol{\epsilon}_{t-1} +
#' \boldsymbol{\nu}_t.}
#' @return
#' A numeric \eqn{p \times p} matrix representing the autoregressive
#' coefficient matrix \eqn{\Phi} with random diagonal entries in
#' \{0.5, -0.5\} and approximately `num_nonzero` nonzero off-diagonal
#' elements.
#'
#'
#' @export
random_Phi <- function(p, num_nonzero){
  Phi <- diag(sample(c(0.5, -0.5), p, replace = TRUE), nrow = p)

  while ((sum(Phi != 0) - p) < num_nonzero) {
    temp_mag <- sample(c(0.1, 0.2), 1)
    temp_sign <- sample(c(1, -1), 1)
    temp <- 1:p
    temp_row <- sample(temp, 1)
    temp <- temp[-temp_row]
    temp_col <- sample(temp, 1)

    Phi[temp_row, temp_col] <- temp_mag*temp_sign
  }

  return(Phi)
}


#' Approximate the long-run covariance matrix \eqn{\Gamma_{\boldsymbol{\epsilon}}(0)}
#'
#' @description
#' Computes an approximate long-run covariance matrix
#' \eqn{\Gamma_{\boldsymbol{\epsilon}}(0)} for the stationary VAR(1)
#' process
#' \deqn{\boldsymbol{\epsilon}_t = \Phi \boldsymbol{\epsilon}_{t-1} + \boldsymbol{\nu}_t,}
#' where \eqn{\boldsymbol{\nu}_t} has innovation covariance
#' \eqn{\Sigma_{\boldsymbol{\nu}}}. The approximation is obtained via a
#' truncated series expansion up to order `m`.
#'
#' @param Sig_nu Numeric \eqn{p \times p} matrix representing the innovation
#' covariance \eqn{\Sigma_{\boldsymbol{\nu}}}.
#' @param Phi Numeric \eqn{p \times p} autoregressive coefficient matrix
#' \eqn{\Phi}.
#' @param m Integer (default = 6). Number of lag terms used in the truncated
#' series expansion. Larger values yield higher accuracy but increase
#' computation time.
#'
#' @details
#' For a stable VAR(1) process, the theoretical long-run covariance satisfies
#' \deqn{\mathrm{vec}(\Gamma_{\boldsymbol{\epsilon}}(0)) =
#' (I_{p^2} - \Phi \otimes \Phi)^{-1} \mathrm{vec}(\Sigma_{\boldsymbol{\nu}}).}
#' To avoid matrix inversion, this function approximates the inverse by the
#' truncated Neumann series:
#' \deqn{(I_{p^2} - \Phi \otimes \Phi)^{-1} =
#' I_{p^2} + \sum_{i=1}^{m} (\Phi^{\otimes i}),}
#' where \eqn{\Phi^{\otimes i}} denotes the Kronecker product of
#' \eqn{\Phi^i} with itself. The truncation level `m` controls the
#' approximation accuracy.
#'
#' @return
#' A numeric \eqn{p \times p} matrix giving the approximate
#' \eqn{\Gamma_{\boldsymbol{\epsilon}}(0)}.
#'
#'
#' @export
get_Sig_e1_approx <- function(Sig_nu, Phi, m = 6) {
  Sig_nu <- as.matrix(Sig_nu)
  Phi <- as.matrix(Phi)
  p <- ncol(Phi)
  temp <- diag(rep(1,p^2), ncol = p^2)
  temp_Phi <- diag(rep(1,p), ncol = p)
  for (i in 1:m) {
    temp_Phi <- temp_Phi%*%Phi
    temp <- temp + kronecker(temp_Phi, temp_Phi)
  }

  Sig_e1_vec <- temp%*%c(Sig_nu)
  return (matrix(Sig_e1_vec, p, p))
}


#' Generate multivariate time series from the proposed model
#'
#' @description
#' Simulates a multivariate time series following the proposed model structure,
#' where the mean component evolves as a random walk with abrupt shifts,
#' overlaid by a stationary VAR(1) process to account for temporal and
#' cross-sectional correlations.
#'
#' Specifically, at each time point \eqn{t = 1, \ldots, n}, the data are
#' generated as
#' \deqn{\mathbf{y}_t = \boldsymbol{\mu}_t + \boldsymbol{\epsilon}_t,}
#' where, for \eqn{t = 2, \ldots, n},
#' \deqn{\boldsymbol{\mu}_t = \boldsymbol{\mu}_{t-1} + \boldsymbol{\eta}_t + \boldsymbol{\delta}_t,}
#' and
#' \deqn{\boldsymbol{\epsilon}_t = \Phi \boldsymbol{\epsilon}_{t-1} + \boldsymbol{\nu}_t.}
#'
#' Here, \eqn{\boldsymbol{\eta}_t} denotes the random walk innovation with
#' covariance \eqn{\Sigma_{\boldsymbol{\eta}}}, and
#' \eqn{\boldsymbol{\nu}_t} is the VAR(1) innovation with covariance
#' \eqn{\Sigma_{\boldsymbol{\nu}}}. The vector
#' \eqn{\boldsymbol{\delta}_t} is nonzero only at change points.
#'
#' @param mu0 Numeric vector of length \eqn{p}. The initial mean vector
#' \eqn{\boldsymbol{\mu}_0}.
#' @param deltas A list of numeric vectors, each representing the jump
#' magnitude \eqn{\boldsymbol{\delta}_t} at a change point.
#' @param Sig_eta Numeric \eqn{p \times p} covariance matrix
#' \eqn{\Sigma_{\boldsymbol{\eta}}} of the random walk innovation.
#' @param Sig_nu Numeric \eqn{p \times p} covariance matrix
#' \eqn{\Sigma_{\boldsymbol{\nu}}} of the VAR(1) innovation.
#' @param Phi Numeric \eqn{p \times p} autoregressive coefficient matrix
#' \eqn{\Phi}.
#' @param Sig_e1 Numeric \eqn{p \times p} initial-state covariance matrix
#' of the VAR(1) process.
#' @param errortype Character; either "n" (Gaussian) or "t" (Student's t)
#' specifying the distribution of the innovations.
#' @param df Degrees of freedom for the t-distribution (used only when
#' `errortype = "t"`). Default is 10.
#' @param number_cps Integer; number of change points (\eqn{m}).
#' @param lengthofeachpart Integer; number of observations between
#' consecutive change points (\eqn{\tau_{k+1} - \tau_k}).
#'
#' @details
#' The total length of the time series is given by
#' \eqn{n = (number\_cps + 1) \times lengthofeachpart}, so that the specified
#' change points partition the data into equally sized segments. When
#' \eqn{\Sigma_{\boldsymbol{\eta}} = 0}, the model reduces to a piecewise
#' constant mean process with no random walk component. When \eqn{\Phi = 0},
#' the process reduces to a random walk model without vector autoregressive
#' dependence. If both \eqn{\Sigma_{\boldsymbol{\eta}} = 0} and \eqn{\Phi = 0},
#' the model simplifies to the classical piecewise constant setting commonly
#' used in multiple change point analysis. The two innovation components are
#' generated independently.
#'
#' The innovations \eqn{\boldsymbol{\eta}_t} and \eqn{\boldsymbol{\nu}_t} are
#' drawn either from a multivariate normal distribution (when
#' \code{errortype = "n"}) using \code{\link[MASS]{mvrnorm}}, or from a
#' multivariate Student's t distribution (when \code{errortype = "t"}) using
#' \code{\link[SimDesign]{rmvt}}.
#'
#' @return
#' A numeric matrix of dimension \eqn{n \times p}, with
#' \eqn{n = (number\_cps+1)\,lengthofeachpart}, containing the simulated
#' observations \eqn{\{\mathbf{y}_t\}_{t=1}^n}.
#'
#' @examples
#' set.seed(123)
#' p <- 3
#' mu0 <- rep(0, p)
#' deltas <- list(c(3, 0, -3), c(-2, 4, 0))
#' Sig_eta <- diag(0.01, p)
#' Sig_nu  <- random_Signu(p, 0)
#' Phi <- random_Phi(p, p)
#' Sig_e1 <- get_Sig_e1_approx(Sig_nu, Phi)
#'
#' Y <- generate_data(mu0, deltas, Sig_eta, Sig_nu, Phi, Sig_e1,
#'                    errortype = "n", number_cps = 2, lengthofeachpart = 100)
#' dim(Y)
#'
#' @importFrom MASS mvrnorm
#' @importFrom SimDesign rmvt
#' @export
generate_data <- function(mu0, deltas, Sig_eta, Sig_nu, Phi, Sig_e1, errortype, df=10, number_cps, lengthofeachpart){
  p <- length(mu0)
  N <- lengthofeachpart*(number_cps+1)
  Sig_eta <- as.matrix(Sig_eta)
  Sig_nu <- as.matrix(Sig_nu)
  Phi <- as.matrix(Phi)
  Sig_e1 <- as.matrix(Sig_e1)

  if(errortype == "n"){
    errors_rw <- mvrnorm(n=N, rep(0,p), Sig_eta)
    errors_ar <- mvrnorm(n=N, rep(0,p), Sig_nu)
  }
  else if(errortype == "t"){
    errors_rw <- SimDesign::rmvt(n=N, sigma = Sig_eta, df = df)
    errors_ar <- SimDesign::rmvt(n=N, sigma = Sig_nu, df = df)
  }
  else{
    return("errortype should be either n or t!")
  }

  mus <- matrix(0, nrow = N, ncol = p)
  mus[1,] <- mu0 + errors_rw[1,]

  for(i in 2:lengthofeachpart){
    mus[i,] <- mus[i-1,] + errors_rw[i,]
  }

  index <- 1
  while(index <= number_cps){
    mus[(index*lengthofeachpart+1),] <- mus[index*lengthofeachpart,] + deltas[[index]] + errors_rw[(index*lengthofeachpart+1),]

    for(i in (index*lengthofeachpart + 2):((index+1)*lengthofeachpart)){
      mus[i,] <- mus[i-1,] + errors_rw[i,]
    }
    index <- index + 1
  }

  es <- matrix(0, nrow = N, ncol = p)

  if(errortype == "n"){
    es[1,] <- mvrnorm(n=1, rep(0,p), Sig_e1)
  }
  else if(errortype == "t"){
    es[1,] <- SimDesign::rmvt(n=1, sigma = Sig_e1, df = df)
  }
  else{
    stop("errortype should be either n or t!")
  }
  for(i in 2:N){
    es[i,] <- Phi%*%es[i-1,] + errors_ar[i,]
  }
  data <- mus + es
  return(data)
}

#' Add mean shifts to multivariate time series data
#'
#' @description
#' Adds constant mean shifts to a multivariate time series by applying a fixed
#' jump vector at evenly spaced change points. After each change point, all
#' subsequent observations are shifted by the specified vector.
#'
#' @param data Numeric matrix of dimension \eqn{n \times p}, representing the
#' time series data.
#' @param delta Numeric vector of length \eqn{p}, specifying the shift
#' magnitudes added to each variable after each change point.
#' @param num Integer; number of change points. The data are divided evenly
#' into \code{num} + 1 segments, and \code{delta} is added cumulatively after
#' each change point.
#'
#' @details
#' The total length of the time series is denoted by \eqn{n}. Change points are
#' placed at evenly spaced locations given by
#' \eqn{k \lfloor n / (num + 1) \rfloor}, for \eqn{k = 1, \ldots, num}. After each
#' change point, a constant shift vector \code{delta} is added to all subsequent
#' observations. This construction produces synthetic data with known and
#' controlled mean shifts, making the function useful for simulation studies and
#' benchmarking change point detection methods.
#'
#' @return
#' A numeric matrix of the same dimension as `data`, containing the adjusted
#' series with added mean shifts.
#'
#'
#' @export
add_jumps <- function(data, delta, num) {
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  seg_size <- floor(n/(num + 1))
  for (i in 1:num) {
    data[(i*seg_size + 1):n, ] <- t(t(data[(i*seg_size + 1):n, ]) + delta)
  }
  return(data)
}

#' Compute the covariance matrix \eqn{\Sigma_{\mathrm{ALL}}^*} for observations within a moving window
#'
#' @description
#' Calculates the covariance matrix \eqn{\Sigma_{\mathrm{ALL}}^*} for all
#' observations within a moving window of length \eqn{w}, given the random walk
#' innovation covariance \eqn{\Sigma_{\boldsymbol{\eta}}}, the VAR(1) innovation
#' covariance \eqn{\Sigma_{\boldsymbol{\nu}}}, the autoregressive coefficient
#' matrix \eqn{\Phi}, and the initial-state covariance matrix
#' \eqn{\Gamma_{\boldsymbol{\epsilon}}(0)} (denoted here by `Sig_e1`).
#' The resulting matrix accounts for both the random walk component and
#' the temporal dependence introduced by the VAR(1) structure.
#'
#' @param w Integer; window size.
#' @param p Integer; data dimension.
#' @param Sig_eta Numeric \eqn{p \times p} matrix representing the covariance
#' of the random walk innovation \eqn{\Sigma_{\boldsymbol{\eta}}}.
#' @param Sig_nu Numeric \eqn{p \times p} matrix representing the covariance
#' of the VAR(1) innovation \eqn{\Sigma_{\boldsymbol{\nu}}}.
#' @param Phi Numeric \eqn{p \times p} autoregressive coefficient matrix
#' \eqn{\Phi}.
#' @param Sig_e1 Numeric \eqn{p \times p} matrix representing the covariance
#' of the initial state \eqn{\Gamma_{\boldsymbol{\epsilon}}(0)}.
#'
#' @details
#' The function decomposes the overall covariance matrix
#' \eqn{\Sigma_{\mathrm{ALL}}^*} into two additive components corresponding to
#' the random walk contribution \eqn{\Sigma_{\mathrm{RW}}} and the
#' autoregressive contribution \eqn{\Sigma_{\mathrm{AR}}}, so that
#' \deqn{\Sigma_{\mathrm{ALL}}^* = \Sigma_{\mathrm{RW}} + \Sigma_{\mathrm{AR}}.}
#' When \eqn{p = 1}, the construction reduces to the scalar random walk and
#' AR(1) case, for which closed-form covariance expressions are available.
#' For higher-dimensional settings, block-matrix structures are constructed
#' using functions from the \pkg{blockmatrix} package to capture both
#' cross-sectional and temporal dependence. The returned inverse matrix
#' \eqn{(\Sigma_{\mathrm{ALL}}^*)^{-1}} is used in the main change point
#' detection algorithm to adjust for the effects of random walk drift and
#' vector autoregressive correlation.
#'
#' @return
#' A list with the following components:
#' \itemize{
#'   \item `Sig_AR` — Covariance contribution from the VAR(1) component
#'     (\eqn{\Sigma_{\mathrm{AR}}}).
#'   \item `Sig_RW` — Covariance contribution from the random walk component
#'     (\eqn{\Sigma_{\mathrm{RW}}}).
#'   \item `Sig_all` — Combined covariance matrix
#'     (\eqn{\Sigma_{\mathrm{ALL}}^* = \Sigma_{\mathrm{AR}} + \Sigma_{\mathrm{RW}}}).
#'   \item `Sig_all_inv` — Inverse of the combined covariance matrix
#'     \eqn{(\Sigma_{\mathrm{ALL}}^*)^{-1}}.
#' }
#'
#' @examples
#' set.seed(1)
#' p <- 3
#' w <- 20
#' Sig_eta <- diag(0.01, p)
#' Sig_nu  <- random_Signu(p, 0)
#' Phi <- random_Phi(p, p)
#' Sig_e1 <- get_Sig_e1_approx(Sig_nu, Phi)
#' res <- get_Sigs(w, p, Sig_eta, Sig_nu, Phi, Sig_e1)
#'
#' @importFrom blockmatrix blockmatrix
#' @export
get_Sigs <- function(w, p, Sig_eta, Sig_nu, Phi, Sig_e1){

  if(p == 1){
    Sig_AR <- matrix(0, w, w)
    Sig_RW <- matrix(0, w, w)
    for(i in 1:w){
      for(j in 1:w){
        Sig_AR[i,j] <- (Sig_nu/(1-Phi^2))*Phi^{abs(i-j)}
        Sig_RW[i,j] <- Sig_eta*min(i,j)
      }
    }
    Sig_all <- Sig_AR + Sig_RW
    Sig_all_inv <- inver(Sig_all)
    return(list(Sig_AR = Sig_AR, Sig_RW = Sig_RW, Sig_all = Sig_all, Sig_all_inv = Sig_all_inv))
  }

  Sig_etas <- vector(mode = "list", w)
  for(i in 1:w){
    Sig_etas[[i]] <- i*Sig_eta
  }

  Sig_RW_ijs <- vector(mode = "list", p^2)
  Sig_RW_names <- paste0(1:(p^2))
  for(i in 1:p){
    for(j in 1:p){
      temp <- matrix(0,w,w)
      for(k in 1:w){
        for(l in 1:w){
          temp[k,l] <- Sig_etas[[min(k,l)]][i,j]
        }
      }
      Sig_RW_ijs[[i+(j-1)*p]] <- temp
    }
  }
  names(Sig_RW_ijs) <- Sig_RW_names
  block_structure_RW <- array(Sig_RW_names, c(p,p))
  Sig_RW <- blockmatrix(value = block_structure_RW, list = Sig_RW_ijs)

  Gammas <- vector(mode = "list", w)
  Gammas[[1]] <- Sig_e1
  for(i in 2:w){
    Gammas[[i]] <- Phi%*%Gammas[[i-1]]
  }
  Sig_AR_ijs <- vector(mode = "list", p^2)
  Sig_AR_names <- paste0(1:(p^2))
  for(i in 1:p){
    for(j in 1:p){
      temp <- matrix(0,w,w)
      for(k in 1:w){
        for(l in 1:w){
          if(l >= k){
            temp[k,l] <- Gammas[[abs(k-l)+1]][j,i]
          }else{
            temp[k,l] <- Gammas[[abs(k-l)+1]][i,j]
          }
        }
      }
      Sig_AR_ijs[[i+(j-1)*p]] <- temp
    }
  }
  names(Sig_AR_ijs) <- Sig_AR_names
  block_structure_AR <- array(Sig_AR_names, c(p,p))
  Sig_AR <- blockmatrix(value = block_structure_AR, list = Sig_AR_ijs)

  Sig_all <- Sig_AR + Sig_RW
  Sig_all <- as.matrix(Sig_all)
  Sig_all_inv <- inver(Sig_all)
  Sig_all_inv <- as.matrix(Sig_all_inv)
  return(list(Sig_AR = as.matrix(Sig_AR), Sig_RW = as.matrix(Sig_RW), Sig_all = Sig_all, Sig_all_inv = Sig_all_inv))
}


#' Objective function for robust parameter estimation (RPE)
#'
#' @description
#' Computes the objective value minimized in the robust parameter estimation
#' (RPE) procedure.
#'
#' @param parameters Numeric vector of length three, containing the parameters
#' \eqn{(\sigma_{\eta}^2, \sigma_{\nu}^2, \phi)} to be estimated.
#' @param Var_ests Numeric vector of empirical variance estimates
#' \eqn{\{v^{(l)}\}_{l=1}^L} obtained from the \eqn{l}-lagged differences of the data.
#'
#' @details
#' For each lag \eqn{l = 1, \ldots, L}, the theoretical variance of the
#' \eqn{l}-lagged difference
#' \eqn{z^{(l)}_t = y_{t+l} - y_t} is calculated by
#' \deqn{V_l = l\sigma_{\eta}^2 +
#'       2\,\sigma_{\nu}^2 \frac{1 - \phi^l}{1 - \phi^2}.}
#' The function returns the sum of squared deviations between
#' \eqn{V_l} and the empirical variance estimates \eqn{v^{(l)}}:
#' \deqn{\sum_{l=1}^L \left(V_l - v^{(l)}\right)^2.}
#'
#'
#' @return
#' A numeric scalar representing the objective value.
#'
#'
#' @keywords internal
objective_func <- function(parameters, Var_ests){
  temp <- 0
  for(l in 1:length(Var_ests)){
    temp <- temp + (l*parameters[1] + 2*((1-parameters[3]^l)/(1-parameters[3]^2))*parameters[2] - Var_ests[l])^2
  }
  return(temp)
}


#' Robust parameter estimation (RPE) for univariate time series
#'
#' @description
#' Implements the robust parameter estimation (RPE) procedure to estimate
#' the parameters \eqn{\sigma_{\eta}^2}, \eqn{\sigma_{\nu}^2}, and \eqn{\phi}
#' in the univariate version of the proposed model.
#' The method is based on minimizing the objective function defined in
#' \code{\link{objective_func}}, using variance estimates computed from
#' lagged differences of the data.
#'
#' @param data Numeric vector containing the univariate time series
#' observations \eqn{y_{1:n}}.
#' @param L Integer; number of lag differences used in the estimation
#' (default = 15).
#' @param phiLower,phiUpper Numeric; lower and upper bounds for the
#' autoregressive coefficient \eqn{\phi}.
#' @param sigetaLower,sigetaUpper Numeric; lower and upper bounds for
#' \eqn{\sigma_{\eta}^2}, the random walk innovation variance.
#' @param signuLower,signuUpper Numeric; lower and upper bounds for
#' \eqn{\sigma_{\nu}^2}, the VAR(1) innovation variance.
#' @param num_inis Integer; number of initial values of \eqn{\phi} used for
#' grid search initialization (default = 20).
#' @param CPs Optional numeric vector of change point locations (indices).
#' If provided, differenced data crossing these points are removed to
#' improve the robustness of the variance estimation in the presence of
#' structural breaks.
#'
#' @details
#' For each lag \eqn{l = 1, \ldots, L}, the function computes the variance of
#' the \eqn{l}-lagged differences
#' \eqn{z^{(l)}_t = y_{t+l} - y_t} using the median absolute deviation (MAD).
#' If change points (\code{CPs}) are specified, all differences that overlap
#' a change point are excluded from the computation. The resulting empirical
#' variances \eqn{\{v^{(l)}\}_{l=1}^L} are then used to construct the following
#' optimization problem:
#' \deqn{\sum_{l=1}^L \left(l\sigma^2_{\eta} +
#' 2\frac{1-\phi^l}{1-\phi^2}\sigma^2_{\nu} - v^{(l)}\right)^2,}
#' which is solved via bounded optimization using \code{optim()} with the
#' L-BFGS-B algorithm. Initial parameter values are obtained using
#' non-negative least squares (NNLS) regression over a grid of \eqn{\phi}
#' values.
#'
#' @return
#' A list with elements:
#' \itemize{
#'   \item `sigeta_est` — Estimated \eqn{\sigma_{\eta}^2}.
#'   \item `signu_est` — Estimated \eqn{\sigma_{\nu}^2}.
#'   \item `phi_est` — Estimated autoregressive coefficient \eqn{\phi}.
#'   \item `inis` — Initial parameter values used in optimization.
#' }
#'
#' @importFrom nnls nnls
#' @importFrom stats mad optim
#' @keywords internal
estimate_RWVAR_cp <- function(data, L = 15, phiLower = -.8, phiUpper = .8, sigetaLower = 0, sigetaUpper = Inf, signuLower = 1e-06, signuUpper = Inf, num_inis = 20, CPs = NULL){
  n <- length(data)
  Var_ests <- rep(0, L)

  for(l in 1:L){
    data_differences <- data[(l+1):n] - data[1:(n-l)]
    if (!is.null(CPs)) {
      index_remove <- NULL
      for (CP in CPs) {
        temp <- (CP - l + 1):CP
        temp <- temp[which(temp > 0)]
        index_remove <- append(index_remove, temp)
      }
      index_remove <- unique(index_remove)
      data_differences <- data_differences[-index_remove]
    }
    Var_ests[l] <- mad(data_differences)^2
  }


  sigetaX <- 1:L

  errors <- rep(0, num_inis)
  sigeta_inis <- rep(0, num_inis)
  signu_inis <- rep(0, num_inis)
  phi_inis <- seq(phiLower, phiUpper, length.out = num_inis)
  for(i in 1:num_inis){
    signuX <- rep(0, L)
    for(l in 1:L){
      signuX[l] <- 2*((1-phi_inis[i]^l)/(1-phi_inis[i]^2))
    }
    A <- cbind(sigetaX, signuX)
    reg <- nnls(A, Var_ests)
    errors[i] <- reg$deviance
    sigeta_inis[i] <- reg$x[1]
    signu_inis[i] <- reg$x[2]
  }
  index <- which(errors == min(errors, na.rm = TRUE))[1]
  sigeta_ini <- sigeta_inis[index]
  signu_ini <- signu_inis[index]
  phi_ini <- phi_inis[index]

  inis <- c(sigeta_ini, signu_ini, phi_ini)
  res <- optim(par = inis, fn = objective_func, lower = c(sigetaLower, signuLower, phiLower), upper = c(sigetaUpper, signuUpper, phiUpper), Var_ests = Var_ests, method = "L-BFGS-B")

  return(list(sigeta_est = as.numeric(res$par[1]), signu_est = as.numeric(res$par[2]), phi_est = as.numeric(res$par[3]), inis = inis))
}


#' Robust parameter estimation (RPE) for multivariate time series
#'
#' @description
#' Applies the robust parameter estimation (RPE) procedure componentwise
#' to a multivariate time series in order to estimate the diagonal elements
#' of \eqn{\Sigma_{\boldsymbol{\eta}}}, \eqn{\Sigma_{\boldsymbol{\nu}}}, and
#' \eqn{\Phi}.
#'
#' @param data Numeric matrix of dimension \eqn{n \times p}, representing
#' the multivariate time series \eqn{\{\mathbf{y}_t\}_{t=1}^n}.
#' @param L Integer; number of lag differences used in each univariate RPE
#' estimation (default = 15).
#' @param phiLower,phiUpper Numeric; lower and upper bounds for the
#' autoregressive coefficient \eqn{\phi}.
#' @param sigetaLower,sigetaUpper Numeric; lower and upper bounds for
#' \eqn{\sigma_{\eta}^2}, the random walk innovation variance.
#' @param signuLower,signuUpper Numeric; lower and upper bounds for
#' \eqn{\sigma_{\nu}^2}, the VAR(1) innovation variance.
#' @param num_inis Integer; number of initial values of \eqn{\phi} used for
#' grid search initialization (default = 20).
#' @param CPs Optional numeric vector of change point locations (indices).
#' If provided, differenced data overlapping these points are removed for
#' more robust estimation.
#'
#' @details
#' This function performs the RPE procedure for each variable (column)
#' in `data` independently, using \code{\link{estimate_RWVAR_cp}} as the
#' univariate estimator. The resulting estimates are combined into
#' diagonal matrices:
#' \itemize{
#'   \item \eqn{\Sigma_{\boldsymbol{\nu}}} — estimated innovation covariance
#'     of the VAR(1) component.
#'   \item \eqn{\Sigma_{\boldsymbol{\eta}}} — estimated innovation covariance
#'     of the random walk component.
#'   \item \eqn{\Phi} — estimated autoregressive coefficient matrix.
#' }
#'
#'
#' @return
#' A list containing:
#' \itemize{
#'   \item `Sig_nu` — Diagonal matrix of estimated \eqn{\sigma_{\nu,i}^2}.
#'   \item `Sig_eta` — Diagonal matrix of estimated \eqn{\sigma_{\eta,i}^2}.
#'   \item `Phi` — Diagonal matrix of estimated autoregressive coefficients
#'     \eqn{\phi_i}.
#' }
#'
#' @examples
#' set.seed(123)
#' p <- 3
#'
#' # True (diagonal) parameters for simulation
#' mu0    <- rep(0, p)
#' Sig_eta <- diag(0.01, p)
#' Sig_nu  <- random_Signu(p, 0)   # diagonal here since num_nonzero = 0
#' Phi     <- random_Phi(p, 0)     # diagonal here since num_nonzero = 0
#' Sig_e1  <- get_Sig_e1_approx(Sig_nu, Phi)
#'
#' # Two evenly spaced change points
#' deltas <- list(c(3, 0, -3), c(-2, 4, 0))
#' Y <- generate_data(mu0, deltas, Sig_eta, Sig_nu, Phi, Sig_e1,
#'                    errortype = "n", number_cps = 2, lengthofeachpart = 100)
#'
#' # Provide CP locations to remove affected differences in RPE
#' CPs <- c(100, 200)
#'
#' # Componentwise robust parameter estimation
#' fit <- estimate_RWVAR_cp_heter(Y, L = 15, CPs = CPs)
#'
#' # Estimated diagonal matrices:
#' fit$Sig_eta
#' fit$Sig_nu
#' fit$Phi
#'
#' @export
estimate_RWVAR_cp_heter <- function(data, L = 15, phiLower = -.8, phiUpper = .8, sigetaLower = 0, sigetaUpper = Inf, signuLower = 1e-06, signuUpper = Inf, num_inis = 20, CPs = NULL){
  data <- as.matrix(data)
  p <- ncol(data)
  est_phi <- rep(0,p)
  est_sigma2_eta <- rep(0,p)
  est_sigma2_nu <- rep(0,p)
  for(i in 1:p){
    est <- estimate_RWVAR_cp(data[,i], L, phiLower, phiUpper, sigetaLower, sigetaUpper, signuLower, signuUpper, num_inis, CPs)
    est_phi[i] <- est$phi_est
    est_sigma2_eta[i] <- est$sigeta_est
    est_sigma2_nu[i] <- est$signu_est
  }
  return(list(Sig_nu = diag(est_sigma2_nu, nrow = p), Sig_eta = diag(est_sigma2_eta, nrow = p), Phi = diag(est_phi, nrow = p)))
}


#' Core change point detection algorithm (given known parameters)
#'
#' @description
#' Implements the core step of the proposed change point
#' detection (CPD) algorithm to estimate the locations of change points,
#' given the inverse windowed covariance \eqn{\Sigma_{\mathrm{ALL}}^{*-1}}.
#' The method computes detector statistics over a moving window using a
#' projection-based quadratic form and identifies candidate change points
#' via peak detection.
#'
#' @param data Numeric matrix of dimension \eqn{n \times p}, the multivariate
#' time series.
#' @param Sig_all_inv Numeric matrix of dimension \eqn{(p w) \times (p w)},
#' the inverse of the combined covariance \eqn{\Sigma_{\mathrm{ALL}}^*}
#' (accounts for random walk and VAR(1) effects within a window of size \eqn{w}).
#' @param w Integer; window size used in the moving-window detection step.
#' @param D Numeric; detection threshold used in the peak-finding step.
#' @param needReproduce Logical; if \code{TRUE}, a fixed fold assignment is
#' used in cross-validation to ensure reproducibility (default \code{FALSE}).
#'
#' @details
#' For each center index \eqn{k}, a window of width \eqn{w} is formed and
#' contrast vectors are constructed to compare the first and second halves of
#' the window. Before computing the detector statistic, a component-selection
#' step is performed using an \eqn{\ell_1}-penalized regression (lasso, via
#' \pkg{glmnet}) with weights \eqn{\Sigma_{\mathrm{ALL}}^{*-1}} to identify
#' variables that exhibit a shift. The resulting active set determines the
#' projection used in the statistic. Sparse projection matrices indexed by the
#' active-set pattern are cached and reused for computational efficiency. The
#' detector statistic is defined as a weighted quadratic form measuring
#' deviation from the baseline (no-change) projection, and locations at which
#' the statistic exceeds the threshold \code{D} are declared as estimated
#' change points.
#'
#' @return
#' A list with:
#' \itemize{
#'   \item `shiftIndices` — Binary matrix (\eqn{n \times p}) indicating selected
#'         components at each index.
#'   \item `detectorStats` — Numeric vector of detector values over time.
#'   \item `Projection_list` — Cache of projection matrices by active-set pattern.
#'   \item `cps` — Indices of detected change points.
#' }
#'
#' @examples
#' ## Minimal runnable example (fast)
#' set.seed(123)
#' p <- 1
#' mu0 <- rep(0, p)
#' deltas <- list(c(3), c(4))
#' Sig_eta <- diag(0.01, p)
#' Sig_nu  <- random_Signu(p, 0)
#' Phi     <- random_Phi(p, 0)
#' Sig_e1  <- get_Sig_e1_approx(Sig_nu, Phi)
#'
#' # Simulate data with two evenly spaced change points
#' Y <- generate_data(mu0, deltas, Sig_eta, Sig_nu, Phi, Sig_e1,
#'                    errortype = "n", number_cps = 2,
#'                    lengthofeachpart = 100)
#'
#' # Windowed covariance and its inverse
#' w <- 20
#' Sigs <- get_Sigs(w, p, Sig_eta, Sig_nu, Phi, Sig_e1)
#' Sig_all_inv <- Sigs$Sig_all_inv
#'
#' # Run detector with a common threshold choice
#' n <- nrow(Y)
#' D <- min(4, log(exp(2) + p)) * log(n - w)
#' res <- FluxPoint_raw(Y, Sig_all_inv, w, D)
#' res$cps
#'
#' \donttest{
#' ## More realistic example (may take longer)
#' set.seed(123)
#' p <- 3
#' mu0 <- rep(0, p)
#' deltas <- list(c(3, 0, -3), c(0, -2, 4))
#' Sig_eta <- diag(0.01, p)
#' Sig_nu  <- random_Signu(p, 0)
#' Phi     <- random_Phi(p, p)
#' Sig_e1  <- get_Sig_e1_approx(Sig_nu, Phi)
#'
#' Y <- generate_data(mu0, deltas, Sig_eta, Sig_nu, Phi, Sig_e1,
#'                    errortype = "n", number_cps = 2,
#'                    lengthofeachpart = 100)
#'
#' w <- 20
#' Sigs <- get_Sigs(w, p, Sig_eta, Sig_nu, Phi, Sig_e1)
#' Sig_all_inv <- Sigs$Sig_all_inv
#'
#' n <- nrow(Y)
#' D <- min(4, log(exp(2) + p)) * log(n - w)
#' res <- FluxPoint_raw(Y, Sig_all_inv, w, D)
#' res$cps
#' }
#'
#' @importFrom glmnet cv.glmnet glmnet
#' @importFrom pracma findpeaks
#' @export
FluxPoint_raw <- function(data, Sig_all_inv, w, D, needReproduce = FALSE) {
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)

  if (p != 1) {
    Sig_all_inv_half <- sqrtmat(Sig_all_inv)
  }

  if (needReproduce) {
    foldid <- sort(rep(1:10, length.out = w*p))
  }
  else {
    foldid <- NULL
  }

  detectorStats <- rep(0, n)
  shiftIndices <- matrix(0, n, p)

  u0 <- rep(1, w)
  u0 <- as.matrix(u0)
  usplit <- append(rep(0,w/2),rep(1,w/2))
  usplit <- as.matrix(usplit)
  I <- diag(1,p)
  X0 <- kronecker(I,u0)
  projection0 <- Sig_all_inv%*%X0%*%inver(t(X0)%*%Sig_all_inv%*%X0)%*%t(X0)%*%Sig_all_inv
  X_tau <- cbind(kronecker(I, u0), kronecker(I, usplit))

  if (p != 1) {
    X_trans <- Sig_all_inv_half%*%X_tau
  }

  Projection_list <- NULL


  for (k in (w/2):(n - (w/2))) {
    data_in_use <- data[(k - (w/2) + 1):(k + (w/2)), ]
    data_in_use <- c(data_in_use)

    if (p != 1) {
      y_trans <- Sig_all_inv_half%*%data_in_use

      lambda     <- cv.glmnet(
        X_trans, y_trans,
        family         = "gaussian",
        alpha          = 1,
        intercept      = FALSE,
        penalty.factor = append(rep(0, p), rep(1, p)),
        foldid         = foldid
      )

      fit    <- glmnet(
        X_trans, y_trans,
        family         = "gaussian",
        alpha          = 1,
        intercept      = FALSE,
        lambda         = lambda$lambda.1se,
        penalty.factor = append(rep(0, p), rep(1, p))
      )
      shiftIndex <- as.numeric(as.vector(fit$beta)[(p + 1):(2*p)] != 0)
      shiftIndices[k, ] <- shiftIndex
    }
    else {
      shiftIndex <- 1
      shiftIndices[k, ] <- shiftIndex
    }

    s <- sum(shiftIndex)
    if (s != 0){
      shiftIndexname <- paste(shiftIndex, collapse = "")

      if (is.null(Projection_list[[shiftIndexname]])) {
        Ips <- matrix(0, p, s)
        for (j in 1:s) {
          Ips[which(shiftIndex == 1)[j], j] <- 1
        }
        X_tau <- cbind(kronecker(I, u0), kronecker(Ips, usplit))
        Projection_list[[shiftIndexname]] <- Sig_all_inv%*%X_tau%*%inver(t(X_tau)%*%Sig_all_inv%*%X_tau)%*%t(X_tau)%*%Sig_all_inv
      }

      detectorStats[k] <- as.numeric(t(data_in_use)%*%(Projection_list[[shiftIndexname]] - projection0)%*%data_in_use)
    }

  }

  cps <- pracma::findpeaks(detectorStats, minpeakheight = D, minpeakdistance = w + 1)[, 2]
  cps <- sort(cps)
  return(list(shiftIndices = shiftIndices, detectorStats = detectorStats, Projection_list = Projection_list, cps = cps))
}


#' Estimate the fluctuating mean sequence via maximum likelihood
#'
#' @description
#' Implements the maximum likelihood estimation (MLE) procedure for the
#' fluctuating mean sequence \eqn{\{\boldsymbol{\mu}_t\}_{t=1}^n} in the
#' proposed model, given the parameters (or their estimates)
#' \eqn{\Sigma_{\boldsymbol{\eta}}}, \eqn{\Sigma_{\boldsymbol{\nu}}},
#' \eqn{\Phi}, and \eqn{\Gamma_{\boldsymbol{\epsilon}}(0)}.
#'
#' @param data Numeric matrix of dimension \eqn{n \times p}, representing
#' the observed time series \eqn{\{\mathbf{y}_t\}_{t=1}^n}.
#' @param Sig_eta Numeric \eqn{p \times p} matrix
#' \eqn{\Sigma_{\boldsymbol{\eta}}}, covariance of the random walk
#' innovation.
#' @param Sig_nu Numeric \eqn{p \times p} matrix
#' \eqn{\Sigma_{\boldsymbol{\nu}}}, covariance of the VAR(1) innovation.
#' @param Phi Numeric \eqn{p \times p} autoregressive coefficient matrix
#' \eqn{\Phi}.
#' @param Sig_e1 Numeric \eqn{p \times p} initial-state covariance matrix
#' \eqn{\Gamma_{\boldsymbol{\epsilon}}(0)}.
#'
#' @details
#' The algorithm performs forward and backward recursions to compute
#' the MLE of \eqn{\boldsymbol{\mu}_t} under the proposed model with Gaussian noises:
#' \deqn{\mathbf{y}_t = \boldsymbol{\mu}_t + \boldsymbol{\epsilon}_t, \quad
#'         \boldsymbol{\mu}_t = \boldsymbol{\mu}_{t-1} + \boldsymbol{\eta}_t, \quad
#'         \boldsymbol{\epsilon}_t = \Phi \boldsymbol{\epsilon}_{t-1} + \boldsymbol{\nu}_t.}
#'
#' This estimation provides the smoothed mean trajectory that captures
#' gradual fluctuations between change points, conditioned on the given
#' model parameters.
#'
#' @return
#' A numeric matrix of dimension \eqn{n \times p}, containing the estimated
#' fluctuating mean vectors \eqn{\hat{\boldsymbol{\mu}}_t}.
#'
#'
#' @keywords internal
estimate_mus <- function(data, Sig_eta, Sig_nu, Phi, Sig_e1){
  # Sig_eta Sig_nu Phi Sig_e1 p by p
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  muhats <- matrix(0, nrow = n, ncol = p)
  As <- replicate(n, matrix(0, nrow = p, ncol = p), simplify = FALSE)
  bs <- replicate(n, matrix(0, nrow = 1, ncol = p), simplify = FALSE)

  if(p != 1){
    diag(Sig_eta) <- diag(Sig_eta) + 1e-12
  }
  else{
    Sig_eta <- matrix(Sig_eta + 1e-12, 1, 1)
  }

  Sig_eta_inv <- inver(Sig_eta)
  Sig_nu_inv <- inver(Sig_nu)
  Sig_nu_invPhi <- Sig_nu_inv%*%Phi
  PhitSig_nu_invPhi <- t(Phi)%*%Sig_nu_invPhi

  #forwards

  As[[1]] <- inver(Sig_e1)
  bs[[1]] <- (-2)*t(data[1, ])%*%As[[1]]

  for (t in 2:n) {
    temp0 <- inver(Sig_eta_inv + PhitSig_nu_invPhi + As[[t-1]])
    temp1 <- (Sig_eta_inv + Sig_nu_invPhi)
    temp2 <- 2*t(data[t, ] - Phi%*%data[t-1, ])%*%Sig_nu_inv
    temp3 <- temp2%*%Phi + bs[[t-1]]

    As[[t]] <- Sig_eta_inv + Sig_nu_inv - temp1%*%temp0%*%t(temp1)
    bs[[t]] <- temp3%*%temp0%*%t(temp1) - temp2

  }

  #backwards
  muhats[n, ] <- (-1/2)*inver(As[[n]])%*%t(bs[[n]])
  for (t in (n-1):1) {
    muhats[t, ] <- (-1/2)*inver(As[[t]] + Sig_eta_inv + PhitSig_nu_invPhi)%*%t(bs[[t]] - 2*t(muhats[t+1, ])%*%Sig_eta_inv + 2*t(data[t+1, ] - muhats[t+1, ] - Phi%*%data[t, ])%*%Sig_nu_invPhi)
  }

  return(muhats)
}



#' Estimate fluctuating mean segmentwise given detected change points
#'
#' @description
#' Estimates the fluctuating mean sequence \eqn{\{\boldsymbol{\mu}_t\}_{t=1}^n}
#' segmentwise by applying the maximum likelihood estimation (MLE) procedure
#' within each segment defined by detected change points.
#'
#' @param data Numeric matrix of dimension \eqn{n \times p}, representing
#' the multivariate time series \eqn{\{\mathbf{y}_t\}_{t=1}^n}.
#' @param cps Numeric vector of detected change point locations (sorted indices).
#' @param Sig_eta Numeric \eqn{p \times p} covariance matrix
#' \eqn{\Sigma_{\boldsymbol{\eta}}} of the random walk innovation.
#' @param Sig_nu Numeric \eqn{p \times p} covariance matrix
#' \eqn{\Sigma_{\boldsymbol{\nu}}} of the VAR(1) innovation.
#' @param Phi Numeric \eqn{p \times p} autoregressive coefficient matrix
#' \eqn{\Phi}.
#' @param Sig_e1 Numeric \eqn{p \times p} initial-state covariance matrix
#' \eqn{\Gamma_{\boldsymbol{\epsilon}}(0)}.
#'
#' @details
#' The time series is partitioned into contiguous segments defined by the
#' specified change points. Within each segment,
#' \code{\link{estimate_mus}} is applied to obtain the maximum likelihood
#' estimate of the fluctuating mean sequence for that interval. The resulting
#' segment-wise estimates are then concatenated to form a complete piecewise
#' estimate of \eqn{\boldsymbol{\mu}_t} over the entire time series.
#'
#'
#' @return
#' A numeric matrix of dimension \eqn{n \times p}, containing the estimated
#' fluctuating mean sequence across all segments.
#'
#' @examples
#' set.seed(123)
#' p <- 3
#' mu0 <- rep(0, p)
#' deltas <- list(c(3, 0, -3), c(-2, 4, 0))
#' Sig_eta <- diag(0.01, p)
#' Sig_nu  <- random_Signu(p, 0)
#' Phi     <- random_Phi(p, p)
#' Sig_e1  <- get_Sig_e1_approx(Sig_nu, Phi)
#'
#' # Generate data and estimate mean segmentwise after known CPs
#' Y <- generate_data(mu0, deltas, Sig_eta, Sig_nu, Phi, Sig_e1,
#'                    errortype = "n", number_cps = 2, lengthofeachpart = 100)
#' cps <- c(100, 200)
#' mu_seg <- estimate_musseg(Y, cps, Sig_eta, Sig_nu, Phi, Sig_e1)
#' dim(mu_seg)
#'
#' @export
estimate_musseg <- function(data, cps, Sig_eta, Sig_nu, Phi, Sig_e1){
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  muhats <- matrix(0, n, p)
  cps <- append(0, cps)
  cps <- append(cps, n)
  for (index in 2:length(cps)) {
    data_in_use <- as.matrix(data[(cps[index-1] + 1):cps[index], ])
    muhats[(cps[index-1] + 1):cps[index], ] <- estimate_mus(data_in_use , Sig_eta, Sig_nu, Phi, Sig_e1)
  }
  return(muhats)
}


#' Estimate non-diagonal VAR(1) parameters after mean removal
#'
#' @description
#' Estimates the non-diagonal autoregressive coefficient matrix
#' \eqn{\Phi} and innovation covariance matrix \eqn{\Sigma_{\boldsymbol{\nu}}}
#' for the residual process obtained after removing the estimated
#' fluctuating mean from the data. The estimation applies the
#' Lasso to encourage sparsity in the cross-variable dependence structure.
#'
#' @param epsilons Numeric matrix of dimension \eqn{n \times p},
#' representing the estimated residuals
#' \eqn{\boldsymbol{\epsilon}_t = \mathbf{y}_t - \hat{\boldsymbol{\mu}}_t}.
#' @param Sig_nu_diag Numeric \eqn{p \times p} diagonal matrix providing
#' initial (diagonal) estimates of \eqn{\Sigma_{\boldsymbol{\nu}}}.
#' @param Phi_diag Numeric \eqn{p \times p} diagonal matrix providing
#' initial (diagonal) estimates of \eqn{\Phi}.
#' @param replace_diag Logical; if \code{TRUE}, replaces the diagonal
#' entries of the estimated matrices with those from
#' \code{Sig_nu_diag} and \code{Phi_diag} (default \code{FALSE}).
#' @param needReproduce Logical; if \code{TRUE}, uses fixed fold assignments
#' in cross-validation to ensure reproducibility (default \code{FALSE}).
#'
#' @details
#' The function applies a Lasso-penalized VAR(1) fit to the residual
#' process \eqn{\boldsymbol{\epsilon}_t} to estimate cross-dependencies
#' among variables.
#' The fitting is performed using the function
#' \code{fitVAR()}, which is adapted from the \pkg{sparsevar} package.
#' When \code{replace_diag = TRUE}, the diagonal entries of
#' \eqn{\Phi} and \eqn{\Sigma_{\boldsymbol{\nu}}} are replaced by
#' their componentwise estimates obtained in Phase I for improved
#' numerical stability.
#'
#'
#' @return
#' A list containing:
#' \itemize{
#'   \item `Phi_hat` — Estimated non-diagonal autoregressive matrix
#'     \eqn{\Phi}.
#'   \item `Sig_nu_hat` — Estimated non-diagonal innovation covariance
#'     matrix \eqn{\Sigma_{\boldsymbol{\nu}}}.
#' }
#'
#'
#' @export
estimatePhinu_nondiag <- function(epsilons, Sig_nu_diag, Phi_diag, replace_diag = FALSE, needReproduce = FALSE){
  # epsilons n by p
  p <- ncol(epsilons)
  fit <- fitVAR(epsilons, needReproduce = needReproduce)
  Phi_hat <- fit$A[[1]]
  Sig_nu_hat <- fit$sigma + diag(1e-06, nrow = p, ncol = p)

  if (replace_diag) {
    diag(Phi_hat) <- diag(Phi_diag)
    diag(Sig_nu_hat) <- diag(Sig_nu_diag)
  }

  return(list(Phi_hat = Phi_hat, Sig_nu_hat = Sig_nu_hat))
}


#' FluxPoint change point detection algorithm
#'
#' @description
#' Implements the full FluxPoint algorithm for detecting multiple change points
#' in multivariate time series with non-stationary dynamics and cross-correlations.
#' The procedure iteratively estimates model parameters and change point
#' locations, alternating between parameter estimation and detection
#' steps until convergence.
#'
#' @param data Numeric matrix of dimension \eqn{n \times p} containing the
#'   observed multivariate time series.
#' @param w Integer specifying the window size used by the detector.
#' @param tc Numeric tuning constant used in the detection threshold
#'   \eqn{D = \texttt{tc} \cdot \min(4, \log(e^2 + p)) \cdot \log(n - w)}.
#' @param max_iter1 Integer specifying the maximum number of iterations for
#'   the first-stage loop, which alternates between diagonal robust parameter
#'   estimation and change point detection.
#' @param max_iter2 Integer specifying the maximum number of iterations for
#'   the second-stage refinement loop, which incorporates non-diagonal
#'   vector autoregressive updates.
#' @param ignoreCross Logical; if \code{TRUE}, the algorithm terminates after
#'   the first stage and treats the components of the time series as independent.
#' @param noeta Logical; if \code{TRUE}, forces \eqn{\Sigma_{\boldsymbol{\eta}} = 0}
#'   and performs change point detection without accounting for random walk
#'   fluctuations in the mean.
#' @param nophi Logical; if \code{TRUE}, forces \eqn{\Phi = 0} and performs
#'   change point detection without accounting for temporal dependence. This
#'   option should only be used when \code{ignoreCross = TRUE}.
#' @param needReproduce Logical; if \code{TRUE}, fixed folds are used in
#'   internal cross-validation steps to improve reproducibility.
#'
#' @details
#' The algorithm proceeds through the following stages:
#' \enumerate{
#'   \item \emph{Stage I (diagonal estimation):} Robust parameter
#'   estimation is performed to obtain diagonal estimates of
#'   \eqn{\Sigma_{\boldsymbol{\eta}}}, \eqn{\Sigma_{\boldsymbol{\nu}}}, and
#'   \eqn{\Phi}. These estimates are used to construct the windowed covariance
#'   matrix \eqn{\Sigma_{\mathrm{ALL}}^{*}} and its inverse. Change point
#'   detection is then carried out using the resulting detector statistic.
#'   The estimation and detection steps are iterated until the detected change
#'   points stabilize or \code{max_iter1} is reached.
#'
#'   \item \emph{Stage II (refinement with cross-correlation):} If enabled,
#'   the fluctuating mean is estimated segmentwise and removed from the data.
#'   A sparse vector autoregressive model is then fitted to the residuals to
#'   obtain non-diagonal estimates of \eqn{\Phi} and
#'   \eqn{\Sigma_{\boldsymbol{\nu}}}. The covariance matrix
#'   \eqn{\Sigma_{\mathrm{ALL}}^{*}} is recomputed and change point detection
#'   is rerun. This refinement loop is repeated until convergence or until
#'   \code{max_iter2} is reached.
#' }
#'
#' @return A list containing:
#' \itemize{
#'   \item \code{cps}: Sorted indices of the detected change points.
#'   \item \code{Sig_eta_hat}: Final estimate of \eqn{\Sigma_{\boldsymbol{\eta}}}.
#'   \item \code{Sig_nu_hat}: Final estimate of \eqn{\Sigma_{\boldsymbol{\nu}}},
#'     which may be non-diagonal if the second-stage refinement is performed.
#'   \item \code{Phi_hat}: Final estimate of \eqn{\Phi}, which may be
#'     non-diagonal if the second-stage refinement is performed.
#'   \item \code{muhats}: Estimated fluctuating mean sequence.
#'   \item \code{detectorStats}: Detector statistic evaluated over time.
#'   \item \code{cps_at}: A list mapping each detected change point to the
#'     indices of components selected as contributing to that change.
#' }
#'
#' @references
#' Tian, Y. and Safikhani, A. (2025).
#' Multiple change point detection in time series with non-stationary dynamics.
#' Manuscript under review.
#'
#' @examples
#' ## Minimal runnable example (fast)
#' set.seed(123)
#' p <- 1
#' mu0 <- rep(0, p)
#' deltas <- list(c(3), c(-3))
#' Sig_eta <- diag(0.01, p)
#' Sig_nu  <- random_Signu(p, 0)
#' Phi     <- random_Phi(p, 0)
#' Sig_e1  <- get_Sig_e1_approx(Sig_nu, Phi)
#'
#' # Simulate data with two evenly spaced change points
#' Y <- generate_data(mu0, deltas, Sig_eta, Sig_nu, Phi, Sig_e1,
#'                    errortype = "n", number_cps = 2, lengthofeachpart = 100)
#'
#' # Run the algorithm
#' out <- FluxPoint(Y, w = 20, tc = 1, max_iter1 = 5, max_iter2 = 5)
#' out$cps
#' # Visualization
#' p1 <- plot_FluxPoint(Y, out$muhats, out$cps, titlename = "", xaxis = "Time")
#' print(p1)
#'
#' \donttest{
#' ## More realistic example (may take longer)
#' set.seed(123)
#' p <- 3
#' mu0 <- rep(0, p)
#' deltas <- list(c(3, 0, -3), c(0, -2, 4))
#' Sig_eta <- diag(0.01, p)
#' Sig_nu  <- random_Signu(p, 0)
#' Phi     <- random_Phi(p, p)
#' Sig_e1  <- get_Sig_e1_approx(Sig_nu, Phi)
#'
#' Y <- generate_data(mu0, deltas, Sig_eta, Sig_nu, Phi, Sig_e1,
#'                    errortype = "n", number_cps = 2, lengthofeachpart = 100)
#'
#' out <- FluxPoint(Y, w = 20, tc = 1, max_iter1 = 5, max_iter2 = 5)
#' out$cps
#'
#' # Visualization
#' p1 <- plot_FluxPoint(Y, out$muhats, out$cps, titlename = "", xaxis = "Time")
#' print(p1)
#' }
#'
#' @export
FluxPoint <- function (data, w, tc, max_iter1, max_iter2, ignoreCross = FALSE, noeta = FALSE, nophi = FALSE, needReproduce = FALSE) {
  data <- as.matrix(data)
  p <- ncol(data)
  n <- nrow(data)
  D <- tc*min(4, log(exp(2) + p))*log(n - w)
  cps_at <- NULL

  phiLower = -0.8
  phiUpper = 0.8
  sigetaUpper = Inf

  if (noeta) {
    sigetaUpper = 1e-24
  }
  if (nophi) {
    phiLower = 0
    phiUpper = 1e-24
  }

  est_diag <- estimate_RWVAR_cp_heter(data, phiLower = phiLower, phiUpper = phiUpper, sigetaUpper = sigetaUpper)
  Sig_e1 <- get_Sig_e1_approx(est_diag$Sig_nu, est_diag$Phi)
  Sig_all_inv <- get_Sigs(w, p, est_diag$Sig_eta, est_diag$Sig_nu, est_diag$Phi, Sig_e1)$Sig_all_inv
  res_new <- FluxPoint_raw(data, Sig_all_inv, w, D, needReproduce = needReproduce)


  for (i in 1:max_iter1) {
    est_diag <- estimate_RWVAR_cp_heter(data, CPs = res_new$cps, phiLower = phiLower, phiUpper = phiUpper, sigetaUpper = sigetaUpper)
    Sig_e1 <- get_Sig_e1_approx(est_diag$Sig_nu, est_diag$Phi)
    Sig_all_inv <- get_Sigs(w, p, est_diag$Sig_eta, est_diag$Sig_nu, est_diag$Phi, Sig_e1)$Sig_all_inv
    res_old <- res_new
    res_new <- FluxPoint_raw(data, Sig_all_inv, w, D, needReproduce = needReproduce)

    if (setequal(res_new$cps, res_old$cps)) {
      break
    }
  }

  est_diag <- estimate_RWVAR_cp_heter(data, CPs = res_new$cps, phiLower = phiLower, phiUpper = phiUpper, sigetaUpper = sigetaUpper)
  Sig_e1 <- get_Sig_e1_approx(est_diag$Sig_nu, est_diag$Phi)


  if (ignoreCross) {
    if (!is.null(res_new$cps)) {
      shiftIndices <- matrix(res_new$shiftIndices[res_new$cps, ], nrow = length(res_new$cps))
      for (i in 1:nrow(shiftIndices)) {
        cps_at[[i]] <- which(shiftIndices[i, ] != 0)
      }
    }
    muhats <- estimate_musseg(data, res_new$cps, est_diag$Sig_eta, est_diag$Sig_nu, est_diag$Phi, Sig_e1)
    return(list(cps = res_new$cps, Sig_eta_hat = est_diag$Sig_eta, Sig_nu_hat = est_diag$Sig_nu, Phi_hat = est_diag$Phi, muhats = muhats, detectorStats = res_new$detectorStats, cps_at = cps_at))
  }

  if (p == 1) {
    muhats <- estimate_musseg(data, res_new$cps, est_diag$Sig_eta, est_diag$Sig_nu, est_diag$Phi, Sig_e1)
    return(list(cps = res_new$cps, Sig_eta_hat = est_diag$Sig_eta, Sig_nu_hat = est_diag$Sig_nu, Phi_hat = est_diag$Phi, muhats = muhats, detectorStats = res_new$detectorStats))
  }

  muhats <- estimate_musseg(data, res_new$cps, est_diag$Sig_eta, est_diag$Sig_nu, est_diag$Phi, Sig_e1)
  epsilons <- data - muhats
  est_nondiag <- estimatePhinu_nondiag(epsilons, est_diag$Sig_nu, est_diag$Phi, needReproduce = needReproduce)
  res_old <- res_new
  Sig_e1 <- get_Sig_e1_approx(est_nondiag$Sig_nu_hat, est_nondiag$Phi_hat)
  Sig_all_inv <- get_Sigs(w, p, est_diag$Sig_eta, est_nondiag$Sig_nu_hat, est_nondiag$Phi_hat, Sig_e1)$Sig_all_inv
  res_new <- FluxPoint_raw(data, Sig_all_inv, w, D, needReproduce = needReproduce)


  for (i in 1:max_iter2) {
    muhats <- estimate_musseg(data, res_new$cps, est_diag$Sig_eta, est_nondiag$Sig_nu_hat, est_nondiag$Phi_hat, Sig_e1)
    epsilons <- data - muhats
    est_nondiag <- estimatePhinu_nondiag(epsilons, est_diag$Sig_nu, est_diag$Phi, needReproduce = needReproduce)
    Sig_e1 <- get_Sig_e1_approx(est_nondiag$Sig_nu_hat, est_nondiag$Phi_hat)
    Sig_all_inv <- get_Sigs(w, p, est_diag$Sig_eta, est_nondiag$Sig_nu_hat, est_nondiag$Phi_hat, Sig_e1)$Sig_all_inv
    res_old <- res_new
    res_new <- FluxPoint_raw(data, Sig_all_inv, w, D, needReproduce = needReproduce)

    if (setequal(res_new$cps, res_old$cps)) {
      break
    }
  }

  if (!is.null(res_new$cps)) {
    shiftIndices <- matrix(res_new$shiftIndices[res_new$cps, ], nrow = length(res_new$cps))
    for (i in 1:nrow(shiftIndices)) {
      cps_at[[i]] <- which(shiftIndices[i, ] != 0)
    }
  }

  muhats <- estimate_musseg(data, res_new$cps, est_diag$Sig_eta, est_nondiag$Sig_nu_hat, est_nondiag$Phi_hat, Sig_e1)
  epsilons <- data - muhats
  est_nondiag <- estimatePhinu_nondiag(epsilons, est_diag$Sig_nu, est_diag$Phi, needReproduce = needReproduce)


  return(list(cps = res_new$cps, Sig_eta_hat = est_diag$Sig_eta, Sig_nu_hat = est_nondiag$Sig_nu_hat, Phi_hat = est_nondiag$Phi_hat, muhats = muhats, cps_at = cps_at, detectorStats = res_new$detectorStats))
}



#' Plot multivariate time series with detected change points and estimated means
#'
#' @description
#' Visualizes multivariate time series data together with the estimated
#' fluctuating mean sequence and detected change points obtained from the
#' proposed change point detection (CPD) algorithm.
#' Each variable is plotted in a separate panel (facet), with vertical dashed
#' lines marking detected change points and solid curves showing the
#' estimated means when provided.
#'
#' @param data Numeric matrix of dimension \eqn{n \times p}, representing
#' the observed multivariate time series \eqn{\{\mathbf{y}_t\}_{t=1}^n}.
#' @param muhats Optional numeric matrix of the same dimension as `data`,
#' giving the estimated fluctuating mean sequence
#' \eqn{\{\hat{\boldsymbol{\mu}}_t\}_{t=1}^n}.
#' If \code{NULL}, only raw data and detected change points are shown.
#' @param cps Numeric vector of detected change point locations.
#' @param titlename Character string for the plot title.
#' @param xaxis Character string for the x-axis label (e.g., "Time").
#'
#' @details
#' When \eqn{p = 1}, the function produces a single plot displaying the
#' observed time series, the estimated mean curve, and vertical dashed lines
#' indicating the detected change points. When \eqn{p > 1}, each variable is
#' shown in a separate facet with independently scaled y-axes for improved
#' readability. If \code{muhats} is provided, the estimated mean is overlaid
#' using \code{geom_line()}; otherwise, only the observed data and detected
#' change points are displayed.
#'
#' @return
#' A \pkg{ggplot2} object displaying the time series, estimated means
#' (if provided), and detected change points.
#'
#' @importFrom ggplot2 ggplot aes geom_point geom_line geom_vline facet_grid labs theme
#' @export
plot_FluxPoint <- function(data, muhats, cps, titlename = "", xaxis = "") {
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  y_values <- c(data)
  muhats_values <- c(muhats)
  x_values <- rep(1:n, p)
  variables <- NULL
  level <- NULL
  for (i in 1:p) {
    variables <- append(variables, rep(paste0(i), n))
    level <- append(level, rep(paste0(i)))
  }
  variables <- factor(variables, levels = level)

  if (p == 1) {
    if (!is.null(muhats)) {
      df <- data.frame(y_values, x_values, variables, muhats_values)
      p <- ggplot(df, aes(y=y_values, x=x_values)) +
        geom_point(aes(color = variables)) +
        geom_line(aes(x = x_values, y = muhats_values)) +
        geom_vline(xintercept = cps, linetype="dashed", color = "black") +
        theme(legend.position = "none") +
        labs(title=titlename, x = xaxis, y = "")
    }
    else {
      df <- data.frame(y_values, x_values, variables)
      p <- ggplot(df, aes(y=y_values, x=x_values)) +
        geom_point(aes(color = variables)) +
        geom_vline(xintercept = cps, linetype="dashed", color = "black") +
        theme(legend.position = "none") +
        labs(title=titlename, x = xaxis, y = "")
    }
    return(p)
  }

  if (!is.null(muhats)) {
    df <- data.frame(y_values, x_values, variables, muhats_values)
    p <- ggplot(df, aes(y=y_values, x=x_values)) +
      geom_point(aes(color = variables)) +
      geom_line(aes(x = x_values, y = muhats_values)) +
      geom_vline(xintercept = cps, linetype="dashed", color = "black") +
      facet_grid(variables ~ ., scales = "free_y") +
      theme(legend.position = "none") +
      labs(title=titlename, x = xaxis, y = "")
  }
  else {
    df <- data.frame(y_values, x_values, variables)
    p <- ggplot(df, aes(y=y_values, x=x_values)) +
      geom_point(aes(color = variables)) +
      geom_vline(xintercept = cps, linetype="dashed", color = "black") +
      facet_grid(variables ~ ., scales = "free_y") +
      theme(legend.position = "none") +
      labs(title=titlename, x = xaxis, y = "")
  }
  return(p)

}


#' Evaluate change point detection accuracy metrics
#'
#' @description
#' Computes standard evaluation metrics — bias, precision, recall, and F1-score —
#' for change point detection results by comparing estimated change points
#' against true ones within a specified tolerance (acceptance radius).
#'
#' @param n Integer; total series length.
#' @param num_cps Integer; true number of change points.
#' @param est_cps Numeric vector of estimated change point locations.
#' @param accept_radius Numeric; tolerance radius within which an estimated
#' change point is considered correctly detected (a true positive).
#'
#' @details
#' True change points are assumed to occur at evenly spaced positions. An
#' estimated change point is counted as a true positive if it lies within
#' \code{accept_radius} of any true change point location. Estimated points
#' that do not match any true change point are classified as false positives,
#' while true change points that are not detected are counted as false
#' negatives. Bias is defined as the absolute difference between the true and
#' estimated numbers of change points.
#'
#' The metrics are defined as:
#' \deqn{
#' \text{Precision} = \frac{TP}{TP + FP}, \quad
#' \text{Recall} = \frac{TP}{TP + FN}, \quad
#' F_1 = \frac{2 \cdot \text{Precision} \cdot \text{Recall}}{\text{Precision} + \text{Recall}}.
#' }
#'
#' @return
#' A list containing:
#' \itemize{
#'   \item `bias` — Absolute difference between true and estimated number of change points.
#'   \item `precision` — Proportion of correctly detected change points among all detections.
#'   \item `recall` — Proportion of true change points correctly detected.
#'   \item `F1` — Harmonic mean of precision and recall.
#' }
#'
#'
#' @export
get_metrics <- function(n, num_cps, est_cps, accept_radius){
  bias <- abs(num_cps - length(est_cps))
  precision <- 1
  recall <- 0
  F1 <- 0
  if (length(est_cps) != 0) {
    goodests <- 0
    TP <- 0
    for (k in 1:num_cps) {
      temp <- length(which((est_cps <= (k*n/(num_cps + 1) + accept_radius))&(est_cps >= (k*n/(num_cps + 1) - accept_radius))))
      goodests <- goodests + temp
      TP <- TP + (temp != 0)
    }
    FP <- length(est_cps) - goodests
    FN <- num_cps - TP
    precision <- (TP/(TP + FP))
    recall <- (TP/(TP + FN))
    if (TP != 0) {
      F1 <- (2*(precision*recall)/(precision+recall))
    }
  }

  return(list(bias = bias, precision = precision, recall = recall, F1 = F1))
}



#' Fit VAR Model with Elastic Net via Cross Validation
#'
#' Estimates a (possibly high-dimensional) VAR model using penalized least squares
#' with an elastic net penalty and cross validation.
#' This function is adapted from the \emph{sparsevar} package
#' (<https://github.com/svazzole/sparsevar/tree/master>), which is distributed under
#' the GNU General Public License v2. The code has been modified to specifically implement
#' the elastic net penalty (penalty = "ENET") and cross validation (method = "cv").
#'
#' @param data A numeric matrix or data frame with time series data (observations in rows,
#'   variables in columns).
#' @param p Integer. The order of the VAR model.
#' @param ... Additional options for estimation. Global options include:
#'   \itemize{
#'     \item \code{threshold}: Logical. If \code{TRUE}, all entries smaller than the oracle
#'           threshold are set to zero.
#'     \item \code{scale}: Logical. Whether to scale the data (default is \code{FALSE}).
#'     \item \code{nfolds}: Integer. The number of folds used for cross validation (default is 10).
#'     \item \code{parallel}: Logical. If \code{TRUE}, use multicore backend (default is \code{FALSE}).
#'     \item \code{ncores}: Integer. If \code{parallel = TRUE}, specify the number of cores to use.
#'     \item \code{alpha}: Numeric. The elastic net mixing parameter (default is 1, i.e. LASSO).
#'     \item \code{type.measure}: Character. The error measure for CV (e.g., \code{"mse"} or \code{"mae"}).
#'     \item \code{nlambda}: Integer. The number of lambda values to use in cross validation (default is 100).
#'     \item \code{leaveOut}: Integer. In time slice validation, leave out the last observations (default is 15).
#'     \item \code{horizon}: Integer. The forecast horizon to use for estimating error (default is 1).
#'     \item \code{lambda}: Either a numeric vector of lambda values or a string indicating which
#'           lambda to use (default is \code{"lambda.min"}).
#'     \item \code{return_fit}: Logical. If \code{TRUE}, return the complete fit object.
#'   }
#'
#' @return A list with the following components:
#'   \item{mu}{A vector of means for each variable.}
#'   \item{A}{A list (of length \code{p}) of the estimated coefficient matrices for the VAR process.}
#'   \item{fit}{(Optional) The complete results of the penalized least squares estimation.}
#'   \item{lambda}{The chosen lambda value (by cross validation).}
#'   \item{mse}{The minimum mean squared error from cross validation.}
#'   \item{mse_sd}{The standard deviation of the mean squared error.}
#'   \item{time}{Elapsed time for the estimation.}
#'   \item{series}{The (possibly transformed) input time series.}
#'   \item{residuals}{The residuals of the VAR model.}
#'   \item{sigma}{The estimated variance/covariance matrix of the residuals.}
#'
#' @references The original source code is adapted from the
#'   \href{https://github.com/svazzole/sparsevar/tree/master}{sparsevar package},
#'   which is distributed under the GNU General Public License v2.
#'
#' @keywords internal
fitVAR <- function(data, p = 1, needReproduce = FALSE, ...) {
  opt <- list(...)

  # convert data to matrix if necessary
  if (!is.matrix(data)) {
    data <- as.matrix(data)
  }

  cnames <- colnames(data)

  # Use cross validation to find lambda and fit the model
  out <- cvVAR(data, p, opt, needReproduce = needReproduce)

  # Add variable names to the estimated matrices if available
  if (!is.null(cnames)) {
    for (k in 1:length(out$A)) {
      colnames(out$A[[k]]) <- cnames
      rownames(out$A[[k]]) <- cnames
    }
  }

  return(out)
}


#' Cross-Validated VAR Estimation using Elastic Net
#'
#' This internal function performs cross validation for VAR estimation using the elastic net
#' penalty. It prepares the data, calls the elastic net CV routine, reshapes the estimated coefficients,
#' applies optional thresholding, computes residuals, and estimates the error covariance.
#'
#' @param data A numeric matrix with time series data (observations in rows, variables in columns).
#' @param p Integer. The order of the VAR model.
#' @param opt List. A list of options (see \code{fitVAR} for details).
#'
#' @return A list with components:
#'   \item{mu}{Vector of means of the original series.}
#'   \item{A}{List of VAR coefficient matrices (one for each lag).}
#'   \item{fit}{The complete elastic net CV fit (if requested).}
#'   \item{lambda}{The optimal lambda value chosen by CV.}
#'   \item{mse}{The minimum mean squared error from CV.}
#'   \item{mse_sd}{Standard deviation of the MSE.}
#'   \item{time}{Elapsed time for the ENET estimation.}
#'   \item{series}{The transformed series (after centering/scaling).}
#'   \item{residuals}{Residuals from the VAR model.}
#'   \item{sigma}{Estimated covariance matrix of the residuals.}
#'
#' @keywords internal
cvVAR <- function(data, p, opt = NULL, needReproduce = FALSE) {
  nc <- ncol(data)
  nr <- nrow(data)

  threshold <- ifelse(!is.null(opt$threshold), opt$threshold, FALSE)
  threshold_type <- ifelse(!is.null(opt$threshold_type), opt$threshold_type, "soft")
  return_fit <- ifelse(!is.null(opt$return_fit), opt$return_fit, FALSE)

  # Transform the dataset into design matrices for VAR estimation
  tr_dt <- transformData(data, p, opt)

  # Fit the elastic net model via cross validation
  t <- Sys.time()
  fit <- cvVAR_ENET(tr_dt$X, tr_dt$y, nvar = nc, opt, needReproduce = needReproduce)
  elapsed <- Sys.time() - t

  # Extract the lambda option
  lambda <- ifelse(is.null(opt$lambda), "lambda.min", opt$lambda)

  # Extract coefficients (ignoring the intercept) and reshape into a matrix
  Avector <- stats::coef(fit, s = lambda)
  A <- matrix(Avector[2:length(Avector)],
              nrow = nc, ncol = nc * p,
              byrow = TRUE)

  mse <- min(fit$cvm)

  # Apply thresholding if requested
  if (threshold == TRUE) {
    A <- applyThreshold(A, nr, nc, p, type = threshold_type)
  }

  # Split the coefficient matrix into a list (one matrix per lag)
  A <- splitMatrix(A, p)

  # Compute residuals from the VAR model
  res <- computeResiduals(tr_dt$series, A)

  # Extract the standard deviation of the CV error
  ix <- which(fit$cvm == min(fit$cvm))
  mse_sd <- fit$cvsd[ix]

  # Create and return the output list
  output <- list()
  output$mu <- tr_dt$mu
  output$A <- A

  if (return_fit == TRUE) {
    output$fit <- fit
  }

  output$lambda <- fit$lambda.min
  output$mse <- mse
  output$mse_sd <- mse_sd
  output$time <- elapsed
  output$series <- tr_dt$series
  output$residuals <- res

  # Estimate the error covariance matrix
  output$sigma <- estimateCovariance(res)

  attr(output, "class") <- "var"
  attr(output, "type") <- "fit"
  return(output)
}


#' Cross Validation for Elastic Net VAR Estimation
#'
#' This internal function performs cross validation using elastic net (ENET)
#' estimation via the \code{glmnet} package. It supports parallel processing if requested.
#'
#' @param X A numeric matrix of predictors.
#' @param y Numeric vector of responses.
#' @param nvar Integer. The number of variables in the original VAR (number of columns in data).
#' @param opt List. A list of options including:
#'   \itemize{
#'     \item \code{alpha}: The elastic net mixing parameter (default = 1).
#'     \item \code{nlambda}: Number of lambda values (default = 100).
#'     \item \code{type.measure}: Error measure for CV (default = "mse").
#'     \item \code{nfolds}: Number of folds for CV (default = 10).
#'     \item \code{parallel}: Logical. Whether to use parallel processing (default = FALSE).
#'     \item \code{ncores}: Number of cores for parallel processing (default = 1).
#'     \item \code{lambdas_list}: Optionally, a user-specified list of lambdas.
#'     \item \code{folds_ids}: Optionally, user-specified fold IDs for CV.
#'   }
#'
#' @return An object of class \code{cv.glmnet} as returned by \code{glmnet::cv.glmnet}.
#'
#' @keywords internal
cvVAR_ENET <- function(X, y, nvar, opt, needReproduce = FALSE) {
  a <- ifelse(is.null(opt$alpha), 1, opt$alpha)
  nl <- ifelse(is.null(opt$nlambda), 100, opt$nlambda)
  tm <- ifelse(is.null(opt$type.measure), "mse", opt$type.measure)
  nf <- ifelse(is.null(opt$nfolds), 10, opt$nfolds)
  parall <- ifelse(is.null(opt$parallel), FALSE, opt$parallel)
  ncores <- ifelse(is.null(opt$ncores), 1, opt$ncores)

  # Define lambda values to use
  if (!is.null(opt$lambdas_list)) {
    lambdas_list <- opt$lambdas_list
  } else {
    lambdas_list <- c(0)
  }

  # Assign fold
  if (needReproduce) {
    nr <- nrow(X)
    folds_ids <- rep(sort(rep(seq(nf), length.out = nr / nvar)), nvar)
  }
  else {
    folds_ids <- numeric(0)
  }



  # Call cv.glmnet with or without parallel processing
  if (parall == TRUE) {
    if (ncores < 1) {
      stop("The number of cores must be > 1")
    } else {
      cl <- doParallel::registerDoParallel(cores = ncores)
      if (length(folds_ids) == 0) {
        if (length(lambdas_list) < 2) {
          cvfit <- glmnet::cv.glmnet(X, y,
                                     alpha = a, nlambda = nl,
                                     type.measure = tm, nfolds = nf,
                                     parallel = TRUE, standardize = FALSE)
        } else {
          cvfit <- glmnet::cv.glmnet(X, y,
                                     alpha = a, lambda = lambdas_list,
                                     type.measure = tm, nfolds = nf,
                                     parallel = TRUE, standardize = FALSE)
        }
      } else {
        if (length(lambdas_list) < 2) {
          cvfit <- glmnet::cv.glmnet(X, y,
                                     alpha = a, nlambda = nl,
                                     type.measure = tm, foldid = folds_ids,
                                     parallel = TRUE, standardize = FALSE)
        } else {
          cvfit <- glmnet::cv.glmnet(X, y,
                                     alpha = a, lambda = lambdas_list,
                                     type.measure = tm, foldid = folds_ids,
                                     parallel = TRUE, standardize = FALSE)
        }
      }
    }
  } else {
    if (length(folds_ids) == 0) {
      if (length(lambdas_list) < 2) {
        cvfit <- glmnet::cv.glmnet(X, y,
                                   alpha = a, nlambda = nl,
                                   type.measure = tm, nfolds = nf,
                                   parallel = FALSE, standardize = FALSE)
      } else {
        cvfit <- glmnet::cv.glmnet(X, y,
                                   alpha = a, lambda = lambdas_list,
                                   type.measure = tm, nfolds = nf,
                                   parallel = FALSE, standardize = FALSE)
      }
    } else {
      if (length(lambdas_list) < 2) {
        cvfit <- glmnet::cv.glmnet(X, y,
                                   alpha = a, nlambda = nl,
                                   type.measure = tm, foldid = folds_ids,
                                   parallel = FALSE, standardize = FALSE)
      } else {
        cvfit <- glmnet::cv.glmnet(X, y,
                                   alpha = a, lambda = lambdas_list,
                                   type.measure = tm, foldid = folds_ids,
                                   parallel = FALSE, standardize = FALSE)
      }
    }
  }

  return(cvfit)
}


#' Transform Data for VAR Estimation
#'
#' Transforms the input time series data into the design matrices required for VAR estimation.
#' This includes centering, optional scaling, and constructing the lagged predictor matrix.
#'
#' @param data A numeric matrix or data frame with time series data (observations in rows,
#'   variables in columns).
#' @param p Integer. The order of the VAR model (number of lags).
#' @param opt List. Options for data transformation. Supported options include:
#'   \itemize{
#'     \item \code{scale}: Logical. Whether to scale the data columns (default is \code{FALSE}).
#'     \item \code{center}: Logical. Whether to center the data columns (default is \code{TRUE}).
#'   }
#'
#' @return A list with the following components:
#'   \item{X}{The design matrix (via the Kronecker product) for lagged predictors.}
#'   \item{y}{A vectorized response corresponding to the lagged data.}
#'   \item{series}{The (centered and possibly scaled) original time series matrix.}
#'   \item{mu}{A row vector of the column means used for centering.}
#'
#' @keywords internal
transformData <- function(data, p, opt) {
  nr <- nrow(data)
  nc <- ncol(data)
  data <- as.matrix(data)

  # Determine whether to scale and/or center the data
  scale_flag <- ifelse(is.null(opt$scale), FALSE, opt$scale)
  center_flag <- ifelse(is.null(opt$center), TRUE, opt$center)

  if (center_flag == TRUE) {
    m <- colMeans(data)
    cm <- matrix(rep(m, nrow(data)), nrow = nrow(data), byrow = TRUE)
    data <- data - cm
  } else {
    m <- rep(0, nc)
  }

  if (scale_flag == TRUE) {
    data <- apply(X = data, MARGIN = 2, FUN = scale)
  }

  # Construct lagged matrices
  tmpX <- data[1:(nr - 1), ]
  tmpY <- data[2:nr, ]
  tmpX <- duplicateMatrix(tmpX, p)
  tmpY <- tmpY[p:nrow(tmpY), ]
  y <- as.vector(tmpY)

  # Create the design matrix using the Kronecker product
  I <- Matrix::Diagonal(nc)
  X <- kronecker(I, tmpX)

  output <- list()
  output$X <- X
  output$y <- y
  output$series <- data
  output$mu <- t(m)
  return(output)
}


#' Apply Thresholding to VAR Coefficients
#'
#' Applies a thresholding rule to a coefficient matrix by setting entries below a
#' certain threshold to zero. Two types of thresholding are available: "soft" and "hard".
#'
#' @param a_mat Numeric matrix. The coefficient matrix to be thresholded.
#' @param nr Integer. The number of rows in the original data.
#' @param nc Integer. The number of variables (columns) in the original data.
#' @param p Integer. The order of the VAR model.
#' @param type Character. The type of threshold to apply; either \code{"soft"} (default)
#'   or \code{"hard"}.
#'
#' @return The thresholded coefficient matrix.
#'
#' @keywords internal
applyThreshold <- function(a_mat, nr, nc, p, type = "soft") {
  if (type == "soft") {
    tr <- 1 / sqrt(p * nc * log(nr))
  } else if (type == "hard") {
    tr <- (nc) ^ (-0.49)
  } else {
    stop("Unknown threshold type. Possible values are: \"soft\" or \"hard\"")
  }
  l_mat <- abs(a_mat) >= tr
  a_mat <- a_mat * l_mat
  return(a_mat)
}


#' Compute VAR Model Residuals
#'
#' Computes the residuals from a VAR model by subtracting the fitted values (obtained
#' from the estimated coefficient matrices) from the original time series data.
#'
#' @param data A numeric matrix of the original time series (observations in rows).
#' @param A List. A list of VAR coefficient matrices (one for each lag).
#'
#' @return A numeric matrix of residuals.
#'
#' @keywords internal
computeResiduals <- function(data, A) {
  nr <- nrow(data)
  nc <- ncol(data)
  p <- length(A)
  res <- matrix(0, nrow = nr, ncol = nc)
  f <- matrix(0, nrow = nr, ncol = nc)

  for (i in 1:p) {
    tmpD <- rbind(matrix(0, nrow = i, ncol = nc), data[1:(nrow(data) - i), ])
    tmpF <- t(A[[i]] %*% t(tmpD))
    f <- f + tmpF
  }
  res <- data - f
  return(res)
}


#' Estimate Covariance Matrix from Residuals
#'
#' Estimates the covariance (or variance) matrix of the residuals using shrinkage estimation.
#' This function utilizes \code{corpcor::cov.shrink} for covariance estimation.
#'
#' @param res A numeric matrix of residuals from the VAR model.
#' @param ... Additional arguments passed to \code{corpcor::cov.shrink} (if any).
#'
#' @return A numeric covariance matrix.
#'
#' @importFrom corpcor cov.shrink
#' @keywords internal
estimateCovariance <- function(res, ...) {
  nc <- ncol(res)
  s <- corpcor::cov.shrink(res, verbose = FALSE)
  sigma <- matrix(0, nrow = nc, ncol = nc)
  for (i in 1:nc) {
    for (j in 1:nc) {
      sigma[i, j] <- s[i, j]
    }
  }
  return(sigma)
}


#' Construct Lagged Design Matrix for VAR
#'
#' Duplicates the original data matrix to create a lagged predictor matrix for VAR estimation.
#'
#' @param data A numeric matrix with time series data (observations in rows).
#' @param p Integer. The order of the VAR model (number of lags).
#'
#' @return A numeric matrix with duplicated columns corresponding to lagged observations.
#'
#' @keywords internal
duplicateMatrix <- function(data, p) {
  nr <- nrow(data)
  nc <- ncol(data)
  outputData <- data
  if (p > 1) {
    for (i in 1:(p - 1)) {
      tmpData <- matrix(0, nrow = nr, ncol = nc)
      tmpData[(i + 1):nr, ] <- data[1:(nr - i), ]
      outputData <- cbind(outputData, tmpData)
    }
  }
  outputData <- outputData[p:nr, ]
  return(outputData)
}


#' Split Coefficient Matrix into VAR Lags
#'
#' Splits a matrix of estimated coefficients into a list of matrices,
#' each corresponding to one lag of the VAR model.
#'
#' @param M A numeric matrix of coefficients.
#' @param p Integer. The order of the VAR model (number of lags).
#'
#' @return A list of \code{p} matrices, each of dimension (number of variables) x (number of variables).
#'
#' @keywords internal
splitMatrix <- function(M, p) {
  nr <- nrow(M)
  A <- list()
  for (i in 1:p) {
    ix <- ((i - 1) * nr) + (1:nr)
    A[[i]] <- M[ , ix]
  }
  return(A)
}
