#' @title Function to estimate the one-sample mean frequency under a group sequential design.
#' @description Computes the nonparametric one-sample estimator of the mean frequency function for composite endpoints consisting
#' of recurrent events and terminal event (death), using data observed up to a given calendar time. The estimator is constructed
#' on the event-time scale (time since subject enrollment).
#'
#' @param data A data frame generated by \code{Onesample.generate.sequential()} (optionally after applying \code{Apply.calendar.censoring()})
#' containing composite endpoint data in long format. Each subject may contribute multiple rows corresponding to recurrent events,
#' terminal event, or censoring.
#' @param t Optional numeric vector specifying event times (time since enrollment) at which the mean frequency estimator
#' and its variance are evaluated. Default is \code{NULL}.
#'
#' @returns If \code{t} is provided, returns a list containing:
#' \itemize{
#' \item \code{muhat}: Estimated mean frequency function evaluated at \code{t}.
#' \item \code{varhat}: Estimated variance of \code{muhat} at \code{t}.
#' \item \code{n}: Sample size of the input dataset.
#' }
#'  If \code{t} is \code{NULL}, returns a numeric vector \code{xihat} giving the estimated cumulative variance component
#'  evaluated at all ordered observed event time. This quantity is not the mean frequency estimator itself.
#'  @importFrom dplyr group_by filter mutate count select slice ungroup
#'  @importFrom tibble as_tibble
#'  @importFrom bdsmatrix bdsBlock
#'  @importFrom rlang .data
#' @export
#'
#' @examples
#' df <- Onesample.generate.sequential(size = 400, recruitment = 3,
#' calendar = 5, random.censor.rate = 0.05, seed = 1123)
#' OneSample.Estimator.sequential(data= df, t = c(1.5, 2.9, 4.6))
OneSample.Estimator.sequential <- function(data, t = NULL){

  # Only select patients who are already in the study
  # Convert the event times from calendar scale back to event scale
  data_new <- data %>%
    dplyr::group_by(.data$id) %>%
    dplyr::filter(!is.na(.data$status)) %>%
    dplyr::mutate(true_event_time = .data$event_time_cal - .data$e)

  # sort all event times (recurrent, death, and censoring)
  data_new <- data_new[order(data_new$true_event_time),]


  data_original <- data %>%
    dplyr::group_by(.data$id) %>%
    dplyr::filter(!is.na(.data$status)) %>%
    dplyr::mutate(true_event_time = .data$event_time_cal - .data$e)
  # The difference between 'data_new' and 'data_original' is the the former is sorted
  # This part in new

  # All event times, including recurrent, death and censoring
  # sorted.time <- data_new$time
  sorted.time <- data_new$true_event_time
  sorted.event <- data_new$event
  n <- length(unique(data_new$id)) # sample size
  L <- length(sorted.event) # total number of all events (recurrent, death and censoring)

  # last observation for each subject, death or censoring
  # last.time <- data_new$time[data_new$status == 1 | data_new$status == 0]
  # last.time.unsorted <- data$time[data$status == 1 | data$status == 0]
  last.time <- data_new$true_event_time[data_new$status == 1 | data_new$status == 0]
  last.time.unsorted <- data_original$true_event_time[data$status == 1 | data$status == 0]
  last.time.id <- match(last.time, last.time.unsorted)

  # At risk process for each event
  Y <- 1*(matrix(rep(last.time, L), n, L) >= matrix(rep(sorted.time, each = n), n, L))

  # Kaplan Meier estimates for death time points
  death <- data_new$death
  KMhat <- cumprod(1 - death/colSums(Y))

  # Cumulative hazard estimator (Nelson-Aalen type) for all dN(t) = 1 (recurrent and death)
  dRhat <- sorted.event/colSums(Y)
  Rhat <- cumsum(dRhat)

  # Mean frequency estimator
  dmuhat <- KMhat*dRhat
  muhat <- cumsum(dmuhat)

  ##### variance estimator #######
  ## The calculation uses original id order, not the sorted last observation time order##
  # Y <- Y[last.time.id,]
  Y <- Y[order(last.time.id),]
  Ybar.temp <- colSums(Y)
  Ybar <- Ybar.temp + 1*(Ybar.temp== 0) # Preventing the denominator (total number at risk) to be zero

  # cumulative hazard for death
  delta <- data[data$status == 1|data$status == 0,]$death
  ND <- 1*(matrix(rep(last.time.unsorted, L), n, L) <= matrix(rep(sorted.time, each = n), n, L))*delta
  dND <- t(apply(ND, 1, function(x) diff(c(0,x))))
  dlambdaDhat <- colSums(t(t(dND)/Ybar))
  lambdaDhat <- cumsum(dlambdaDhat)
  # intensity process for death
  dADhat <- t(apply(Y, 1, function(x) x*dlambdaDhat))
  dMDhat <- dND - dADhat

  # number of events per subject, will be used to create a block diagonal matrix
  id.size <- data.frame(data %>% group_by(.data$id) %>% count())$n
  grp <- bdsBlock(1:L, rep(1:n, id.size)) # block diagonal matrix
  grp <- as.matrix(grp)
  # original.time <- data$time
  original.time <- data_original$true_event_time
  t1 <- matrix(rep(original.time, each = L), L, L)*grp
  # put each subject's all event times on block diagonal, t1 is L x L
  t2 <- unique(t1) # keep only one row per subject, t2 is n x L
  original.event <- data$event # "event' is dN(t) , sum of 'recurrent' and 'death'

  t3 <- matrix(rep(original.event, each = L), L, L)*grp
  # put each subject's all event indicator on block diag, t3 is L x L
  t4 <- as.matrix(cbind(id = data$id, t3) %>% as_tibble() %>% group_by(.data$id) %>%
                    slice(n()) %>% ungroup() %>% select(-id)) # keep only one row per subject, t4 is n x L

  t5 <- t2*t4 # make censoring times become zero, since dN(t) = 0 when censored
  t6 <- unname(t(apply(t5, 1, function(x) x[order(original.time)])))
  # sort each subjects' dN(t) = 1 (recurrent and death) times in the sorted order, t6 is n x L
  dN <- 1*(t6 == matrix(rep(sorted.time, each = n), n, L)) # at which time point did dN(t) jump

  N <- t(apply(dN, 1, cumsum)) # cumulative number of events per subject, the N matrix is n x L
  dAhat <- t(apply(Y, 1, function(x) x*dRhat))
  dMhat <- dN - dAhat


  dpartI <- t(apply(dMhat, 1, function(x) x*KMhat/(Ybar/n)))
  partI <- t(apply(dpartI, 1, cumsum))

  dpartII <- t(apply(dMDhat, 1, function(x) x/(Ybar/n)))
  partII.1 <- t(apply(dpartII, 1, cumsum))
  partII <- t(apply(partII.1, 1, function(x) x*muhat))

  dpartIII <- t(apply(dMDhat, 1, function(x) x*muhat/(Ybar/n)))
  partIII <- t(apply(dpartIII, 1, cumsum))

  Psihat <- partI - partII + partIII
  xihat <- colSums(Psihat^2)/n

  if (!is.null(t)){
    var <- NULL
    mu <- NULL
    for(i in 1:length(t)){
      var[i] <- xihat[max(which(sorted.time <= t[i]))]/n
      mu[i] <- muhat[max(which(sorted.time <= t[i]))]
    }
    return(list(muhat = mu, varhat = var, n = n))
  }
  else{return(xihat)}
}
