#' Integrate a TCR library into Seurat object metadata
#'
#' Modifies a `Seurat` object's metadata by taking all the columns
#' of the `all_contig_annotations.csv`, and adding new elements to `seurat_obj@meta.data`, 
#' corresponding to each cell's barcode and handling duplicates (since the sequencing
#' of TRA and TRB genes creates multiple data points for the same cell).
#'
#' Columns from cells (barcodes) that had duplicates in another row are
#' concatenated into strings, separated by `__` in the metadata element. Barcodes
#' from the TCR library that had no matches to barcodes in the `seurat_obj` will
#' add `NA`s for all elements of the same index.
#'
#' @param seurat_obj Seurat object
#' @param tcr_file `data.frame` of the T cell library generated by Cell Ranger. The column holding the cell barcodes must be named exactly `"barcode"`, which is the default name of that column in 10X's `all_contig_annotations.csv` file.
#' @param verbose if `TRUE`, will display a progress bar to the R console.
#'
#' @return Returns a new Seurat object with new elements in the metadata
#'
#' @concept sc_RNAseq
#'
#' @export
#' @importFrom grDevices hcl
#' @importFrom stats aggregate
#' @importFrom stats na.omit
#' @importFrom utils setTxtProgressBar 
#' @importFrom data.table .GRP
#' @importFrom data.table .SD
#'
#' @examples
#' library(Seurat)
#' library(APackOfTheClones)
#' data("mini_clonotype_data","mini_seurat_obj")
#'
#' # integrate the TCR data into new seurat object
#' integrated_seurat_object <- integrate_tcr(mini_seurat_obj, mini_clonotype_data)
#' integrated_seurat_object
#'
#' @references atakanekiz (2019) Tutorial:Integrating VDJ sequencing data with Seurat. `https://www.biostars.org/p/384640/`
#'
integrate_tcr <- function(seurat_obj, tcr_file, verbose = TRUE) {
  # Capture the call time eagerly, before the worker does any processing.
  call_time <- Sys.time()

  # Public entry point: always asks the worker to record the command.
  dev_integrate_tcr(
    seurat_obj = seurat_obj,
    tcr_file = tcr_file,
    verbose = verbose,
    do_add_command = TRUE,
    time_called = call_time
  )
}

# Internal worker behind integrate_tcr().
#
# Collapses the TCR contig table to one row per barcode (the distinct values of
# every other column are joined with "__") and adds the collapsed columns to
# the metadata of `seurat_obj` with Seurat::AddMetaData().
#
# Arguments:
#   seurat_obj      Seurat object to annotate.
#   tcr_file        Contig annotation table; must contain a "barcode" column.
#   verbose         If TRUE, print status messages and a progress bar.
#   do_add_command  Currently unused (see the note at the top of the body).
#   time_called     Currently unused (see the note at the top of the body).
#
# Returns the Seurat object with the collapsed TCR columns in its metadata.
# Stops with an error if `tcr_file` has no "barcode" column.
dev_integrate_tcr <- function(
  seurat_obj, tcr_file, verbose, do_add_command, time_called = Sys.time()
) {

  # NOTE: command logging is intentionally disabled. The default command
  # record would store the entire TCR data frame in its params, so a custom
  # function may be needed before re-enabling this:
  #if (do_add_command) {
  #  seurat_obj@commands[["integrate_tcr"]] <- make_apotc_command(time_called)
  #}

  tcr <- data.table::as.data.table(tcr_file)

  # Fail early with a readable message instead of a grouping error further down
  if (!"barcode" %in% names(tcr)) {
    stop("`tcr_file` must contain a column named \"barcode\"", call. = FALSE)
  }

  # One progress tick per barcode group (helpful for large aggregations).
  # txtProgressBar() requires max > min, so no bar is created for an empty
  # table.
  n_groups <- data.table::uniqueN(tcr$barcode)
  show_progress <- verbose && n_groups > 0L

  if (verbose) {
    message("integrating TCR library into seurat object")
  }
  if (show_progress) {
    pb <- utils::txtProgressBar(min = 0, max = n_groups, style = 3)
    # close() writes the bar's final newline; registering it here guarantees
    # that happens even if the aggregation fails or messages are suppressed
    on.exit(close(pb), add = TRUE)
  }

  # Collapse one column of one barcode group into a single string: the sorted
  # distinct values joined by "__". factor() excludes NA by default, so no
  # na.omit is needed; a group containing only NA collapses to "".
  data_concater <- function(x) {
    paste(levels(factor(x)), collapse = "__")
  }

  # Apply data_concater per barcode. The two branches are kept separate so the
  # quiet path stays a plain `lapply(.SD, ...)` call.
  if (show_progress) {
    tcr_collapsed <- tcr[, {
      utils::setTxtProgressBar(pb, .GRP)
      lapply(.SD, data_concater)
    }, by = "barcode"]
  } else {
    tcr_collapsed <- tcr[, lapply(.SD, data_concater), by = "barcode"]
  }

  # Drop rows with a missing barcode BEFORE assigning row names: row names may
  # not contain NA. Every collapsed column is a paste() result and is never NA,
  # so only an NA barcode can trigger a removal here.
  tcr_collapsed <- stats::na.omit(tcr_collapsed)

  # data.table does not support row names, so switch to a plain data.frame and
  # label each row with its barcode for the metadata integration.
  tcr_metadata <- as.data.frame(tcr_collapsed)
  rownames(tcr_metadata) <- tcr_metadata$barcode

  seurat_obj <- Seurat::AddMetaData(
    seurat_obj,
    metadata = tcr_metadata
  )

  if (verbose) {
    # share of cells in the object that ended up with a non-NA barcode
    percent_integrated <- 100 - percent_na(seurat_obj)
    message(paste("\nPercent of unique barcodes:", as.character(round(percent_integrated)), "%"))
  }

  seurat_obj
}

#' Count the cells that carry an integrated TCR barcode
#'
#' @param seurat_obj A seurat object integrated with a t cell receptor library
#' via \code{\link{integrate_tcr}}
#'
#' @return Returns an integer: the number of entries of the `barcode` metadata
#' column that are not `NA`
#'
#' @noRd
count_tcr_barcodes <- function(seurat_obj) {
  # maybe export this in the future but not much point atm
  barcodes <- seurat_obj@meta.data[["barcode"]]

  # vectorised: total entries minus the missing ones (no explicit loop)
  length(barcodes) - sum(is.na(barcodes))
}

# Get the percentage (0-100) of NA's in the metadata `barcode` column; used
# for the summary message printed after integration.
#
# If there is nothing to count (the `barcode` column is absent or has length
# zero), no cell carries a TCR barcode, so 100 is returned instead of the NaN
# that a 0 / 0 division would produce.
percent_na <- function(seurat_obj) {
  num_barcodes <- length(seurat_obj@meta.data[["barcode"]])

  # guard against division by zero
  if (num_barcodes == 0L) {
    return(100)
  }

  100 * (num_barcodes - count_tcr_barcodes(seurat_obj)) / num_barcodes
}