#' Join two data.tables together
#'
#' @description Join two data.tables together
#'
#' @param x A data.frame or data.table
#' @param y A data.frame or data.table
#' @param by A character vector of variables to join by. If NULL, the default, the join will do a natural join, using all variables with common names across the two tables.
#' @param suffix Append created for duplicated column names when using `full_join.()`
#' @param ... Other parameters passed on to methods
#' @param keep Should the join keys from both `x` and `y` be preserved in the output?
#'
#' @export
#'
#' @examples
#' df1 <- data.table(x = c("a", "a", "b", "c"), y = 1:4)
#' df2 <- data.table(x = c("a", "b"), z = 5:6)
#'
#' df1 %>% left_join.(df2)
#' df1 %>% inner_join.(df2)
#' df1 %>% right_join.(df2)
#' df1 %>% full_join.(df2)
#' df1 %>% anti_join.(df2)
left_join. <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE) {
  UseMethod("left_join.")
}

#' @export
left_join..default <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE) {
  c(x, y, x_names, y_names, by, on, selection) %<-%
    join_prep(x, y, by, keep, suffix, "left")

  if (keep) {
    result_df <- dt(y, x, !!selection, on = on, allow.cartesian = TRUE)
  } else {
    result_df <- y[x, on = on, allow.cartesian = TRUE]

    result_df <- df_set_names(result_df, by$x, by$y)
    result_df <- df_col_order(result_df, c(x_names, y_names))
  }

  tidytable_restore(result_df, x)
}

#' @export
#' @rdname left_join.
right_join. <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE) {
  UseMethod("right_join.")
}

#' @export
right_join..default <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE) {
  c(x, y, x_names, y_names, by, on, selection) %<-%
    join_prep(x, y, by, keep, suffix, "right")

  if (keep) {
    result_df <- dt(x, y, !!selection, on = on, allow.cartesian = TRUE)
  } else {
    result_df <- x[y, on = on, allow.cartesian = TRUE]
  }

  tidytable_restore(result_df, x)
}

#' @export
#' @rdname left_join.
inner_join. <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE) {
  UseMethod("inner_join.")
}

#' @export
inner_join..default <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE) {
  c(x, y, x_names, y_names, by, on, selection) %<-%
    join_prep(x, y, by, keep, suffix, "inner")

  if (keep) {
    result_df <- dt(x, y, !!selection,
                    on = on, allow.cartesian = TRUE,
                    nomatch = 0)
  } else {
    result_df <- x[y, on = on, allow.cartesian = TRUE, nomatch = 0]
  }

  tidytable_restore(result_df, x)
}

#' @export
#' @rdname left_join.
full_join. <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE) {
  UseMethod("full_join.")
}

#' @export
full_join..default <- function(x, y, by = NULL, suffix = c(".x", ".y"), ..., keep = FALSE) {
    if (!is.data.frame(x) | !is.data.frame(y)) stop("x & y must be a data.frame or data.table")
    if (!is_tidytable(x)) x <- as_tidytable(x)
    if (!is_tidytable(y)) y <- as_tidytable(y)

    if (!keep) {
      result_df <- join_mold(
        x, y, by = by, suffix = suffix,
        all_x = TRUE, all_y = TRUE
      )

      col_order <- suffix_join_names(names(x), names(y), suffix, keep, get_bys(x, y, by), "full")

      result_df <- df_col_order(result_df, col_order)
    } else {
      bys <- get_bys(x, y, by)
      by_x <- bys$x
      by_y <- bys$y

      unique_keys_df <- select.(x, any_of(by_x)) %>%
        set_names(by_y) %>%
        bind_rows.(
          select.(y, any_of(by_y))
        ) %>%
        distinct.()

      step_df <- right_join.(y, unique_keys_df, keep = TRUE, suffix = c("__temp__", ""))

      drop_cols <- by_y[by_x != by_y]
      if (length(by_y[by_x == by_y]) > 0) {
        drop_cols <- c(drop_cols, paste0(by_y[by_x == by_y], suffix[[2]]))
      }

      result_df <- right_join.(x, step_df, by = by, suffix = suffix, keep = TRUE)
      result_df <- dt_j(result_df, (drop_cols) := NULL)
      result_df <- rename_with.(result_df, ~ temp_names_fix(.x, by_x, suffix[[2]]), ends_with("__temp__"))
    }

    tidytable_restore(result_df, x)
}

temp_names_fix <- function(names, by_x, y_suffix) {
  new_names <- str_replace.(names, "__temp__", "")

  map_chr.(new_names, function(.x) if (.x %in% by_x) paste0(.x, y_suffix) else .x)
}

#' @export
#' @rdname left_join.
anti_join. <- function(x, y, by = NULL) {
  UseMethod("anti_join.")
}

#' @export
anti_join..default <- function(x, y, by = NULL) {
  c(x, y, x_names, y_names, by, on, selection) %<-%
    join_prep(x, y, by, keep = FALSE, suffix = NULL, "anti")

  result_df <- x[!y, on = on, allow.cartesian = TRUE]

  tidytable_restore(result_df, x)
}

#' @export
#' @rdname left_join.
semi_join. <- function(x, y, by = NULL) {
  UseMethod("semi_join.")
}

#' @export
semi_join..default <- function(x, y, by = NULL) {
  c(x, y, x_names, y_names, by, on, selection) %<-%
    join_prep(x, y, by, keep = FALSE, suffix = NULL, "semi")

  result_df <- fsetdiff(x, x[!y, on = on], all=TRUE)

  tidytable_restore(result_df, x)
}

get_bys <- function(x, y, by = NULL) {
  names_x <- names(x)
  names_y <- names(y)

  if (is.null(by)) {
    by_x <- by_y <- intersect(names_x, names_y)
  } else {
    by_x <- names(by)
    by_y <- unname(by)
    if (is.null(by_x)) {
      by_x <- by_y
    }
  }

  by_x[by_x == ""] <- by_y[by_x == ""]

  if (any(by_x %notin% names_x)) abort("by.x columns not in x")
  if (any(by_y %notin% names_y)) abort("by.y columns not in y")

  list(x = by_x, y = by_y)
}

join_prep <- function(x, y, by, keep, suffix, type) {
  if (!is.data.frame(x) | !is.data.frame(y)) {
    abort("x & y must be a data.frame or data.table")
  }
  if (!is_tidytable(x)) x <- as_tidytable(x)
  if (!is_tidytable(y)) y <- as_tidytable(y)

  x_names <- names(x)
  y_names <- names(y)

  by <- get_bys(x, y, by)

  if (!keep) {
    y_names <- setdiff(y_names, by$y)
    suffix_names <- intersect(setdiff(x_names, by$x), y_names)
  } else {
    suffix_names <- intersect(x_names, y_names)
  }

  if (length(suffix_names) > 0) {
    x <- df_set_names(x, paste0(suffix_names, suffix[[1]]), suffix_names)
    y <- df_set_names(y, paste0(suffix_names, suffix[[2]]), suffix_names)

    x_names <- names(x)
    y_names <- names(y)

    by_x_suffix <- by$x %in% suffix_names
    if (any(by_x_suffix)) {
      by_y_suffix <- by$y %in% suffix_names
      by$x[by_x_suffix] <- paste0(by$x[by_x_suffix], suffix[[1]])
      by$y[by_y_suffix] <- paste0(by$y[by_y_suffix], suffix[[2]])
    }

    if (!keep) {
      y_names <- setdiff(y_names, by$y)
    }
  }

  if (type == "left") {
    on <- by$x
    names(on) <- by$y
  } else {
    on <- by$y
    names(on) <- by$x
  }

  if (keep) {
    if (type == "left") {
      x_prefix <- "i."
      y_prefix <- "x."
    } else {
      x_prefix <- "x."
      y_prefix <- "i."
    }
    selection <- c(paste0(x_prefix, x_names), paste0(y_prefix, y_names))
    names(selection) <- c(x_names, y_names)
    selection <- call2(".", !!!syms(selection))
  } else {
    selection <- NULL
  }

  list(x, y, x_names, y_names, by, on, selection)
}

join_mold <- function(x, y, by = NULL, suffix = c(".x", ".y"), all_x, all_y) {
  if (!is.data.frame(x) | !is.data.frame(y)) stop("x & y must be a data.frame or data.table")
  if (!is_tidytable(x)) x <- as_tidytable(x)
  if (!is_tidytable(y)) y <- as_tidytable(y)

  by <- get_bys(x, y, by)

  result_df <- merge(
    x = x, y = y, by.x = by$x, by.y = by$y, suffixes = suffix,
    all.x = all_x, all.y = all_y, allow.cartesian = TRUE, sort = FALSE
  )

  setkey(result_df, NULL)

  result_df
}

suffix_join_names <- function(x_names, y_names, suffix, keep, by = NULL, type) {
  if (!keep && type != "left") {
    y_names <- y_names[y_names %notin% by$y]
  }
  df_names <- c(x_names, y_names)
  is_x_duplicate <- duplicated(df_names, fromLast = TRUE)
  if (any(is_x_duplicate)) {
    is_y_duplicate <- duplicated(df_names)
    new_names <- df_names
    new_names[is_x_duplicate] <- paste0(new_names[is_x_duplicate], suffix[[1]])
    new_names[is_y_duplicate] <- paste0(new_names[is_y_duplicate], suffix[[2]])
    df_names <- new_names
  }
  df_names
}

globalVariables(
  c("x_names", "y_names", "on", "selection")
)
