% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stat-multcomp.R
\name{stat_multcomp}
\alias{stat_multcomp}
\title{Labels for pairwise multiple comparisons}
\usage{
stat_multcomp(
  mapping = NULL,
  data = NULL,
  geom = NULL,
  position = "identity",
  ...,
  formula = NULL,
  method = "lm",
  method.args = list(),
  contrasts = "Tukey",
  p.adjust.method = NULL,
  small.p = FALSE,
  adj.method.tag = 4,
  p.digits = 3,
  label.type = "bars",
  fm.cutoff.p.value = 1,
  mc.cutoff.p.value = 1,
  mc.critical.p.value = 0.05,
  label.y = NULL,
  vstep = NULL,
  output.type = NULL,
  na.rm = FALSE,
  orientation = "x",
  parse = NULL,
  show.legend = FALSE,
  inherit.aes = TRUE
)
}
\arguments{
\item{mapping}{The aesthetic mapping, usually constructed with
\code{\link[ggplot2]{aes}}. Only needs to be
set at the layer level if you are overriding the plot defaults.}

\item{data}{A layer specific dataset, only needed if you want to override
the plot defaults.}

\item{geom}{The geometric object to use to display the data.}

\item{position}{The position adjustment to use for overlapping points on this
layer.}

\item{...}{other arguments passed on to \code{\link[ggplot2]{layer}}. This
can include aesthetics whose values you want to set, not map. See
\code{\link[ggplot2]{layer}} for more details.}

\item{formula}{a formula object. Using aesthetic names \code{x} and \code{y}
instead of original variable names.}

\item{method}{function or character If character, "lm" (or its equivalent
"aov"), "rlm" or the name of a model fit function are accepted, possibly
followed by the fit function's \code{method} argument separated by a colon
(e.g. \code{"rlm:M"}). If a function different to \code{lm()}, it must
accept as a minimum a model formula through its first parameter, and have
formal parameters named \code{data}, \code{weights}, and \code{method}, and
return a model fit object accepted by function \code{glht()}.}

\item{method.args}{named list with additional arguments.}

\item{contrasts}{character vector of length one or a numeric matrix. If
character, one of "Tukey" or "Dunnet". If a matrix, one column per level
of the factor mapped to \code{x} and one row per \strong{pairwise}
contrast.}

\item{p.adjust.method}{character As the argument for parameter \code{type} of
function \code{adjusted()} passed as argument to parameter \code{test} of
\code{\link[multcomp]{summary.glht}}. Accepted values are "single-step",
"Shaffer", "Westfall", "free", "holm", "hochberg", "hommel", "bonferroni",
"BH", "BY", "fdr", "none".}

\item{small.p}{logical If \code{TRUE}, use lower case \emph{p} instead of
capital \emph{P} as the symbol for \emph{P}-value in labels.}

\item{adj.method.tag}{numeric, character or function If \code{numeric}, the
length in characters of the abbreviation of the method used to adjust
\emph{p}-values. A value of zero adds no label, and a negative value uses
the word "adjusted" as the starting point for the abbreviation. If
\code{character} its value is used as subscript. If a \code{function}, the
value used is the value returned by the function when passed
\code{p.adjust.method} as its only argument.}

\item{p.digits}{integer Number of digits after the decimal point to
use for \eqn{R^2} and \emph{P}-value in labels.}

\item{label.type}{character One of "bars", "letters" or "LETTERS", selects
how the results of the multiple comparisons are displayed. Only "bars" can
be used together with \code{contrasts = "Dunnet"}.}

\item{fm.cutoff.p.value}{numeric [0..1] The \emph{P}-value for the main
effect of factor \code{x} in the ANOVA test for the fitted model above
which no pairwise comparisons are computed or labels generated. Be aware
that recent literature tends to recommend considering which testing
approach is relevant to the problem at hand instead of requiring the
significance of the main effect before applying multiple comparisons'
tests. The default value is 1, imposing no restrictions.}

\item{mc.cutoff.p.value}{numeric [0..1] The \emph{P}-value for the individual
contrasts above which no labelled bars are generated. Default is 1,
labelling all pairwise contrasts tested.}

\item{mc.critical.p.value}{numeric The critical \emph{P}-value used for tests
when encoded as letters.}

\item{label.y}{numeric vector Values in native data units or if
\code{character}, one of "top" or "bottom". Recycled if too short and
truncated if too long.}

\item{vstep}{numeric in npc units, the vertical displacement step-size
used between labels for different contrasts when \code{label.type = "bars"}.}

\item{output.type}{character One of "expression", "LaTeX", "text",
"markdown" or "numeric".}

\item{na.rm}{a logical indicating whether NA values should be stripped before
the computation proceeds.}

\item{orientation}{character Either "x" or "y" controlling the default for
\code{formula}. \strong{Support for \code{orientation} is not yet
implemented but is planned.}}

\item{parse}{logical Passed to the geom. If \code{TRUE}, the labels will be
parsed into expressions and displayed as described in \code{?plotmath}.
Default is \code{TRUE} if \code{output.type = "expression"} and
\code{FALSE} otherwise.}

\item{show.legend}{logical. Should this layer be included in the legends?
\code{NA} includes it if any aesthetics are mapped, \code{FALSE}, the
default, never includes it, and \code{TRUE} always includes it.}

\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather
than combining with them.}
}
\value{
A data frame with one row per comparison for \code{label.type =
  "bars"}, or a data frame with one row per factor \code{x} level for
  \code{label.type = "letters"} and for \code{label.type = "LETTERS"}.
  Variables (= columns) as described under \strong{Computed variables}.
}
\description{
\code{stat_multcomp} fits a linear model, by default with \code{stats::lm()}
but alternatively with other model fit functions. The model is passed to
function \code{glht()} from package 'multcomp' to fit Tukey, Dunnet or other
\strong{pairwise} contrasts and generates labels based on adjusted
\emph{P}-values.
}
\details{
This statistic can be used to automatically annotate a plot with
  \emph{P}-values for \strong{pairwise} multiple comparison tests, based on
  Tukey contrasts (all pairwise), Dunnet contrasts (other levels against the
  first one) or a subset of all possible pairwise contrasts. See Meier (2022,
  Chapter 3) for an accessible explanation of multiple comparisons and
  contrasts with package 'multcomp', of which \code{stat_multcomp()} is
  mostly a wrapper.

  The explanatory variable mapped to the \emph{x} aesthetic must be a factor
  as this creates the required grouping. Currently, contrasts that involve
  more than two levels of a factor, such as the average of two treatment
  levels against a control level are not supported, mainly because they
  require a new geometry that I need to design, implement and add to package
  'ggpp'.

  Two ways of displaying the outcomes are implemented, and are selected by
  \code{"bars"}, \code{"letters"} or \code{"LETTERS"} as argument to parameter
  \code{label.type}. \code{"letters"} and \code{"LETTERS"} can be used only
  with Tukey contrasts, as otherwise the encoding is ambiguous. As too many
  bars clutter a plot, the maximum number of factor levels supported for
  \code{"bars"} together with Tukey contrasts is five, while together with
  Dunnet contrasts or contrasts defined by a numeric matrix, no limit is
  imposed.

  \code{stat_multcomp()} by default generates character labels ready to be
  parsed as R expressions but LaTeX (use TikZ device), markdown (use package
  'ggtext') and plain text are also supported, as well as numeric values for
  user-generated text labels. The value of \code{parse} is set automatically
  based on \code{output.type}, but if you assemble labels that need parsing
  from \code{numeric} output, the default needs to be overridden. This
  statistic only generates annotation labels and segments connecting the
  compared factor levels, or letter labels that discriminate significantly
  different groups.
}
\note{
R option \code{OutDec} is obeyed based on its value at the time the plot
  is rendered, i.e., displayed or printed. Set \code{options(OutDec = ",")}
  for languages like Spanish or French.
}
\section{Aesthetics}{
 \code{stat_multcomp()} understands \code{x} and
  \code{y}, to be referenced in the \code{formula} and \code{weight} passed
  as argument to parameter \code{weights}. A factor must be mapped to
  \code{x} and \code{numeric} variables to \code{y}, and, if used, to
  \code{weight}. In addition, the aesthetics understood by the geom
  (\code{"label_pairwise"} is the default for \code{label.type = "bars"},
  \code{"text"} is the default for \code{label.type = "letters"} and for
  \code{label.type = "LETTERS"}) are understood and grouping
  respected.
}

\section{Computed variables}{

If \code{output.type = "numeric"} and
\code{label.type = "bars"} the returned tibble contains
columns listed below. In all cases if the model fit function used does not return a value,
the label is set to \code{character(0L)} and the numeric value to \code{NA}.
\describe{
  \item{x,x.left.tip,x.right.tip}{x position, numeric.}
  \item{y}{y position, numeric.}
  \item{coefficients}{Delta estimate from pairwise contrasts, numeric.}
  \item{contrasts}{Contrasts as two levels' ordinal "numbers" separated by a dash, character.}
  \item{tstat}{\emph{t}-statistic estimates for the pairwise contrasts, numeric.}
  \item{p.value}{\emph{P}-value for the pairwise contrasts.}
  \item{fm.method}{Set according to the \code{method} used.}
  \item{fm.class}{Most derived class of the fitted model object.}
  \item{fm.formula}{Formula extracted from the fitted model object if available, or the formula argument.}
  \item{fm.formula.chr}{Formula extracted from the fitted model object if available, or the formula argument, formatted as character.}
  \item{mc.adjusted}{The method used to adjust the \emph{P}-values.}
  \item{mc.contrast}{The type of contrast used for multiple comparisons.}
  \item{n}{The total number of observations or rows in data.}
  \item{default.label}{text label, always included, but possibly NA.}
  }

If \code{output.type} is not \code{"numeric"} the returned data frame includes in
addition the following labels:

\describe{
  \item{stars.label}{\emph{P}-value for the pairwise contrasts encoded as "stars", character.}
  \item{p.value.label}{\emph{P}-value for the pairwise contrasts, character.}
  \item{delta.label}{The coefficient or estimate for the difference between compared pairs of levels.}
  \item{t.value.label}{\emph{t}-statistic estimates for the pairwise contrasts, character.}
  }

If \code{label.type = "letters"} or \code{label.type = "LETTERS"} the returned tibble contains
columns listed below.

\describe{
  \item{x,x.left.tip,x.right.tip}{x position, numeric.}
  \item{y}{y position, numeric.}
  \item{critical.p.value}{\emph{P}-value used in pairwise tests, numeric.}
  \item{fm.method}{Set according to the \code{method} used.}
  \item{fm.class}{Most derived class of the fitted model object.}
  \item{fm.formula}{Formula extracted from the fitted model object if available, or the formula argument.}
  \item{fm.formula.chr}{Formula extracted from the fitted model object if available, or the formula argument, formatted as character.}
  \item{mc.adjusted}{The method used to adjust the \emph{P}-values.}
  \item{mc.contrast}{The type of contrast used for multiple comparisons.}
  \item{n}{The total number of observations or rows in data.}
  \item{default.label}{text label, always included, but possibly NA.}
  }

If \code{output.type} is not \code{"numeric"} the returned data frame includes in
addition the following labels:

\describe{
  \item{letters.label}{Letters that distinguish levels based on significance from multiple comparisons test.}
  }
}

\section{Alternatives}{
 \code{stat_signif()} in package 'ggsignif' is
  an earlier and independent implementation of pairwise tests.
}

\examples{

p1 <- ggplot(mpg, aes(factor(cyl), hwy)) +
  geom_boxplot(width = 0.33)

## labelled bars

p1 +
  stat_multcomp()

p1 +
  stat_multcomp(adj.method.tag = 0)

# test against a control, with first level being the control
# change order of factor levels in data to set the control group
p1 +
  stat_multcomp(contrasts = "Dunnet")

# arbitrary pairwise contrasts, in arbitrary order
p1 +
  stat_multcomp(contrasts = rbind(c(0, 0, -1, 1),
                                  c(0, -1, 1, 0),
                                  c(-1, 1, 0, 0)))

# different methods to adjust the contrasts
p1 +
  stat_multcomp(p.adjust.method = "bonferroni")

p1 +
  stat_multcomp(p.adjust.method = "holm")

p1 +
  stat_multcomp(p.adjust.method = "fdr")

# no correction, useful only for comparison
p1 +
  stat_multcomp(p.adjust.method = "none")

# sometimes we need to expand the plotting area
p1 +
  stat_multcomp(geom = "text_pairwise") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.10)))

# position of contrasts' bars (based on scale limits)
p1 +
  stat_multcomp(label.y = "bottom")

p1 +
  stat_multcomp(label.y = 11)

# use different labels: difference and P-value from hypothesis tests
p1 +
  stat_multcomp(use_label(c("Delta", "P")),
                size = 2.75)

# control smallest P-value displayed and number of digits
p1 +
  stat_multcomp(p.digits = 4)

# label only significant differences
# but test and correct for all pairwise contrasts!
p1 +
  stat_multcomp(mc.cutoff.p.value = 0.01)

## letters as labels for test results

p1 +
  stat_multcomp(label.type = "letters")

# use capital letters
p1 +
  stat_multcomp(label.type = "LETTERS")

# location
p1 +
  stat_multcomp(label.type = "letters",
                label.y = "top")

p1 +
  stat_multcomp(label.type = "letters",
                label.y = 0)

# stricter critical p-value than default used for test
p1 +
  stat_multcomp(label.type = "letters",
                mc.critical.p.value = 0.01)

# Inspecting the returned data using geom_debug()
# This provides a quick way of finding out the names of the variables that
# are available for mapping to aesthetics with after_stat().

gginnards.installed <- requireNamespace("gginnards", quietly = TRUE)

if (gginnards.installed)
  library(gginnards)

if (gginnards.installed)
p1 +
  stat_multcomp(label.type = "bars",
                geom = "debug")

if (gginnards.installed)
p1 +
  stat_multcomp(label.type = "letters",
                geom = "debug")

if (gginnards.installed)
p1 +
  stat_multcomp(label.type = "bars",
                output.type = "numeric",
                geom = "debug")

}
\references{
Meier, Lukas (2022) \emph{ANOVA and Mixed Models: A Short Introduction
Using R}. Chapter 3 Contrasts and Multiple Testing. The R Series. Boca Raton:
Chapman and Hall/CRC. ISBN: 9780367704209, \doi{10.1201/9781003146216}.
}
\seealso{
This statistic uses the implementation of Tests of General Linear
  Hypotheses in function \code{\link[multcomp]{glht}}. See
  \code{\link[multcomp]{summary.glht}} and \code{\link[stats]{p.adjust}}
  for the supported tests and the references therein for the theory
  behind them.
}
\concept{ggplot statistics for multiple comparisons}
