% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/classif_mcc.R
\name{mcc}
\alias{mcc}
\title{Matthews Correlation Coefficient}
\usage{
mcc(truth, response, positive = NULL, ...)
}
\arguments{
\item{truth}{(\code{factor()})\cr
True (observed) labels.
Must have the same levels and length as \code{response}.}

\item{response}{(\code{factor()})\cr
Predicted response labels.
Must have the same levels and length as \code{truth}.}

\item{positive}{(\code{character(1)})\cr
Name of the positive class in case of binary classification.}

\item{...}{(\code{any})\cr
Additional arguments. Currently ignored.}
}
\value{
Performance value as \code{numeric(1)}.
}
\description{
Measure to compare true observed labels with predicted
labels
in multiclass classification tasks.
}
\details{
In the binary case, the Matthews Correlation Coefficient is defined as \deqn{
   \frac{\mathrm{TP} \cdot \mathrm{TN} - \mathrm{FP} \cdot \mathrm{FN}}{\sqrt{(\mathrm{TP} + \mathrm{FP}) (\mathrm{TP} + \mathrm{FN}) (\mathrm{TN} + \mathrm{FP}) (\mathrm{TN} + \mathrm{FN})}},
}{
   (TP * TN - FP * FN) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)),
}
where \eqn{TP}, \eqn{FP}, \eqn{TN}, \eqn{FN} are the number of true positives, false positives, true negatives, and false negatives respectively.

In the multi-class case, the Matthews Correlation Coefficient is defined for a multi-class confusion matrix \eqn{C} with \eqn{K} classes: \deqn{
   \frac{c \cdot s - \sum_k^K p_k \cdot t_k}{\sqrt{(s^2 - \sum_k^K p_k^2) \cdot (s^2 - \sum_k^K t_k^2)}},
}{
   (c * s - sum(pk * tk)) / sqrt((s^2 - sum(pk^2)) * (s^2 - sum(tk^2))),
}
where
\itemize{
\item \eqn{s = \sum_i^K \sum_j^K C_{ij}}: total number of samples
\item \eqn{c = \sum_k^K C_{kk}}: total number of correctly predicted samples
\item \eqn{t_k = \sum_i^K C_{ik}}: number of predictions for each class \eqn{k}
\item \eqn{p_k = \sum_j^K C_{kj}}: number of true occurrences for each class \eqn{k}.
}

The above formula is undefined if any of the four sums in the denominator is 0 in the binary case and more generally if either \eqn{s^2 - \sum_k^K p_k^2} or \eqn{s^2 - \sum_k^K t_k^2} is equal to 0.
The denominator is then set to 1.

When there are more than two classes, the MCC will no longer range between -1 and +1.
Instead, the minimum value will be between -1 and 0 depending on the true distribution. The maximum value is always +1.
}
\section{Meta Information}{

\itemize{
\item Type: \code{"classif"}
\item Range: \eqn{[-1, 1]}{[-1, 1]}
\item Minimize: \code{FALSE}
\item Required prediction: \code{response}
}
}

\examples{
set.seed(1)
lvls = c("a", "b", "c")
truth = factor(sample(lvls, 10, replace = TRUE), levels = lvls)
response = factor(sample(lvls, 10, replace = TRUE), levels = lvls)
mcc(truth, response)
}
\references{
\url{https://en.wikipedia.org/wiki/Phi_coefficient}

Matthews BW (1975).
\dQuote{Comparison of the predicted and observed secondary structure of T4 phage lysozyme.}
\emph{Biochimica et Biophysica Acta (BBA) - Protein Structure}, \bold{405}(2), 442--451.
\doi{10.1016/0005-2795(75)90109-9}.
}
\seealso{
Other Classification Measures: 
\code{\link{acc}()},
\code{\link{bacc}()},
\code{\link{ce}()},
\code{\link{logloss}()},
\code{\link{mauc_aunu}()},
\code{\link{mbrier}()},
\code{\link{zero_one}()}
}
\concept{Classification Measures}
\concept{classification_measure}
