% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/partial_dependence.R
\name{partial_dependence}
\alias{partial_dependence}
\alias{partial_dependence.default}
\alias{partial_dependence.ranger}
\alias{partial_dependence.explainer}
\title{Partial Dependence}
\usage{
partial_dependence(object, ...)

\method{partial_dependence}{default}(
  object,
  v,
  data,
  pred_fun = stats::predict,
  trafo = NULL,
  which_pred = NULL,
  w = NULL,
  breaks = "Sturges",
  right = TRUE,
  discrete_m = 5L,
  outlier_iqr = 2,
  pd_n = 500L,
  seed = NULL,
  ...
)

\method{partial_dependence}{ranger}(
  object,
  v,
  data,
  pred_fun = NULL,
  trafo = NULL,
  which_pred = NULL,
  w = NULL,
  breaks = "Sturges",
  right = TRUE,
  discrete_m = 5L,
  outlier_iqr = 2,
  pd_n = 500L,
  seed = NULL,
  ...
)

\method{partial_dependence}{explainer}(
  object,
  v = colnames(data),
  data = object$data,
  pred_fun = object$predict_function,
  trafo = NULL,
  which_pred = NULL,
  w = object$weights,
  breaks = "Sturges",
  right = TRUE,
  discrete_m = 5L,
  outlier_iqr = 2,
  pd_n = 500L,
  seed = NULL,
  ...
)
}
\arguments{
\item{object}{Fitted model.}

\item{...}{Further arguments passed to \code{pred_fun()}, e.g., \code{type = "response"} in
a \code{glm()} or (typically) \code{prob = TRUE} in classification models.}

\item{v}{Vector of variable names for which to calculate statistics.}

\item{data}{Matrix or data.frame.}

\item{pred_fun}{Prediction function, by default \code{stats::predict} (the ranger and explainer methods use different defaults, see Usage).
The function takes three arguments (names irrelevant): \code{object}, \code{data}, and \code{...}.}

\item{trafo}{How should predictions be transformed?
A function or \code{NULL} (default). Examples are \code{log} (to switch to link scale)
or \code{exp} (to switch from link scale to the original scale).}

\item{which_pred}{If the predictions are multivariate: which column to pick
(integer or column name). By default \code{NULL} (picks last column).}

\item{w}{Optional vector with case weights. Can also be a column name in \code{data}.}

\item{breaks}{An integer, vector, string or function specifying the bins
of the numeric X variables as in \code{\link[graphics:hist]{graphics::hist()}}. The default is \code{"Sturges"}.
To allow varying values of \code{breaks} across variables, it can be a list of the
same length as \code{v}, or a \emph{named} list with \code{breaks} for certain variables.}

\item{right}{Should bins be right-closed? The default is \code{TRUE}.
Vectorized over \code{v}. Only relevant for numeric X.}

\item{discrete_m}{Numeric X variables with up to this number of unique values
should not be binned but treated as a factor (after calculating partial dependence).
The default is 5. Vectorized over \code{v}.}

\item{outlier_iqr}{Outliers of a numeric X are capped via the boxplot rule, i.e.,
outside \code{outlier_iqr} * IQR from the quartiles. The default of 2 is more
conservative than the usual rule to account for right-skewed distributions.
Set to 0 or \code{Inf} for no capping. Note that at most 10k observations are sampled
to calculate quartiles. Vectorized over \code{v}.}

\item{pd_n}{Size of the data used for calculating partial dependence.
The default is 500. For larger \code{data} (and \code{w}), \code{pd_n} rows are randomly sampled.
Each variable specified by \code{v} uses the same subsample. Set to 0 to omit.}

\item{seed}{Optional random seed (an integer) used for:
\itemize{
\item Partial dependence: select background data if \code{n > pd_n}.
\item Capping X: quartiles are calculated from a random sample of at most 10k observations.
}}
}
\value{
A list (of class "EffectData") with a data.frame of statistics per feature. Use
single bracket subsetting to select part of the output.
}
\description{
Calculates PD for one or multiple \code{X} variables.

PD was introduced by Friedman (2001) to study the (main) effects
of an ML model. PD of a model f and variable \code{X} at a certain value g
is derived by replacing the \code{X} values in a reference \code{data} by g,
and then calculating the average prediction of f over this modified data.
This is done for different g to see how the average prediction of f changes in \code{X},
keeping all other feature values constant (Ceteris Paribus).

This function is a convenience wrapper around \code{\link[=feature_effects]{feature_effects()}}, which calls
the barebone implementation \code{\link[=.pd]{.pd()}} to calculate PD.
As grid points, it uses the arithmetic mean of \code{X} per bin (specified by \code{breaks}),
optionally weighted by \code{w}.
}
\section{Methods (by class)}{
\itemize{
\item \code{partial_dependence(default)}: Default method.

\item \code{partial_dependence(ranger)}: Method for ranger models.

\item \code{partial_dependence(explainer)}: Method for explainer objects.

}}
\examples{
fit <- lm(Sepal.Length ~ ., data = iris)
M <- partial_dependence(fit, v = "Species", data = iris)
M |> plot()

M2 <- partial_dependence(fit, v = colnames(iris)[-1], data = iris)
plot(M2, share_y = "all")
}
\references{
Friedman, Jerome H. 2001, \emph{Greedy Function Approximation: A Gradient Boosting Machine.}
Annals of Statistics 29 (5): 1189-1232. doi:10.1214/aos/1013203451.
}
\seealso{
\code{\link[=feature_effects]{feature_effects()}}, \code{\link[=.pd]{.pd()}}, \code{\link[=ale]{ale()}}.
}
