% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/isotree_po.R
\name{isotree_po}
\alias{isotree_po}
\title{Build Isolation forest species distribution model and explain
the model and outputs.}
\usage{
isotree_po(
  obs_mode = "imperfect_presence",
  obs,
  obs_ind_eval = NULL,
  variables,
  categ_vars = NULL,
  contamination = 0.1,
  ntrees = 100L,
  sample_size = 1,
  ndim = 1L,
  seed = 10L,
  ...,
  offset = 0,
  response = TRUE,
  spatial_response = TRUE,
  check_variable = TRUE,
  visualize = FALSE
)
}
\arguments{
\item{obs_mode}{(\code{string}) The mode of observations for training. It should
be one of \code{c("perfect_presence", "imperfect_presence", "presence_absence")}.
"perfect_presence" means presence-only occurrences without
errors/uncertainties/bias, which should be rare in reality.
"imperfect_presence" means presence-only occurrences with
errors/uncertainties/bias, which should be the most common case.
"presence_absence" means presence-absence observations regardless of quality.
See details to learn how to set it. The default is "imperfect_presence".}

\item{obs}{(\code{sf}) The \code{sf} of observation for training. It is recommended to
call function \code{\link{format_observation}} to format the
occurrence (\code{obs}) before passing it here.
Otherwise, make sure there is a column named "observation" for observation.}

\item{obs_ind_eval}{(\code{sf} or \code{NULL}) Optional \code{sf} of observations for
independent test. It is recommended to call function
\code{\link{format_observation}} to format the occurrence (\code{obs_ind_eval})
before passing it here. Otherwise, make sure there is a column named
"observation" for observation.
If \code{NULL}, no independent test set will be used. The default is \code{NULL}.}

\item{variables}{(\code{RasterStack} or \code{stars}) The stack of environmental variables.}

\item{categ_vars}{(\code{vector} of \code{character} or \code{NULL}) The names of categorical
variables. Must be the same as the names in \code{variables}.}

\item{contamination}{(\code{numeric}) The percentage of abnormal cases within a
dataset. Because \code{iForest} is an outlier detection algorithm, it picks up
abnormal cases (much fewer) from normal cases. This argument is used to set
how many abnormal cases should be there if the users have the power to control it.
See details for how to set it. The value should be less than 0.5. Here we
constrain it in (0, 0.3]. The default value is 0.1.}

\item{ntrees}{(\code{integer}) The number of trees for the isolation forest. It must
be an integer; you could use function \code{\link{as.integer}} to convert a value
to one. The default is \code{100L}.}

\item{sample_size}{(\code{numeric}) It should be a rate for sampling size in \verb{[0, 1]}.
The default is \code{1.0}.}

\item{ndim}{(\code{integer}) ExtensionLevel for isolation forest. It must
be an integer; you could use function \code{\link{as.integer}} to convert a value
to one. Also, it must be no larger than the number of environmental variables.
When it is 1, the model is a traditional isolation forest, otherwise the model
is an extended isolation forest. The default is 1.}

\item{seed}{(\code{integer}) The random seed used in the modeling. It should be an
integer. The default is \code{10L}.}

\item{...}{Other arguments that \code{\link{isolation.forest}} needs.}

\item{offset}{(\code{numeric}) The offset to adjust fitted suitability. The default
is zero. It is highly recommended to leave it as default.}

\item{response}{(\code{logical}) If \code{TRUE}, generate response curves.
The default is \code{TRUE}.}

\item{spatial_response}{(\code{logical}) If \code{TRUE}, generate spatial response maps.
The default is \code{TRUE}, although it might be slow. NOTE that here the SHAP-based map
is not generated because it is slow. If you want it to be mapped, you could call
function \code{\link{spatial_response}} to make it.}

\item{check_variable}{(\code{logical}) If \code{TRUE}, check the variable importance.
The default is \code{TRUE}.}

\item{visualize}{(\code{logical}) If \code{TRUE}, generate the essential figures
related to the model. The default is \code{FALSE}.}
}
\value{
(\code{POIsotree}) A list of
\itemize{
\item{model (\code{\link{isolation.forest}}) The fitted isolation forest
model}
\item{variables (\code{stars}) The formatted image stack of
environmental variables}
\item{observation (\code{\link{sf}}) A \code{\link{sf}} of training occurrence
dataset}
\item{background_samples (\code{\link{sf}}) A \code{\link{sf}} of background points
for training dataset evaluation or SHAP dependence plot}
\item{independent_test (\code{\link{sf}} or \code{NULL}) A \code{\link{sf}} of test
occurrence dataset}
\item{background_samples_test (\code{\link{sf}} or \code{NULL}) A \code{\link{sf}} of
background points for test dataset evaluation or SHAP dependence plot}
\item{vars_train (\code{\link{data.frame}}) A \code{\link{data.frame}} with values of each
environmental variables for training occurrence}
\item{pred_train (\code{\link{data.frame}}) A \code{\link{data.frame}} with values of
prediction for training occurrence}
\item{eval_train (\code{POEvaluation}) A list of presence-only evaluation metrics
based on training dataset. See details of \code{POEvaluation} in
\code{\link{evaluate_po}}}
\item{var_test (\code{\link{data.frame}} or \code{NULL}) A \code{\link{data.frame}} with values of each
environmental variables for test occurrence}
\item{pred_test (\code{\link{data.frame}} or \code{NULL}) A \code{\link{data.frame}} with values of
prediction for test occurrence}
\item{eval_test (\code{POEvaluation} or \code{NULL}) A list of presence-only evaluation metrics
based on test dataset.
See details of \code{POEvaluation} in \code{\link{evaluate_po}}}
\item{prediction (\code{stars}) The predicted environmental suitability}
\item{marginal_responses (\code{MarginalResponse} or \code{NULL}) A list of marginal response
values of each environmental variables.
See details in \code{\link{marginal_response}}}
\item{offset (\code{numeric}) The offset value set as inputs.}
\item{independent_responses (\code{IndependentResponse} or \code{NULL}) A list of independent
response values of each environmental variables.
See details in \code{\link{independent_response}}}
\item{shap_dependences (\code{ShapDependence} or \code{NULL}) A list of variable
dependence values of each environmental variables.
See details in \code{\link{shap_dependence}}}
\item{spatial_responses (\code{SpatialResponse} or \code{NULL}) A list of spatial variable
dependence values of each environmental variable.
See details in \code{\link{spatial_response}}}
\item{variable_analysis (\code{VariableAnalysis} or \code{NULL}) A list of variable importance
analysis based on multiple metrics.
See details in \code{\link{variable_analysis}}}}
}
\description{
Call Isolation forest and its variations to do
species distribution modeling and optionally call a collection of other
functions to do model explanation.
}
\details{
For "perfect_presence", a user-defined number (\code{contamination}) of samples
will be taken from background to let \code{iForest} function normally.

If "imperfect_presence", no further action is required.

If the \bold{obs_mode} is "presence_absence", a \code{contamination} percent
of absences will be randomly selected and work together with all presences
to train the model.

NOTE: \bold{obs_mode} and \bold{mode} only work for \code{obs}. \code{obs_ind_eval}
will follow its own structure.

Please read details of algorithm \code{\link{isolation.forest}} on
\url{https://github.com/david-cortes/isotree}, and
the R documentation of function \code{\link{isolation.forest}}.
}
\examples{
\donttest{
########### Presence-absence mode #################
library(dplyr)
library(sf)
library(stars)
library(itsdm)

# Load example dataset
data("occ_virtual_species")
obs_df <- occ_virtual_species \%>\% filter(usage == "train")
eval_df <- occ_virtual_species \%>\% filter(usage == "eval")
x_col <- "x"
y_col <- "y"
obs_col <- "observation"
obs_type <- "presence_absence"

# Format the observations
obs_train_eval <- format_observation(
  obs_df = obs_df, eval_df = eval_df,
  x_col = x_col, y_col = y_col, obs_col = obs_col,
  obs_type = obs_type)

# Load variables
env_vars <- system.file(
  'extdata/bioclim_tanzania_10min.tif',
  package = 'itsdm') \%>\% read_stars() \%>\%
  slice('band', c(1, 5, 12))

# Modeling
mod_virtual_species <- isotree_po(
  obs_mode = "presence_absence",
  obs = obs_train_eval$obs,
  obs_ind_eval = obs_train_eval$eval,
  variables = env_vars, ntrees = 10,
  sample_size = 0.6, ndim = 1L,
  seed = 123L)

# Check results
## Evaluation based on training dataset
print(mod_virtual_species$eval_train)
plot(mod_virtual_species$eval_train)

## Response curves
plot(mod_virtual_species$marginal_responses)
plot(mod_virtual_species$independent_responses,
     target_var = c('bio1', 'bio5'))
plot(mod_virtual_species$shap_dependence)

## Relationships between target var and related var
plot(mod_virtual_species$shap_dependence,
     target_var = c('bio1', 'bio5'),
     related_var = 'bio12', smooth_span = 0)

# Variable importance
mod_virtual_species$variable_analysis
plot(mod_virtual_species$variable_analysis)

########### Presence-only mode ##################
# Load example dataset
data("occ_virtual_species")
obs_df <- occ_virtual_species \%>\% filter(usage == "train")
eval_df <- occ_virtual_species \%>\% filter(usage == "eval")
x_col <- "x"
y_col <- "y"
obs_col <- "observation"

# Format the observations
obs_train_eval <- format_observation(
  obs_df = obs_df, eval_df = eval_df,
  x_col = x_col, y_col = y_col, obs_col = obs_col,
  obs_type = "presence_only")

# Modeling with perfect_presence mode
mod_perfect_pres <- isotree_po(
  obs_mode = "perfect_presence",
  obs = obs_train_eval$obs,
  obs_ind_eval = obs_train_eval$eval,
  variables = env_vars, ntrees = 10,
  sample_size = 0.6, ndim = 1L,
  seed = 123L)

# Modeling with imperfect_presence mode
mod_imperfect_pres <- isotree_po(
  obs_mode = "imperfect_presence",
  obs = obs_train_eval$obs,
  obs_ind_eval = obs_train_eval$eval,
  variables = env_vars, ntrees = 10,
  sample_size = 0.6, ndim = 1L,
  seed = 123L)
}

}
\references{
\itemize{
\item{Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou. "Isolation forest."
\emph{2008 Eighth IEEE International Conference on Data Mining}. IEEE, 2008.
\doi{10.1109/ICDM.2008.17}}
\item{Liu, Fei Tony, Kai Ming
Ting, and Zhi-Hua Zhou. "Isolation-based anomaly detection."
\emph{ACM Transactions on Knowledge Discovery from Data (TKDD)} 6.1 (2012): 1-39.
\doi{10.1145/2133360.2133363}}
\item{Liu, Fei Tony,
Kai Ming Ting, and Zhi-Hua Zhou. "On detecting clustered anomalies using
SCiForest." \emph{Joint European Conference on Machine Learning and
Knowledge Discovery in Databases}. Springer, Berlin, Heidelberg, 2010.
\doi{10.1007/978-3-642-15883-4_18}}
\item{Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner. "Extended
isolation forest." \emph{IEEE Transactions on Knowledge and Data Engineering (2019)}.
\doi{10.1109/TKDE.2019.2947676}}
\item{\url{https://github.com/david-cortes/isotree}}
\item{References of related features such as response curves and variable importance
will be listed under their own functions}
}
}
\seealso{
\code{\link{evaluate_po}}, \code{\link{marginal_response}},
\code{\link{independent_response}}, \code{\link{shap_dependence}},
\code{\link{spatial_response}}, \code{\link{variable_analysis}},
\code{\link{isolation.forest}}
}
