% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/shenkWest.R
\name{shenkWest}
\alias{shenkWest}
\title{SELECT algorithm for calibration sampling}
\usage{
shenkWest(X,
          d.min = 0.6,
          pc = 0.95,
          rm.outlier = FALSE,
          .center = TRUE,
          .scale = FALSE)
}
\arguments{
\item{X}{a numeric matrix (optionally a data frame that can
be coerced to a numerical matrix).}

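\item{d.min}{a numeric value giving the minimum (standardized Mahalanobis)
distance between observations: observations closer than \code{d.min} to a
selected observation are considered its neighbours and are discarded
(default = 0.6).}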

\item{pc}{the number of principal components retained for computing the
distance in the standardized Principal Component space (Mahalanobis distance).
If \code{pc < 1}, the number of principal components kept corresponds to the
number of components explaining at least (\code{pc * 100}) percent of the total
variance (default = 0.95).}

\item{rm.outlier}{logical. If \code{TRUE}, remove observations with a standardized
Mahalanobis distance to the center of the data greater than 3
(default = \code{FALSE}).}

\item{.center}{logical. Indicates whether the input matrix should be centered
before Principal Component Analysis. Default set to \code{TRUE}.}

\item{.scale}{logical. Indicates whether the input matrix should be scaled
before Principal Component Analysis. Default set to \code{FALSE}.}
}
\value{
a \code{list} with components:
\describe{
\item{'\code{model}'}{a numeric vector giving the row indices of the input data
selected for calibration.}
\item{'\code{test}'}{a numeric vector giving the row indices of the remaining
observations.}
\item{'\code{pc}'}{a numeric matrix of the scaled principal component scores.}
}
}
\description{
Select calibration samples from a large multivariate data set using the SELECT
algorithm as described in Shenk and Westerhaus (1991).
}
\details{
The SELECT algorithm is an iterative procedure based on the standardized
Mahalanobis distance between observations.
First, the observation having the highest number of neighbours within a given
minimum distance is selected and its neighbours are discarded. The procedure
is repeated until there is no observation left.

If the \code{rm.outlier} argument is set to \code{TRUE}, outliers will be removed
before running the SELECT algorithm, using the CENTER algorithm of
Shenk and Westerhaus (1991), i.e. samples with a standardized Mahalanobis
distance \verb{>3} are removed.
}
\examples{
data(NIRsoil)
# reduce data size
NIRsoil$spc <- binning(X = NIRsoil$spc, bin.size = 5)
sel <- shenkWest(NIRsoil$spc, pc = .99, d.min = .3, rm.outlier = FALSE)
plot(sel$pc[, 1:2], xlab = "PC1", ylab = "PC2")
# points selected for calibration
points(sel$pc[sel$model, 1:2], pch = 19, col = 2)
# without outliers
sel <- shenkWest(NIRsoil$spc, pc = .99, d.min = .3, rm.outlier = TRUE)
plot(sel$pc[, 1:2], xlab = "PC1", ylab = "PC2")
# points selected for calibration
points(sel$pc[sel$model, 1:2], pch = 15, col = 3)
}
\references{
Shenk, J.S., and Westerhaus, M.O., 1991. Population Definition,
Sample Selection, and Calibration Procedures for Near Infrared Reflectance
Spectroscopy. Crop Science 31, 469-474.
}
\seealso{
\code{\link{kenStone}}, \code{\link{duplex}}, \code{\link{puchwein}}
}
\author{
Antoine Stevens
}
