% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/qts-kmeans.R
\name{kmeans}
\alias{kmeans}
\alias{kmeans.default}
\alias{kmeans.qts_sample}
\title{QTS K-Means Alignment Algorithm}
\usage{
kmeans(x, n_clusters, ...)

\method{kmeans}{default}(
  x,
  n_clusters = 1,
  iter_max = 10,
  nstart = 1,
  algorithm = c("Hartigan-Wong", "Lloyd", "Forgy", "MacQueen"),
  trace = FALSE,
  ...
)

\method{kmeans}{qts_sample}(
  x,
  n_clusters = 1L,
  seeds = NULL,
  seeding_strategy = c("kmeans++", "exhaustive-kmeans++", "exhaustive", "hclust"),
  is_domain_interval = FALSE,
  transformation = c("identity", "srvf"),
  warping_class = c("none", "shift", "dilation", "affine", "bpd"),
  centroid_type = "mean",
  metric = c("l2", "normalized_l2", "pearson"),
  cluster_on_phase = FALSE,
  use_fence = FALSE,
  ...
)
}
\arguments{
\item{x}{Either a numeric matrix of data, or an object that can be coerced to
such a matrix (such as a numeric vector or a data frame with all numeric
columns) or an object of class \link{qts_sample}.}

\item{n_clusters}{An integer value specifying the number of clusters to
look for.}

\item{...}{not used.}

\item{iter_max}{An integer value specifying the maximum number of iterations
for terminating the k-means algorithm. Defaults to \code{10L}.}

\item{nstart}{if \code{centers} is a number, how many random sets
    should be chosen?}

\item{algorithm}{character: may be abbreviated.  Note that
    \code{"Lloyd"} and \code{"Forgy"} are alternative names for one
    algorithm.}

\item{trace}{logical or integer number, currently only used in the
    default method (\code{"Hartigan-Wong"}): if positive (or true),
    tracing information on the progress of the algorithm is
    produced.  Higher values may produce more tracing information.}

\item{seeds}{An integer value or vector specifying the indices of the initial
centroids. If an integer vector, it is interpreted as the indices of the
initial centroids and should therefore be of length \code{n_clusters}. If an
integer value, it is interpreted as the index of the first initial centroid
and subsequent centroids are chosen according to the k-means++ strategy. It
can be \code{NULL} in which case the argument \code{seeding_strategy} is used to
automatically provide suitable indices. Defaults to \code{NULL}.}

\item{seeding_strategy}{A character string specifying the strategy for
choosing the initial centroids in case the argument \code{seeds} is set to
\code{NULL}. Choices are
\href{https://en.wikipedia.org/wiki/K-means\%2B\%2B}{\code{"kmeans++"}},
\code{"exhaustive-kmeans++"} which performs an exhaustive search over the choice
of the first centroid, \code{"exhaustive"} which tries all combinations of
initial centroids or \code{"hclust"} which first performs hierarchical
clustering using Ward's linkage criterion to identify initial centroids.
Defaults to \code{"kmeans++"}, which is the fastest strategy.}

\item{is_domain_interval}{A boolean specifying whether the sample of curves
is defined on a fixed interval. Defaults to \code{FALSE}.}

\item{transformation}{A string specifying the transformation to apply to the
original sample of curves. Choices are no transformation (\code{transformation = "identity"}) or square-root velocity function (\code{transformation = "srvf"}).
Defaults to \code{"identity"}.}

\item{warping_class}{A string specifying the class of warping functions.
Choices are no warping (\code{warping_class = "none"}), shift \code{y = x + b}
(\code{warping_class = "shift"}), dilation \code{y = ax} (\code{warping_class = "dilation"}), affine \code{y = ax + b} (\code{warping_class = "affine"}) or
boundary-preserving diffeomorphism (\code{warping_class = "bpd"}). Defaults to
\code{"none"}.}

\item{centroid_type}{A string specifying the type of centroid to compute.
Choices are \code{"mean"}, \code{"median"}, \code{"medoid"}, \code{"lowess"} or \code{"poly"}.
Defaults to \code{"mean"}. If LOWESS approximation is chosen, the user can
append an integer between 0 and 100 as in \code{"lowess20"}. This number will be
used as the smoother span. This gives the proportion of points in the plot
which influence the smooth at each value. Larger values give more
smoothness. The default value is 10\%. If polynomial approximation is
chosen, the user can append a positive integer as in \code{"poly3"}. This
number will be used as the degree of the polynomial model. The default
value is \code{4L}.}

\item{metric}{A string specifying the metric used to compare curves. Choices
are \code{"l2"}, \code{"normalized_l2"} or \code{"pearson"}. If \code{transformation == "srvf"}, the metric \strong{must be} \code{"l2"} because the SRVF transform maps
absolutely continuous functions to square-integrable functions. If
\code{transformation == "identity"} and \code{warping_class} is either \code{dilation} or
\code{affine}, the metric can be either \code{"normalized_l2"} or \code{"pearson"}. The L2
distance is indeed \strong{not} dilation-invariant or affine-invariant. The
metric can also be \code{"l2"} if \code{warping_class == "shift"}. Defaults to
\code{"l2"}.}

\item{cluster_on_phase}{A boolean specifying whether clustering should be
based on phase variation or amplitude variation. Defaults to \code{FALSE} which
implies amplitude variation.}

\item{use_fence}{A boolean specifying whether the fence algorithm should be
used to robustify the algorithm against outliers. Defaults to \code{FALSE}. This
is used only when \code{warping_class != "srvf"}.}
}
\value{
An object of class \code{\link[stats:kmeans]{stats::kmeans}} or \code{\link[stats:hclust]{stats::hclust}} or
\code{dbscan_fast} if the input \code{x} is NOT of class \code{\link{qts_sample}}. Otherwise,
an object of class \code{qtsclust} which is effectively a list with five
components:
\itemize{
\item \code{qts_aligned}: An object of class \code{\link{qts_sample}} storing the sample of
aligned QTS;
\item \code{qts_centers}: A list of objects of class \code{\link{qts}} representing the centers
of the clusters;
\item \code{best_clustering}: An object of class \code{\link[fdacluster:caps]{fdacluster::caps}} storing the
results of the best k-means alignment among all initializations that
were tried;
\item \code{call_name}: A string storing the name of the function that was used to
produce the clustering structure;
\item \code{call_args}: A list containing the exact arguments that were passed to
the function \code{call_name} that produced this output.
}
}
\description{
This function massages the input quaternion time series to feed them into the
k-means alignment algorithm for jointly clustering and aligning the input
QTS.
}
\examples{
out <- kmeans(vespa64$igp[1:10], n_clusters = 2)
}
