% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MAXLEN_est.R
\name{MAXLEN_est}
\alias{MAXLEN_est}
\title{Estimate MAXLEN}
\usage{
MAXLEN_est(data, probs, alpha = 0.01, frequent = FALSE)
}
\arguments{
\item{data}{Dataset; needs to be of class data.frame and consist of factor variables only.}

\item{probs}{List of probability vectors, one for each variable. Each element of the list must
contain as many probabilities as the corresponding variable has levels in the dataset.}

\item{alpha}{Significance level for the simultaneous Multinomial confidence intervals. These intervals determine the
frequency thresholds for itemsets of different lengths, which are used for outlier detection for discrete features. Must be a
positive real, at most equal to 0.50. A greater value leads to a much more conservative algorithm. Default value is 0.01.}

\item{frequent}{Logical determining whether highly frequent or highly infrequent itemsets are considered as outliers. Defaults
to FALSE, treating highly infrequent itemsets as outlying.}
}
\value{
Estimated MAXLEN value.
}
\description{
Estimates the value of MAXLEN (the stopping criterion) prior to running the SONO algorithm. The estimation follows
the ideas described in \insertCite{costa_novel_2025;textual}{SONO} and relies on simultaneous confidence intervals for
Multinomial proportions, as constructed by \insertCite{sison_simultaneous_1995;textual}{SONO}.
}
\examples{
# Fix the RNG seed so the simulated data, and hence the output, are reproducible.
set.seed(1)
# Simulate 100 observations of two factor variables with 2 and 3 levels.
dt <- data.frame(
  V1 = factor(sample(1:2, 100, replace = TRUE, prob = c(0.5, 0.5))),
  V2 = factor(sample(1:3, 100, replace = TRUE, prob = c(0.5, 0.3, 0.2)))
)
# probs holds one probability vector per variable, with one entry per level.
MAXLEN_est(
  data = dt,
  probs = list(c(0.5, 0.5), c(1/3, 1/3, 1/3)),
  alpha = 0.01,
  frequent = FALSE
)

}
\references{
{
\insertRef{costa_novel_2025}{SONO}

\insertRef{sison_simultaneous_1995}{SONO}
}
}
