% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/start.em.2level.R
\name{mult.em_2level}
\alias{mult.em_2level}
\title{EM algorithm for multivariate two level model with covariates}
\arguments{
\item{data}{A data set object; we denote its dimension by \eqn{m}.}

\item{v}{Covariate(s).}

\item{K}{Number of mixture components, the default is \code{K = 2}.}

\item{steps}{Number of iterations, the default is \code{steps = 20}.}

\item{start}{A list containing the starting values of the parameters involved in the proposed model (\code{p}, \code{alpha}, \code{z}, \code{beta}, \code{sigma}, \code{gamma}).
The starting values can be obtained through the use of \link{start_em}. More details can be found in \link{start_em}.}

\item{option}{Four options for selecting the starting values for the parameters in the model. The default is \code{option = 1}.
More details can be found in \link{start_em}.}

\item{var_fun}{There are two types of variance specifications: \code{var_fun = 1}, the same diagonal variance matrix for all \code{K} components of the mixture;
\code{var_fun = 2}, different diagonal variance matrices for different components.
The default is \code{var_fun = 2}.}
}
\value{
The estimated parameters in the model \eqn{x_{ij} = \alpha + \beta z_k + \Gamma v_{ij} + \varepsilon_{ij}} obtained through the EM algorithm,
        where the upper-level unit is indexed by \eqn{i}, and the lower-level unit is indexed by \eqn{j}.
 \item{p}{The estimates for the parameter \eqn{\pi_k}, which is a vector of length \eqn{K}.}
 \item{alpha}{The estimates for the parameter \eqn{\alpha}, which is a vector of length \eqn{m}.}
 \item{z}{The estimates for the parameter \eqn{z_k}, which is a vector of length \eqn{K}.}
 \item{beta}{The estimates for the parameter \eqn{\beta}, which is a vector of length \eqn{m}.}
 \item{gamma}{The estimates for the parameter \eqn{\Gamma}, which is a matrix.}
 \item{sigma}{The estimates for the parameter \eqn{\Sigma_k}.
                     When \code{var_fun = 1}, \eqn{\Sigma_k} is a diagonal matrix and \eqn{\Sigma_k = \Sigma}, and we obtain a vector of the diagonal elements.
                     When \code{var_fun = 2}, \eqn{\Sigma_k} is a diagonal matrix, and we obtain \code{K} vectors of the diagonal elements.
                     }
 \item{W}{The posterior probability matrix.}
 \item{loglikelihood}{The approximated log-likelihood of the fitted model.}
 \item{disparity}{The disparity (\code{-2logL}) of the fitted model.}
 \item{number_parameters}{The number of parameters estimated in the EM algorithm.}
 \item{AIC}{The AIC value (\code{-2logL + 2 * number_parameters}).}
 \item{starting_values}{A list of starting values for parameters used in the EM algorithm.}
}
\description{
This function extends the one-level version \link{mult.em_1level},
and it is designed to obtain Maximum Likelihood Estimates (MLE) using the EM algorithm for nested (structured) multivariate data,
e.g. multivariate test scores (such as on numeracy, literacy) of students nested in different classes or schools.
The resulting estimates can be applied for clustering or constructing league tables (ranking of observations).
With the inclusion of covariates, the model allows fitting a multivariate response model for further regression analysis.
Detailed information about the model used in this function can be found in Zhang et al. (2023). Note that this function is designed for multivariate data.
When the dimension of the data is 1, please use \link[npmlreg]{allvc} as an alternative. A warning message will also be displayed when the input data is a univariate dataset.
}
\note{
It is worth noting that due to the sequential nature of the updates within the M-step,
     this algorithm can be considered an ECM algorithm.
}
\examples{
\donttest{
## Example 1: data without covariates.
data(trading_data)
set.seed(49)
trade_res <- mult.em_2level(trading_data, K=4, steps = 10, var_fun = 2)

## For each row of the posterior probability matrix W, pick the mixture
## component (mass point) with the highest posterior probability.
i_1 <- apply(trade_res$W, 1, which.max)
## Expand the row-wise assignments to one label per plotted observation.
## NOTE(review): the repetition counts are presumably the numbers of
## lower-level observations within each upper-level unit; confirm against the data.
ind_certain <- rep(as.vector(i_1),c(4,5,5,3,5,5,4,4,5,5,5,5,5,5,5,5,5,5,
3,5,5,5,5,4,4,5,5,5,4,5,4,5,5,5,3,5,5,5,5,5,5,4,5,4))
colors <- c("#FF6600","#66BD63", "lightpink","purple")
## Index the palette by the mass point number itself (not by factor codes), so
## that mass point k is always drawn in colors[k], even if some mass point
## receives no observations.
plot(trading_data[,-3],pch = 1, col = colors[ind_certain])
## Use the same palette, in the same order, for the legend so that the legend
## entries match the colours of the plotted points.
legend("topleft", legend=c("Mass point 1", "Mass point 2","Mass point 3","Mass point 4"),
col=colors,pch = 1, cex=0.8)

### Example 2: the twins data, with covariates.
library(lme4)
set.seed(26)
## Columns 1 to 3 are used as the multivariate response, columns 4 to 6 as covariates.
twins_res <- mult.em_2level(twins_data[,c(1,2,3)],v=twins_data[,c(4,5,6)],
K=2, steps = 20, var_fun = 2)
## Estimated covariate coefficients (the matrix Gamma); printed for the comparison below.
coeffs <- twins_res$gamma
coeffs
## Compare to the estimated coefficients obtained using individual two-level models (lmer()).
## The value extracted here is the estimate in row 2, column 1 of the lmer coefficient table.
summary(lmer(SelfTouchCodable ~ Depression + PSS + Anxiety + (1 | id) ,
data=twins_data, REML = TRUE))$coefficients[2,1]
}
}
\references{
Zhang, Y., Einbeck, J. and Drikvandi, R. (2023). A multilevel multivariate response model for data with latent structures.
            In: Proceedings of the 37th International Workshop on Statistical Modelling, pages 343-348.
            Link on RG: \url{https://www.researchgate.net/publication/375641972_A_multilevel_multivariate_response_model_for_data_with_latent_structures}
}
\seealso{
\code{\link{mult.reg_2level}}.
}
