% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sim.R
\name{SimulateMSeqC}
\alias{SimulateMSeqC}
\title{A Semiparametric Model-based Microbiome Sequencing Data Simulator for Longitudinal, Matched-pair, and Replicate Sampling Designs}
\description{
The function generates synthetic microbiome sequencing data for studying the performance of differential abundance analysis methods for correlated microbiome data 
generated in longitudinal, matched-pair and replicate sampling study designs. 
It uses a user-supplied (large) reference OTU table as a template to generate a synthetic OTU table of specified size. A subset of OTUs can be affected by
by a binary variable (group effect) and/or a time variable (temporal effect). Time X group interaction and confounder effects can also be simulated. 
 The function allows simulating different signal structures, i.e., the percentage of differential OTUs, their effect sizes, and their direction of change.
}

\usage{
SimulateMSeqC(	
  ref.otu.tab,
  model.paras = NULL,
  nSubject = 40,
  nOTU = 50,
  nTime = 2, 
  error.sd = 1, 
  MgX = 0.5, 
  SgX = 0, 
  X.diff.otu.pct = 0.1,
  grp.ratio = 1, 
  balanced.X = TRUE,
  MgT = 0, 
  SgT = 0, 
  SbT = 0, 
  T.diff.otu.pct = 0, 
  balanced.T = TRUE,
  MgXT = 0, 
  SgXT = 0, 
  XT.diff.otu.pct = 0, 
  balanced.XT = TRUE,
  conf.cov.cor = 0.6,
  confounder = c('X', 'T'),
  MgZ = 0.5,
  SgZ = 0, 
  Z.diff.otu.pct = 0.05,
  Z.nondiff.otu.pct = 0.1, 
  depth.mu = 10000,
  depth.theta = 5,
  depth.conf.factor = 0
)
}
\arguments{
\item{ref.otu.tab}{a matrix, the reference OTU count table (row - OTUs, column - samples), serving as the template for synthetic sample generation.}

\item{model.paras}{a list of Dirichlet-Multinomial parameters from running \code{GUniFrac:::EstPara(ref.otu.tab)}. As MLE-based parameter estimation is computationally intensive, 
for simulation studies with multiple iterations,  the parameters can be estiamted once and used repeatedly.}


\item{nSubject}{the number of subjects to be simulated.}

\item{nOTU}{the number of OTUs to be simulated. }

\item{nTime}{the number of time points to be simulated. }

\item{error.sd}{the standar deviation of the random error controlling the within-subject correlation strength. Large k = 1, small k = 4.}

\item{MgX}{a numeric value indicating the mean group (X) effect (log fold change ) across the associated OTUs. The default is 0.}

\item{SgX}{a numeric value indicating the variance of group (X) effect (log fold change) across the associated OTUs. The default is 0.}

\item{X.diff.otu.pct}{a numeric value between 0 and 1, the percentage of differential OTUs regarding the group (X) to be simulated.  The default is 0.1.}

\item{grp.ratio}{a numeric value between 0 and 1. Group size ratio. The default is 1, i.e., equal group size. }

\item{balanced.X}{a logical value.  TRUE - the direction of change for these group differential OTUs is random, FALSE - the direction of change is the same. The default is "balanced".}

\item{MgT}{a numeric value indicating the population mean of the time (T) effect (log fold change) across the associated OTUs. The default is 0.}

\item{SgT}{a numeric value indicating the population variance of the time (T) effect (log fold change) across the associated OTUs. The default is 0.}

\item{SbT}{a numeric value indicating the variance of time (T) effect (log fold change) across the subjects. This parameter is to generate a subject-level random slope 
(temporal trends differ by subjects). The default is 0. }

\item{T.diff.otu.pct}{a numeric value between 0 and 1, the percentage of time differential OTUs  to be simulated.  The default is 0.}

\item{balanced.T}{a logical value.  TRUE - the direction of change for these time differential OTUs is random, FALSE - the direction of change is the same. The default is "balanced".}

\item{MgXT}{a numeric value indicating the mean X:T interaction effect (log fold change) across the associated OTUs. The default is 0.}

\item{SgXT}{a numeric value indicating the variance of X:T interaction effect (log fold change) across the associated OTUs. The default is 0.}

\item{XT.diff.otu.pct}{a numeric value between 0 and 1, the percentage of X:T interaction OTUs to be simulated.  The default is 0.}

\item{balanced.XT}{a logical value.  TRUE - the direction of change for the interaction effects is random, FALSE - the direction of change is the same. The default is "balanced".}

\item{conf.cov.cor}{a numeric value between 0 and 1.  The correlation strength between the group and the confounder. The default is 0.6.}

\item{confounder}{one of 'X' or 'T', whether the confounder is correlated with 'X' or 'T'.}

\item{MgZ}{a numeric value indicating the mean confounder (Z) effect (log fold change) across the associated OTUs. The default is 0.}

\item{SgZ}{a numeric value indicating the variance of confounder (Z) effect (log fold change) across the associated OTUs. The default is 0.}

\item{Z.diff.otu.pct}{a numeric value between 0 and 1, less than the percentage of group differential OTUs,
 the percentage of confounder-associated OTUs, which are also group differential.  The default is 0.05.}

\item{Z.nondiff.otu.pct}{a numeric value between 0 and 1, less than the percentage of group non-differential OTUs,
the percentage of confounder-associated OTUs, which are not group differential.  The default is 0.1.}


\item{depth.mu}{the mean sequencing depth to be simulated. The default is 10,000.}

\item{depth.theta}{the theta value  of the negative binomial distribution controlling the variance (\code{mu + mu^2/theta}). The default is 5.}

\item{depth.conf.factor}{a numeric value controlling the dependence of the sequencing depth on the group variable (\code{depth.mu} * \code{exp(scale(X)} * \code{depth.conf.factor})). 
The default is 0, i.e., the depth is not different between groups. This parameter can be used to simulate depth confounding.}
}

\value{
Return a list with the elements:
\item{otu.tab.sim}{simulated OTU table}
\item{meta}{meta data containing the simulated covariates (group, time, confounder)}
\item{otu.names}{the names of the simulated OTUs}
\item{X.diff.otu.ind}{indices of the group differential OTUs}
\item{T.diff.otu.ind}{indices of the time differential OTUs}
\item{XT.diff.otu.ind}{indices of the OTUs with group:time interaction}
\item{Z.diff.otu.ind}{indices of OTUs affected by the confounder}
}

\details{

This function implements a semiparametric approach for realistic correlated microbiome data generation. The method draws random samples from a large reference dataset (non-parametric part) and uses
 these reference samples as templates to generate new samples (parametric part). Specifically, for each drawn reference sample, it infers the underlying composition based on a Bayesian model and 
 then adds group/time/group:time/confounder effects to the composition vector, based on which a new sequencing sample is generated. The method circumvents the difficulty in modeling the inter-subject variation
 of the microbiome composition.   

}
\references{
Yang, L. & Chen, J. Benchmarking Differential Abundance Analysis Methods for Correlated Microbiome Sequencing Data. Briefings in bioinformatics 24.1 (2023): bbac607.

}


\author{Lu Yang and Jun Chen}

\examples{

# Use throat microbiome for illustration
data(throat.otu.tab)
comm <- t(throat.otu.tab)
comm <- comm[rowMeans(comm != 0) > 0.2, ]

# Example1: Simulate replicate sampling data, 40 subjects each with two replicates (nTime =2), 
# two group comparison, 10\% group differential OTUs
\dontrun{
sim.obj <- SimulateMSeqC(ref.otu.tab= comm, 
                         nSubject = 40, nOTU = 50, nTime = 2,
                         # Within-subject correlation setting
                         error.sd = 1, 
                         # Group effect setting, unbalanced
                         MgX = 0.5, SgX = 0, X.diff.otu.pct = 0.1, grp.ratio = 1, 
                         balanced.X = FALSE,
                         # Time effect setting (No time effect)
                         MgT = 0, SgT = 0, SbT = 0, T.diff.otu.pct = 0,
                         # Interaction effect setting (No interaction effect)
                         MgXT = 0, SgXT = 0, XT.diff.otu.pct = 0, 
                         # Confounder effect setting
                         conf.cov.cor = 0.6, confounder = 'X',
                         MgZ = 0.5, SgZ = 0,  Z.diff.otu.pct = 0.05, Z.nondiff.otu.pct = 0.1, 
                         # Sequencing Depth setting
                         depth.mu = 10000, depth.theta = 5,  depth.conf.factor = 0)
}
# Example2: Simulate matched-pair data, 100 subjects each with pre- and post-treatment (nTime = 2), 
# 10\% differential OTUs
\dontrun{
sim.obj <- SimulateMSeqC(ref.otu.tab= comm, 
                         nSubject = 100, nOTU = 50, nTime = 2,
                         # Within-subject correlation setting
                         error.sd = 1, 
                         # Group effect setting (No group effect)
                         MgX = 0, SgX = 0, X.diff.otu.pct = 0, grp.ratio = 1, 
                         # Time effect setting (No random slope, SbT=0)
                         MgT = 0.5, SgT = 0, SbT = 0, T.diff.otu.pct = 0.1, 
                         # Interaction effect setting (No interaction effect)
                         MgXT = 0, SgXT = 0, XT.diff.otu.pct = 0,
                         # Confounder effect setting (T!)
                         conf.cov.cor = 0.6, confounder = 'T',
                         MgZ = 0, SgZ = 0,  Z.diff.otu.pct = 0.05, Z.nondiff.otu.pct = 0.1, 
                         # Sequencing Depth setting
                         depth.mu = 10000, depth.theta = 5,  depth.conf.factor = 0)
}

# Example3: Simulate the general longitudinal data, 40 Subjects each with five time points, 
# two groups, 10\% group differential OTUs, 10 \% time differential OTUs and 10 \% interaction OTUs.
\dontrun{
sim.obj <- SimulateMSeqC(ref.otu.tab= comm, 
                         nSubject = 40, nOTU = 50, nTime = 5,
                         # Within-subject correlation setting
                         error.sd = 1, 
                         # Group effect setting, balanced
                         MgX = 0.5, SgX = 0, X.diff.otu.pct = 0.1, grp.ratio = 1, 
                         balanced.X = TRUE,
                         # Time effect setting (random slope)
                         MgT = 0.5, SgT = 0, SbT = 0.5, T.diff.otu.pct = 0.1, 
                         # Interaction effect setting 
                         MgXT = 0.5, SgXT = 0, XT.diff.otu.pct = 0.1, 
                         # Confounder effect setting
                         conf.cov.cor = 0.6, confounder = 'X',
                         MgZ = 0.5, SgZ = 0,  Z.diff.otu.pct = 0.05, Z.nondiff.otu.pct = 0.1, 
                         # Depth setting
                         depth.mu = 10000, depth.theta = 5,  depth.conf.factor = 0)
}
}
\keyword{composition}
\keyword{microbiome}
\keyword{simulation}

