% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CalSVG_nnSVG.R
\name{CalSVG_nnSVG}
\alias{CalSVG_nnSVG}
\title{nnSVG: Nearest-Neighbor Gaussian Process SVG Detection}
\usage{
CalSVG_nnSVG(
  expr_matrix,
  spatial_coords,
  X = NULL,
  n_neighbors = 10L,
  order = c("AMMD", "Sum_coords"),
  cov_model = c("exponential", "gaussian", "spherical", "matern"),
  adjust_method = "BH",
  n_threads = 1L,
  verbose = FALSE
)
}
\arguments{
\item{expr_matrix}{Numeric matrix of gene expression values.
\itemize{
  \item Rows: genes
  \item Columns: spatial locations (spots/cells)
  \item Values: log-normalized counts (e.g., from scuttle::logNormCounts)
}}

\item{spatial_coords}{Numeric matrix of spatial coordinates.
\itemize{
  \item Rows: spatial locations (must match columns of expr_matrix)
  \item Columns: x, y coordinates
}}

\item{X}{Optional numeric matrix of covariates to regress out.
\itemize{
  \item Rows: spatial locations (same order as spatial_coords)
  \item Columns: covariates (e.g., batch, cell type indicators)
}
Default is NULL (intercept-only model).}

\item{n_neighbors}{Integer. Number of nearest neighbors for NNGP model.
Default is 10.
\itemize{
  \item 5-10: Faster, captures local patterns
  \item 15-20: Better likelihood estimates, slower
}
Values > 15 rarely improve results but increase computation time.}

\item{order}{Character string specifying coordinate ordering scheme.
\itemize{
  \item \code{"AMMD"} (default): Approximate Maximum Minimum Distance.
    Better for most datasets. Requires >= 65 spots.
  \item \code{"Sum_coords"}: Order by sum of coordinates.
    Use for very small datasets (< 65 spots).
}}

\item{cov_model}{Character string specifying the covariance function.
Default is "exponential".
\itemize{
  \item \code{"exponential"}: Most commonly used, computationally stable
  \item \code{"gaussian"}: Smoother patterns, requires stabilization
  \item \code{"spherical"}: Finite range correlation
  \item \code{"matern"}: Flexible smoothness (includes additional nu parameter)
}}

\item{adjust_method}{Character string for p-value adjustment.
Default is "BH" (Benjamini-Hochberg).}

\item{n_threads}{Integer. Number of parallel threads. Default is 1.
Set to number of available cores for faster computation.}

\item{verbose}{Logical. Print progress messages. Default is FALSE.}
}
\value{
A data.frame with SVG detection results. Columns:
  \itemize{
    \item \code{gene}: Gene identifier
    \item \code{sigma.sq}: Spatial variance estimate (sigma^2)
    \item \code{tau.sq}: Nonspatial variance estimate (tau^2, nugget)
    \item \code{phi}: Range parameter estimate (controls spatial correlation decay)
    \item \code{prop_sv}: Proportion of spatial variance = sigma.sq / (sigma.sq + tau.sq)
    \item \code{loglik}: Log-likelihood of spatial model
    \item \code{loglik_lm}: Log-likelihood of non-spatial model (linear model)
    \item \code{LR_stat}: Likelihood ratio test statistic = -2 * (loglik_lm - loglik)
    \item \code{rank}: Rank by LR statistic (1 = largest LR statistic)
    \item \code{p.value}: P-value from chi-squared distribution (df = 2)
    \item \code{p.adj}: Adjusted p-value
    \item \code{runtime}: Computation time per gene (seconds)
  }
}
\description{
Detect spatially variable genes using nnSVG, a method based on
nearest-neighbor Gaussian processes for scalable spatial modeling.

nnSVG uses nearest-neighbor Gaussian processes (NNGP) to model spatial
correlation structure in gene expression. It performs likelihood ratio
tests comparing spatial vs. non-spatial models to identify SVGs.
}
\details{
\strong{Method Overview:}

nnSVG models gene expression as a Gaussian process:
\deqn{y = X\beta + \omega + \epsilon}

where:
\itemize{
  \item y = expression vector
  \item X = covariate matrix, beta = coefficients
  \item omega ~ GP(0, sigma^2 * C(phi)) = spatial random effect
  \item epsilon ~ N(0, tau^2) = non-spatial noise
  \item C(phi) = covariance function with range phi
}

\strong{Nearest-Neighbor Approximation:}
Fitting a full GP has O(n^3) complexity. NNGP approximates it using only the
k nearest neighbors of each location, reducing complexity to O(n * k^3),
which is linear in n for fixed k.

\strong{Statistical Test:}
Likelihood ratio test comparing:
\itemize{
  \item H0 (null): y = X*beta + epsilon (no spatial effect)
  \item H1 (alternative): y = X*beta + omega + epsilon (with spatial effect)
}
Under H0, the LR statistic approximately follows a chi-squared distribution
with df = 2 (testing sigma.sq and phi).

\strong{Effect Size:}
Proportion of spatial variance (prop_sv) measures effect size:
\itemize{
  \item prop_sv near 1: Strong spatial pattern
  \item prop_sv near 0: Little spatial structure
}

\strong{Computational Notes:}
\itemize{
  \item Requires BRISC package for NNGP fitting
  \item O(n) complexity per gene with NNGP approximation
  \item Parallelization over genes provides good speedup
  \item Memory: O(n * k) per gene
}
}
\examples{
# Load example data
data(example_svg_data)
expr <- example_svg_data$logcounts[1:10, ]  # Small subset
coords <- example_svg_data$spatial_coords

\donttest{
# Basic usage (requires BRISC package)
if (requireNamespace("BRISC", quietly = TRUE)) {
    results <- CalSVG_nnSVG(expr, coords, verbose = FALSE)
    head(results)
}
}

}
\references{
Weber, L.M. et al. (2023) nnSVG for the scalable identification of
spatially variable genes using nearest-neighbor Gaussian processes.
Nature Communications.

Datta, A. et al. (2016) Hierarchical Nearest-Neighbor Gaussian Process
Models for Large Geostatistical Datasets. JASA.
}
\seealso{
\code{\link{CalSVG}}, BRISC package documentation
}
