% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lsa.R
\name{lsa}
\alias{lsa}
\title{Latent Semantic Analysis model}
\usage{
lsa(
  x,
  dim = 50,
  min_count = 5L,
  engine = c("RSpectra", "irlba", "rsvd"),
  weight = "count",
  verbose = FALSE,
  ...
)
}
\arguments{
\item{x}{a \link[quanteda:tokens]{quanteda::tokens} object.}

\item{dim}{the size of the word vectors.}

\item{min_count}{the minimum frequency of the words. Words less frequent than
this in \code{x} are removed before training.}

\item{engine}{select the engine to perform SVD to generate word vectors.}

\item{weight}{weighting scheme passed to \code{\link[quanteda:dfm_weight]{quanteda::dfm_weight()}}.}

\item{verbose}{if \code{TRUE}, print the progress of training.}

\item{...}{additional arguments.}
}
\value{
Returns a \code{textmodel_wordvector} object with the following elements:
\item{vectors}{a matrix for word vectors.}
\item{frequency}{the frequency of words in \code{x}.}
\item{engine}{the SVD engine used.}
\item{weight}{weighting scheme.}
\item{concatenator}{the concatenator in \code{x}.}
\item{call}{the command used to execute the function.}
\item{version}{the version of the wordvector package.}
}
\description{
Train a Latent Semantic Analysis model (Deerwester et al., 1990) on a \link[quanteda:tokens]{quanteda::tokens} object.
}
\examples{
\donttest{
library(quanteda)
library(wordvector)

# pre-processing: reshape the corpus, tokenize, remove stopwords,
# select tokens with a regular expression, and lower-case them
corp <- corpus_reshape(data_corpus_news2014)
toks <- tokens(corp, remove_punct = TRUE, remove_symbols = TRUE) \%>\%
   tokens_remove(stopwords("en", "marimo"), padding = TRUE) \%>\%
   tokens_select("^[a-zA-Z-]+$", valuetype = "regex", case_insensitive = FALSE,
                 padding = TRUE) \%>\%
   tokens_tolower()

# train LSA
lsa <- lsa(toks, dim = 50, min_count = 5, verbose = TRUE)

# query the trained model
head(similarity(lsa, c("berlin", "germany", "france"), mode = "word"))
analogy(lsa, ~ berlin - germany + france)
}
}
\references{
Deerwester, S. C., Dumais, S. T., Landauer, T. K., Furnas, G. W., & Harshman, R. A. (1990).
Indexing by latent semantic analysis. JASIS, 41(6), 391–407.
}
