% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cooccurrences.R, R/as.sparseMatrix.R,
%   R/enrich.R
\docType{class}
\name{Cooccurrences-class}
\alias{Cooccurrences-class}
\alias{as_igraph}
\alias{as.simple_triplet_matrix,Cooccurrences-method}
\alias{as_igraph,Cooccurrences-method}
\alias{subset,Cooccurrences-method}
\alias{decode,Cooccurrences-method}
\alias{kwic,Cooccurrences-method}
\alias{as.sparseMatrix,Cooccurrences-method}
\alias{enrich,Cooccurrences-method}
\title{Cooccurrences class for corpus/partition.}
\usage{
\S4method{as.simple_triplet_matrix}{Cooccurrences}(x)

\S4method{as_igraph}{Cooccurrences}(
  x,
  edge_attributes = c("ll", "ab_count", "rank_ll"),
  vertex_attributes = "count",
  as.undirected = TRUE,
  drop = getOption("polmineR.villainChars")
)

\S4method{subset}{Cooccurrences}(x, ..., by)

\S4method{decode}{Cooccurrences}(.Object)

\S4method{kwic}{Cooccurrences}(
  .Object,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  verbose = TRUE,
  progress = TRUE
)

\S4method{as.sparseMatrix}{Cooccurrences}(x, col = "ab_count", ...)

\S4method{enrich}{Cooccurrences}(.Object)
}
\arguments{
\item{x}{A \code{Cooccurrences} class object.}

\item{edge_attributes}{Attributes from the \code{data.table} in the \code{stat} slot of \code{x} to add to edges.}

\item{vertex_attributes}{Vertex attributes to add to nodes.}

\item{as.undirected}{Logical, whether to return an undirected (rather than a directed) graph.}

\item{drop}{A character vector indicating names of nodes to drop from
\code{igraph} object that is prepared.}

\item{...}{Further arguments passed into a further call of \code{subset}.}

\item{by}{A \code{features}-class object.}

\item{.Object}{A \code{Cooccurrences}-class object.}

\item{left}{Number of tokens to the left of the node.}

\item{right}{Number of tokens to the right of the node.}

\item{verbose}{Logical.}

\item{progress}{Logical, whether to show progress bar.}

\item{col}{A column to extract.}
}
\description{
The \code{Cooccurrences}-class stores the information for all cooccurrences
in a corpus. As this data can be bulky, in-place modifications of the
\code{data.table} in the stat-slot of a \code{Cooccurrences}-object are used
wherever possible, to avoid copying potentially large objects. The class
inherits from the \code{textstat}-class, so that methods for
\code{textstat}-objects are inherited (see examples).
}
\details{
The \code{as.simple_triplet_matrix}-method will transform a
\code{Cooccurrences} object into a sparse matrix. For reasons of memory
efficiency, decoding token ids is performed within the method as late as
possible. It is NOT necessary that decoded tokens are present
in the table in the \code{Cooccurrences} object.

The \code{as_igraph}-method can be used to turn an object of the \code{Cooccurrences}-class
into an \code{igraph}-object.

The \code{subset} method, as a particular feature, allows a
\code{Cooccurrences}-object to be subsetted by a \code{features}-object
resulting from a features extraction that compares two \code{Cooccurrences}
objects.

For reasons of memory efficiency, the initial \code{data.table} in
the slot \code{stat} of a \code{Cooccurrences}-object will identify tokens by an
integer id, not by the string of the token. The \code{decode()}-method will
replace these integer columns with human-readable character vectors. Due to
the reference logic of the \code{data.table} object, this is an in-place
operation, performed without copying the table. The modified object is
returned invisibly; usually it will not be necessary to catch the return
value.

The \code{kwic}-method will add a column to the \code{data.table} in
the \code{stat}-slot with the concordances that are behind a statistical
finding, and to the \code{data.table} in the \code{stat}-slot of the
\code{partition} in the slot \code{partition}. It is an in-place operation.

The \code{as.sparseMatrix}-method returns a \code{sparseMatrix} based on the
counts of term cooccurrences. At this stage, it is required that decoded
tokens are present.

The \code{enrich()}-method will add columns 'a_count' and 'b_count' to
the \code{data.table} in the 'stat' slot of the \code{Cooccurrences} object. If the
count for the subcorpus/partition from which the cooccurrences are derived
is not yet present, the count is performed first.
}
\section{Slots}{

\describe{
\item{\code{left}}{Single \code{integer} value, number of tokens to the left of the node.}

\item{\code{right}}{Single \code{integer} value, number of tokens to the right of the node.}

\item{\code{p_attribute}}{A \code{character} vector, the p-attribute(s) the evaluation of the corpus is based on.}

\item{\code{corpus}}{Length-one \code{character} vector, the CWB corpus used.}

\item{\code{stat}}{A \code{data.table} with the statistical analysis of cooccurrences.}

\item{\code{encoding}}{Length-one \code{character} vector, the encoding of the corpus.}

\item{\code{partition}}{The \code{partition} that is the basis for computations.}

\item{\code{window_sizes}}{A \code{data.table} linking the number of tokens in the
context of a token identified by id.}

\item{\code{minimized}}{Logical, whether the object has been minimized.}
}}

\examples{
use(pkg = "RcppCWB", corpus = "REUTERS")

X <- Cooccurrences("REUTERS", p_attribute = "word", left = 2L, right = 2L)
m <- as.simple_triplet_matrix(X)
\dontrun{
use(pkg = "RcppCWB", corpus = "REUTERS")

X <- Cooccurrences("REUTERS", p_attribute = "word", left = 5L, right = 5L)
decode(X)
sm <- as.sparseMatrix(X)
stm <- as.simple_triplet_matrix(X)
}
}
\seealso{
See the documentation of the \code{\link{Cooccurrences}}-method
(including examples) for procedures to get and filter cooccurrence
information. See the documentation for the \code{\link{textstat-class}}
for an explanation of the methods that are available for this superclass
of the \code{Cooccurrences}-class.
}
