% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/StandardizeNomenclature.R
\name{StandardizeNomenclature}
\alias{StandardizeNomenclature}
\alias{StandardizeDataSet}
\title{Standardize Nomenclature}
\usage{
StandardizeNomenclature(x, thesaurus, mark.unknown = FALSE)

StandardizeDataSet(data, thesaurusSet = zoologThesaurus)
}
\arguments{
\item{x}{Character vector.}

\item{thesaurus}{A thesaurus object.}

\item{mark.unknown}{Logical. If \code{FALSE} (default) the strings not found in the
thesaurus are kept without change. If \code{TRUE} the strings not in the
thesaurus are set to \code{NA}.}

\item{data}{A data frame.}

\item{thesaurusSet}{A thesaurus set.}
}
\value{
\code{StandardizeNomenclature} returns a vector of the same length as the
input vector \code{x}. The names present in the thesaurus are set to their
corresponding category. The names not in the thesaurus are kept unchanged if
\code{mark.unknown=FALSE} (default) and set to \code{NA} if
\code{mark.unknown=TRUE}.

\code{StandardizeDataSet} returns a data frame with the same structure as
the input \code{data}, but with its nomenclature standardized according to a
thesaurus set, which includes the appropriate thesauri for its column names
and for the values of a set of columns.
}
\description{
Functions to map the user-provided nomenclature into a standard one
as defined in a thesaurus.
}
\details{
\code{StandardizeNomenclature} standardizes a character vector
according to a given thesaurus.

\code{StandardizeDataSet} standardizes column names and values of
a data frame according to a thesaurus set.
}
\examples{
## Select the thesaurus for taxa present in the thesaurus set
## zoolog::zoologThesaurus:
thesaurus <- zoologThesaurus$taxon
thesaurus
## Standardize a heterodox vector of taxa:
StandardizeNomenclature(c("bota", "giraffe", "pig", "cattle"),
                        thesaurus)
## Observe that "giraffe" is kept unchanged since it is not included in
## any thesaurus category.
## But if mark.unknown is set to TRUE, it is marked as NA:
StandardizeNomenclature(c("bota", "giraffe", "pig", "cattle"),
                        thesaurus, mark.unknown = TRUE)

## This thesaurus is not case sensitive:
attr(thesaurus, "caseSensitive") #  == FALSE
## Thus, names are recognized independently of their case:
StandardizeNomenclature(c("bota", "BOTA", "Bota", "boTa"),
                        thesaurus)

## Load an example data frame:
dataFile <- system.file("extdata", "dataValenzuelaLamas2008.csv.gz",
                        package = "zoolog")
dataExample <- utils::read.csv2(dataFile,
                                na.strings = "",
                                encoding = "UTF-8")
## Observe mainly the first columns:
head(dataExample[,1:5])
## Standardize the dataset:
dataStandardized <- StandardizeDataSet(dataExample, zoologThesaurus)
head(dataStandardized[,1:5])

}
\seealso{
\code{\link{zoologThesaurus}} for a description of the thesaurus and
thesaurus set structure,

\code{\link{ThesaurusReaderWriter}}, \code{\link{ThesaurusManagement}}
}
