% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generalize.hz.R
\name{generalize.hz}
\alias{generalize.hz}
\alias{generalizeHz,character-method}
\alias{generalizeHz}
\alias{generalizeHz,SoilProfileCollection-method}
\title{Generalize Horizon Names}
\usage{
generalize.hz(
  x,
  new,
  pattern,
  non.matching.code = "not-used",
  hzdepm = NULL,
  ordered = !missing(hzdepm),
  ...
)

\S4method{generalizeHz}{character}(
  x,
  new,
  pattern,
  non.matching.code = "not-used",
  hzdepm = NULL,
  ordered = !missing(hzdepm),
  ...
)

\S4method{generalizeHz}{SoilProfileCollection}(
  x,
  new,
  pattern,
  non.matching.code = "not-used",
  hzdepm = NULL,
  ordered = !missing(hzdepm),
  ghl = "genhz",
  ...
)
}
\arguments{
\item{x}{character vector of horizon names or a \code{SoilProfileCollection} object}

\item{new}{character vector of generalized horizon labels (GHL)}

\item{pattern}{character vector of REGEX patterns, same length as \code{new}}

\item{non.matching.code}{character, label used for any horizon not matched by \code{pattern}}

\item{hzdepm}{numeric vector of horizon mid-points; \code{NA} values in \code{hzdepm} will result in \code{non.matching.code} (or \code{NA} if not defined) in result}

\item{ordered}{logical, should the result be an ordered factor? Defaults to \code{TRUE} when the \code{hzdepm} argument is specified}

\item{...}{additional arguments passed to \code{grep()} such as \code{perl = TRUE} for advanced REGEX}

\item{ghl}{Generalized Horizon Designation column name (to be created/updated when \code{x} is a \code{SoilProfileCollection})}
}
\value{
factor (possibly an ordered factor) of the same length as \code{x} (if character). When \code{x} is a \code{SoilProfileCollection}, the collection is returned with the factor (one value per horizon) stored in the \code{ghl} column.
}
\description{
Generalize a vector of horizon names based on new classes and REGEX
patterns, or create a new column \code{ghl} in a \code{SoilProfileCollection} (requires a horizon designation name to be defined for the collection, see details).
}
\details{
When \code{x} is a \code{SoilProfileCollection} the \code{ghl} column will be updated with the factor results. This requires that the "horizon designation name" metadata be defined for the collection to set the column for input designations.
}
\examples{

data(sp1)

# tabulate the horizon designations as originally recorded
table(sp1$name)

# generalized horizon labels (GHL), and the REGEX pattern selecting each one
ghl.labels <- c('O','A','B','C','R')
ghl.patterns <- c('O', '^A','^B','C','R')

# character vector input: one GHL is assigned per horizon name
sp1$genhz <- generalizeHz(sp1$name, new = ghl.labels, pattern = ghl.patterns, ordered = TRUE)

# cross-tabulate GHL vs. original names: see how we did / what we missed
table(sp1$genhz, sp1$name)


## a more advanced example, requires `perl = TRUE`
# example data
x <- c('A', 'AC', 'Bt1', '^AC', 'C', 'BC', 'CB')

# new labels
n <- c('A', '^AC', 'C')

# patterns:
# "A anywhere in the name"
# "literal '^A' anywhere in the name"
#   the caret is escaped: a bare '^' is the start-of-string anchor, not a literal character
# "C anywhere in name, but without preceding A"
#   negative look-behind, only available with `perl = TRUE`
p <- c('A', '\\\\^A', '(?<!A)C')

# note additional argument
res <- generalizeHz(
  x, 
  new = n, 
  pattern = p, 
  perl = TRUE
)

# double-check: OK
table(res, x)

## apply to a SoilProfileCollection
data(sp1)

# promote the data.frame to a SoilProfileCollection
depths(sp1) <- id ~ top + bottom

# the horizon designation metadata must be set before generalizing
hzdesgnname(sp1) <- 'name'

# generalized horizon labels (GHL), and the REGEX pattern selecting each one
ghl.labels <- c('O','A','B','C','R')
ghl.patterns <- c('O', '^A','^B','C','R')

# result is a SoilProfileCollection
x <- generalizeHz(sp1, new = ghl.labels, pattern = ghl.patterns, ordered = TRUE)

# GHL stored in 'genhz' column
x$genhz

# GHL metadata is set
GHL(x)

}
\references{
Beaudette, D.E., Roudier, P., Skovlin, J. (2016). Probabilistic Representation of Genetic Soil Horizons. In: Hartemink, A., Minasny, B. (eds) Digital Soil Morphometrics. Progress in Soil Science. Springer, Cham. https://doi.org/10.1007/978-3-319-28295-4_18
}
\seealso{
\code{\link[=hzdesgnname]{hzdesgnname()}}
}
\author{
D.E. Beaudette
}
\keyword{manip}
