% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict_race.R
\name{predict_race}
\alias{predict_race}
\title{Race prediction function.}
\usage{
predict_race(voter.file, census.surname = TRUE, surname.only = FALSE,
  surname.year = 2010, census.geo, census.key, census.data = NA,
  age = FALSE, sex = FALSE, party)
}
\arguments{
\item{voter.file}{An object of class \code{data.frame}. 
Must contain a row for each individual being predicted, 
as well as a field named \code{\var{surname}} containing each individual's surname.
If using geolocation in predictions, \code{\var{voter.file}} must contain a field named 
\code{\var{state}}, which contains the two-character abbreviation for each individual's 
state of residence (e.g., \code{"nj"} for New Jersey). 
If using Census geographic data in race/ethnicity predictions, 
\code{\var{voter.file}} must also contain at least one of the following fields: 
\code{\var{county}}, \code{\var{tract}}, and/or \code{\var{block}}. 
These fields should contain character strings matching U.S. Census categories. 
County is three characters (e.g., \code{"031"} not \code{"31"}), 
tract is six characters, and block is four characters. 
See below for other optional fields.}

\item{census.surname}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, 
function will call \code{merge_surnames} to merge in Pr(Race | Surname) 
from U.S. Census Surname List (2000 or 2010) and Spanish Surname List. 
If \code{FALSE}, \code{voter.file} object must contain additional fields specifying 
Pr(Race | Surname), named as follows: \code{\var{p_whi}} for Whites, 
\code{\var{p_bla}} for Blacks, \code{\var{p_his}} for Hispanics/Latinos, 
\code{\var{p_asi}} for Asians, and/or \code{\var{p_oth}} for Other. 
Default is \code{TRUE}.}

\item{surname.only}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, race predictions will 
only use surname data and calculate Pr(Race | Surname). Default is \code{FALSE}.}

\item{surname.year}{A number to specify the year of the census surname statistics. 
These surname statistics are stored in the data, and will be loaded automatically.
The default value is \code{2010}, which means the surname statistics from the 
2010 census will be used. Currently, the other available choice is \code{2000}.}

\item{census.geo}{An optional character vector specifying what level of 
geography to use to merge in U.S. Census 2010 geographic data. Currently
\code{"county"}, \code{"tract"}, or \code{"block"} are supported.
Note: sufficient information must be in user-defined \code{\var{voter.file}} object. 
If \code{\var{census.geo} = "county"}, then \code{\var{voter.file}} 
must have a column named \code{county}.
If \code{\var{census.geo} = "tract"}, then \code{\var{voter.file}} 
must have columns named \code{county} and \code{tract}.
And if \code{\var{census.geo} = "block"}, then \code{\var{voter.file}} 
must have columns named \code{county}, \code{tract}, and \code{block}.
Specifying \code{\var{census.geo}} will call the \code{census_helper} function 
to merge Census geographic data at the specified level of geography.}

\item{census.key}{A character object specifying the user's Census API 
key. Required if \code{\var{census.geo}} is specified, because 
a valid Census API key is required to download Census geographic data.}

\item{census.data}{A list indexed by two-letter state abbreviations, 
which contains pre-saved Census geographic data. 
Can be generated using \code{get_census_data} function.}

\item{age}{An optional \code{TRUE}/\code{FALSE} object specifying whether to 
condition race predictions on age (in addition to surname and geolocation). 
Default is \code{FALSE}. Must be the same as \code{\var{age}} in the \code{\var{census.data}} object.
May only be set to \code{TRUE} if \code{census.geo} option is specified. 
If \code{TRUE}, \code{\var{voter.file}} should include a numerical variable \code{\var{age}}.}

\item{sex}{An optional \code{TRUE}/\code{FALSE} object specifying whether to 
condition race predictions on sex (in addition to surname and geolocation).
Default is \code{FALSE}. Must be the same as \code{\var{sex}} in the \code{\var{census.data}} object.
May only be set to \code{TRUE} if \code{census.geo} option is specified. 
If \code{TRUE}, \code{\var{voter.file}} should include a numerical variable \code{\var{sex}}, 
where \code{\var{sex}} is coded as 0 for males and 1 for females.}

\item{party}{An optional character object specifying party registration field 
in \code{\var{voter.file}}, e.g., \code{\var{party} = "PartyReg"}. 
If specified, race/ethnicity predictions will be conditioned 
on individual's party registration (in addition to geolocation). 
Whatever the name of the party registration field in \code{\var{voter.file}}, 
it should be coded as 1 for Democrat, 2 for Republican, and 0 for Other.}
}
\value{
Output will be an object of class \code{data.frame}. It will 
 consist of the original user-input data with additional columns with 
 predicted probabilities for each of the five major racial categories: 
 \code{\var{pred.whi}} for White, 
 \code{\var{pred.bla}} for Black, 
 \code{\var{pred.his}} for Hispanic/Latino, 
 \code{\var{pred.asi}} for Asian/Pacific Islander, and 
 \code{\var{pred.oth}} for Other/Mixed.
}
\description{
\code{predict_race} makes probabilistic estimates of individual-level race/ethnicity.
}
\details{
This function implements the Bayesian race prediction methods outlined in 
Imai and Khanna (2015). The function produces probabilistic estimates of 
individual-level race/ethnicity, based on surname, geolocation, and party.
}
\examples{
data(voters)
predict_race(voters, surname.only = TRUE)
predict_race(voter.file = voters, surname.only = TRUE)
\dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...", age = TRUE)}
\dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...", 
party = "PID")}
\dontrun{CensusObj <- get_census_data("...", state = c("NY", "DC", "NJ")); 
predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj, party = "PID")}
\dontrun{CensusObj2 <- get_census_data("...", state = c("NY", "DC", "NJ"), age = TRUE, sex = TRUE); 
predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj2, 
age = TRUE, sex = TRUE, party = "PID")}
}

