% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sipni.R
\name{sipni_data}
\alias{sipni_data}
\title{Download SI-PNI Vaccination Data}
\usage{
sipni_data(
  year,
  type = "DPNI",
  uf = NULL,
  month = NULL,
  vars = NULL,
  parse = TRUE,
  col_types = NULL,
  cache = TRUE,
  cache_dir = NULL,
  lazy = FALSE,
  backend = c("arrow", "duckdb")
)
}
\arguments{
\item{year}{Integer. Year(s) of the data. Required.}

\item{type}{Character. File type for FTP data (1994--2019). Default:
\code{"DPNI"} (doses applied). Use \code{"CPNI"} for vaccination coverage.
Ignored for years >= 2020 (the OpenDataSUS CSV data is always microdata).}

\item{uf}{Character. Two-letter state abbreviation(s) to download.
If NULL (default), downloads all 27 federative units (26 states and the
Federal District).
Example: \code{"SP"}, \code{c("SP", "RJ")}.}

\item{month}{Integer. Month(s) to download (1--12). For years >= 2020
(CSV), selects which monthly CSV files to download. For years <= 2019
(FTP), this parameter is ignored (FTP files are annual).
If NULL (default), downloads all 12 months.}

\item{vars}{Character vector. Variables to keep. If NULL (default),
returns all available variables. Use \code{\link{sipni_variables}()} to see
available variables.}

\item{parse}{Logical. If TRUE (default), converts columns to
appropriate types (integer, double, Date) based on the variable
metadata. Use \code{\link{sipni_variables}()} to see the target type for
each variable. Set to FALSE for backward-compatible all-character output.}

\item{col_types}{Named list. Override the default type for specific
columns. Names are column names, values are type strings:
\code{"character"}, \code{"integer"}, \code{"double"},
\code{"date_dmy"}, \code{"date_ymd"}, \code{"date_ym"}, \code{"date"}.
Example: \code{list(QT_DOSE = "character")} to keep QT_DOSE as character.}

\item{cache}{Logical. If TRUE (default), caches downloaded data for
faster future access.}

\item{cache_dir}{Character. Directory for caching. Default:
\code{tools::R_user_dir("healthbR", "cache")}.}

\item{lazy}{Logical. If TRUE, returns a lazy query object instead of a
tibble. Requires the \pkg{arrow} package. The lazy object supports
dplyr verbs (filter, select, mutate, etc.) which are pushed down
to the query engine before collecting into memory. Call
\code{dplyr::collect()} to materialize the result. Default: FALSE.}

\item{backend}{Character. Backend for lazy evaluation: \code{"arrow"}
(default) or \code{"duckdb"}. Only used when \code{lazy = TRUE}.
DuckDB backend requires the \pkg{duckdb} package.}
}
\value{
A tibble with vaccination data, or a lazy query object when
\code{lazy = TRUE}. Includes columns \code{year} and \code{uf_source}
to identify the source when multiple years/states are combined.

\strong{Output differs by year range:}
\itemize{
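\item \strong{1994--2019 (FTP)}: Aggregated data with DPNI (12 vars) or
CPNI (7 vars) columns. Columns are converted to their target types when
\code{parse = TRUE} (default) and are all character when
\code{parse = FALSE}.
\item \strong{2020+ (CSV)}: Individual-level microdata with ~47 columns
(snake_case Portuguese). Columns are converted to their target types when
\code{parse = TRUE} (default) and are all character when
\code{parse = FALSE}. Use
\code{sipni_variables(type = "API")} to see the full list.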
}
}
\description{
Downloads and returns vaccination data from SI-PNI. For years 1994--2019,
data is downloaded from DATASUS FTP (aggregated doses/coverage). For years
2020+, data is downloaded from OpenDataSUS as monthly CSV bulk files
(individual-level microdata with one row per vaccination dose).
}
\details{
\strong{FTP data (1994--2019):}
Downloaded as plain .DBF files. SI-PNI FTP data is \strong{aggregated} (dose
counts and coverage rates per municipality, vaccine, and age group).
Two file types: DPNI (doses) and CPNI (coverage).

\strong{CSV data (2020+):}
Downloaded from OpenDataSUS as monthly CSV bulk files (national,
semicolon-delimited, latin1 encoding). Each monthly ZIP is ~1.4 GB.
This is \strong{individual-level microdata} (one row per vaccination dose,
~47 fields per record). The \code{type} parameter is ignored for CSV
years. Data is filtered by UF during chunked reading to avoid loading
the full national file into memory.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# FTP: doses applied in Acre, 2019
ac_doses <- sipni_data(year = 2019, uf = "AC")

# FTP: vaccination coverage in Acre, 2019
ac_cob <- sipni_data(year = 2019, type = "CPNI", uf = "AC")

# CSV (2020+): microdata for Acre, January 2024
ac_api <- sipni_data(year = 2024, uf = "AC", month = 1)

# CSV (2020+): select specific variables
sipni_data(year = 2024, uf = "AC", month = 1,
           vars = c("descricao_vacina", "tipo_sexo_paciente",
                    "data_vacina"))
\dontshow{\}) # examplesIf}
}
\seealso{
\code{\link{sipni_info}()} for type descriptions,
\code{\link{censo_populacao}()} for population denominators.

Other sipni: 
\code{\link{sipni_cache_status}()},
\code{\link{sipni_clear_cache}()},
\code{\link{sipni_dictionary}()},
\code{\link{sipni_info}()},
\code{\link{sipni_variables}()},
\code{\link{sipni_years}()}
}
\concept{sipni}
