% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_dataframe.R
\name{get_dataframe_by_name}
\alias{get_dataframe_by_name}
\alias{get_dataframe_by_id}
\alias{get_dataframe_by_doi}
\title{Download dataverse file as a dataframe}
\usage{
get_dataframe_by_name(
  filename,
  dataset = NULL,
  .f = NULL,
  original = FALSE,
  ...
)

get_dataframe_by_id(fileid, .f = NULL, original = FALSE, ...)

get_dataframe_by_doi(filedoi, .f = NULL, original = FALSE, ...)
}
\arguments{
\item{filename}{The name of the file of interest, with file extension, for example
\code{"roster-bulls-1996.tab"}. Can be a vector for multiple files.}

\item{dataset}{A character specifying a persistent identification ID for a dataset,
for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class
\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.}

\item{.f}{The function to use for reading in the raw dataset. The user
must choose the appropriate function: for example if the target is a .rds
file, then \code{.f} should be \code{readRDS} or \code{readr::read_rds}. It can be a custom
function defined by the user. See examples for details.}

\item{original}{A logical, whether to read the original file rather than the ingested,
archival version of the datafile if one exists. If \code{TRUE}, users should supply
a function to use to read in the original. The archival versions are tab-delimited
\code{.tab} files so if \code{original = FALSE}, \code{.f} is set to \code{readr::read_tsv}.}

\item{...}{
  Arguments passed on to \code{\link[=get_file]{get_file}}
  \describe{
    \item{\code{file}}{An integer specifying a file identifier; or a vector of integers
specifying file identifiers; or, if used with the prefix \code{"doi:"}, a
character with the file-specific DOI; or, if used without the prefix, a
filename accompanied by a dataset DOI in the \code{dataset} argument, or an object of
class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.
Can be a vector for multiple files.}
    \item{\code{format}}{A character string specifying a file format for download.
By default, this is \dQuote{original} (the original file format). If \code{NULL},
no query is added, so ingested files are returned in their ingested TSV form.
For tabular datasets, the option \dQuote{bundle} downloads the bundle
of the original and archival versions, as well as the documentation.
See \url{https://guides.dataverse.org/en/latest/api/dataaccess.html} for details.}
    \item{\code{vars}}{A character vector specifying one or more variable names, used to
extract a subset of the data.}
    \item{\code{key}}{A character string specifying a Dataverse server API key. If one
is not specified, functions calling authenticated API endpoints will fail.
Keys can be specified atomically or globally using
\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.}
    \item{\code{server}}{A character string specifying a Dataverse server.
Multiple Dataverse installations exist, with \code{"dataverse.harvard.edu"} being the
most major. The server can be defined each time within a function, or it can
be set as a default via an environment variable. To set a default, run
\code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")}
or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron}
file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.}
  }}

\item{fileid}{A numeric ID internally used for \code{get_file_by_id}. Can be a vector for multiple files.}

\item{filedoi}{A DOI for a single file (not the entire dataset), of the form
\code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}.
Can be a vector for multiple files.}
}
\value{
An R object that is returned by the default or user-supplied function in the
\code{.f} argument. For example, if \code{.f = readr::read_tsv}, the function will
return a dataframe as read in by \code{readr::read_tsv()}. If the file identifier
is a vector, it will return a list where each slot corresponds to elements of the vector.
}
\description{
Reads in the Dataverse file into the R environment with any
user-specified function, such as \code{read.csv} or \code{readr} functions.

Use \code{get_dataframe_by_name} if you know the name of the datafile and the DOI
of the dataset. Use \code{get_dataframe_by_doi} if you know the DOI of the datafile
itself. Use \code{get_dataframe_by_id} if you know the numeric ID of the
datafile. For files that are not datasets, the more generic \code{get_file} that
downloads the content as a binary is simpler.
}
\examples{
\dontrun{
# 1. For files that were originally plain text (.csv, .tsv), we recommend
# retrieving the data.frame either by dataset DOI plus file name, or by the
# file's own DOI.

# By file name within a dataset:
df_tab <- get_dataframe_by_name(
  filename = "roster-bulls-1996.tab",
  dataset = "doi:10.70122/FK2/HXJVJU",
  server = "demo.dataverse.org"
)

# By the file's own DOI:
df_tab <- get_dataframe_by_doi(
  filedoi = "10.70122/FK2/HXJVJU/SA3Z2V",
  server = "demo.dataverse.org"
)

# 2. When Dataverse's ingest loses information (Stata .dta, SPSS .sav) or the
# file cannot be ingested at all (R .rds), we recommend setting
# `original = TRUE` and passing a matching read-in function as `.f`.

# Rds files are not ingested, so both `original = TRUE` and `.f` are required.
if (requireNamespace("readr", quietly = TRUE)) {
  df_from_rds_original <- get_dataframe_by_name(
    filename = "nlsw88_rds-export.rds",
    dataset = "doi:10.70122/FK2/PPIAXE",
    server = "demo.dataverse.org",
    original = TRUE,
    .f = readr::read_rds
  )
}

# Ingest drops attributes such as value labels from Stata dta files, so we
# recommend reading the original version with a Stata reader such as `haven`.
if (requireNamespace("haven", quietly = TRUE)) {
  df_stata_original <- get_dataframe_by_name(
    filename = "nlsw88.tab",
    dataset = "doi:10.70122/FK2/PPIAXE",
    server = "demo.dataverse.org",
    original = TRUE,
    .f = haven::read_dta
  )
}

# 3. RData files are read in by `base::load()` but cannot be assigned to an
# object name. The following shows two possible ways to read in such files.

# First, without relying on `get_dataframe_*`, write as a binary file:
as_binary <- get_file_by_doi(
  filedoi = "doi:10.70122/FK2/PPIAXE/5VPXKE",
  server = "demo.dataverse.org"
)

# Build the destination once with base R's `file.path()`, so the example
# needs no extra package, then write the raw bytes there and load them.
temp <- tempdir()
rdata_path <- file.path(temp, "county.RData")
writeBin(as_binary, rdata_path)
load(rdata_path)

# If you are certain each RData file contains only one object, you can define
# a small helper (adapted from https://stackoverflow.com/a/34926943) that
# returns that object instead of attaching it to the workspace.
load_object <- function(file) {
  # Load into a scratch environment rather than the calling environment.
  scratch <- new.env()
  load(file = file, envir = scratch)
  # Return the object stored under the first name that `ls()` reports.
  first_name <- ls(scratch)[1]
  scratch[[first_name]]
}

# Second, pass the helper as `.f` and identify the file by its numeric ID
# through the documented `fileid` argument.
# https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPIAXE/X2FC5V
as_rda <- get_dataframe_by_id(
  fileid = 1939003,
  server = "demo.dataverse.org",
  .f = load_object,
  original = TRUE
)
}

}
