% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Set_DB.R
\name{Set_DB}
\alias{Set_DB}
\title{Build up a comprehensive database regarding the school system}
\usage{
Set_DB(
  Year = 2023,
  level = "LAU",
  conservative = TRUE,
  Invalsi = TRUE,
  SchoolBuildings = TRUE,
  nstud = TRUE,
  nteachers = TRUE,
  BroadBand = TRUE,
  verbose = TRUE,
  show_col_types = FALSE,
  Invalsi_subj = c("ELI", "ERE", "ITA", "MAT"),
  Invalsi_grade = c(2, 5, 8, 10, 13),
  Invalsi_WLE = FALSE,
  SchoolBuildings_certifications = FALSE,
  SchoolBuildings_include_numerics = TRUE,
  SchoolBuildings_include_qualitatives = FALSE,
  SchoolBuildings_row_cutout = FALSE,
  SchoolBuildings_col_cut_thresh = 20000,
  SchoolBuildings_flag_outliers = TRUE,
  SchoolBuildings_count_missing = FALSE,
  nstud_imputation_thresh = 19,
  nstud_missing_to_1 = FALSE,
  UB_nstud_byclass = 99,
  LB_nstud_byclass = 1,
  InnerAreas = TRUE,
  ord_InnerAreas = FALSE,
  nstud_check = TRUE,
  nstud_check_registry = "Any",
  BroadBand_impute_missing = TRUE,
  Date = as.Date(paste0(substr(year.patternA(Year), 1, 4), "-09-01")),
  NA_autoRM = NULL,
  input_Invalsi_IS = NULL,
  input_Registry = NULL,
  input_SchoolBuildings = NULL,
  input_nstud = NULL,
  input_School2mun = NULL,
  input_AdmUnNames = NULL,
  input_InnerAreas = NULL,
  input_teachers4student = NULL,
  input_nteachers = NULL,
  input_BroadBand = NULL,
  autoAbort = FALSE
)
}
\arguments{
\item{Year}{Numeric or Character. The relevant school year. Available in the formats: \code{2023}, \code{"2022/2023"}, \code{202223}, \code{20222023}.
Important: if input datasets are plugged in, please select the same \code{Year} argument used to download the input data. \code{2023} by default.}

\item{level}{Character. The administrative level of detail at which data must be aggregated.
Either \code{"LAU"}/\code{"Municipality"}/\code{"NUTS-4"} or \code{"NUTS-3"}/\code{"Province"}. \code{"LAU"} by default.}

\item{conservative}{Logical. If \code{FALSE}, only the schools included in all the datasets are taken as input. \code{TRUE} by default.}

\item{Invalsi}{Logical. Whether the Invalsi census data must be included (see \code{\link{Get_Invalsi_IS}}). \code{TRUE} by default.}

\item{SchoolBuildings}{Logical. Whether the school buildings dataset must be included (see \code{\link{Get_DB_MIUR}}, \code{\link{Util_DB_MIUR_num}}). \code{TRUE} by default.}

\item{nstud}{Logical. Whether the number of students per class must be included (see \code{\link{Get_nstud}}). \code{TRUE} by default.}

\item{nteachers}{Logical. Whether the number of teachers by province must be included (see \code{\link{Get_nteachers_prov}}). \code{TRUE} by default.}

\item{BroadBand}{Logical. Whether the broadband availability in schools must be included (see \code{\link{Get_BroadBand}}). \code{TRUE} by default.}

\item{verbose}{Logical. If \code{TRUE}, the user keeps track of the main underlying operations. \code{TRUE} by default.}

\item{show_col_types}{Logical. If \code{TRUE} and the \code{verbose} argument is also \code{TRUE}, the columns of the raw dataset are shown during the download. \code{FALSE} by default.}

\item{Invalsi_subj}{Character. If \code{Invalsi == TRUE}, the school subject(s) to include, among \code{"English_listening"}/\code{"ELI"}, \code{"English_reading"}/\code{"ERE"}, \code{"Italian"}/\code{"ITA"} and \code{"Mathematics"}/\code{"MAT"}. All four by default.}

\item{Invalsi_grade}{Numeric. If \code{Invalsi == TRUE}, the educational grade to choose. Either \code{2} (2nd year of primary school), \code{5} (last year of primary school), \code{8} (last year of middle school), \code{10} (2nd year of high school) or \code{13} (last year of school). All by default.}

\item{Invalsi_WLE}{Logical. Whether to express Invalsi scores as the average WLE score rather than the percentage of sufficient tests, if \code{Invalsi_grade} is either \code{2} or \code{5}. \code{FALSE} by default.}

\item{SchoolBuildings_certifications}{Logical. If the school buildings database has to be downloaded, whether to include safety certifications. Only relevant from school year 2020/21 onwards (see \code{\link{Get_DB_MIUR}}). \code{FALSE} by default.}

\item{SchoolBuildings_include_numerics}{Logical. Whether to include strictly numeric variables alongside with Boolean ones in the school buildings database (see \code{\link{Util_DB_MIUR_num}}). \code{TRUE} by default.}

\item{SchoolBuildings_include_qualitatives}{Logical. Whether to include qualitative variables alongside with Boolean ones in the school buildings database (see \code{\link{Util_DB_MIUR_num}}). \code{FALSE} by default.}

\item{SchoolBuildings_row_cutout}{Logical. Whether to filter out rows including missing fields in the school buildings database (see \code{\link{Util_DB_MIUR_num}}). \code{FALSE} by default.}

\item{SchoolBuildings_col_cut_thresh}{Numeric. The threshold of missing values allowed for each variable in the school buildings database (see \code{\link{Util_DB_MIUR_num}}).
If a variable has a higher number of missing observations, then it is cut out. \code{20000} by default.
Warning: if the option \code{SchoolBuildings_row_cutout} is active, please select a lower threshold (e.g. \code{1000}).}

\item{SchoolBuildings_flag_outliers}{Logical. Whether to assign NA to outliers in numeric variables; see \code{\link{Util_DB_MIUR_num}} for more details. \code{TRUE} by default.}

\item{SchoolBuildings_count_missing}{Logical. Whether the function should return the percentage of NAs in the input school buildings database (see also \code{\link{Group_DB_MIUR}}). \code{FALSE} by default.}

\item{nstud_imputation_thresh}{Numeric. If \code{nstud_missing_to_1 == TRUE}, the minimum threshold below which the number of classes is imputed to 1 if missing;
see also \code{\link{Util_nstud_wide}}. \code{19} by default.}

\item{nstud_missing_to_1}{Logical. If \code{nstud == TRUE}, whether the number of classes should be imputed to 1 when it is missing and the number of students is below a threshold (argument \code{nstud_imputation_thresh}, see \code{\link{Util_nstud_wide}}). \code{FALSE} by default.}

\item{UB_nstud_byclass}{Numeric. The upper limit of the acceptable school-level average of the number of students by class if \code{nstud == TRUE}; see also \code{\link{Util_nstud_wide}}.  \code{99} by default, i.e. no restriction is made. Please notice that boundaries are included in the acceptance interval.}

\item{LB_nstud_byclass}{Numeric. The lower limit of the acceptable school-level average of the number of students by class if \code{nstud == TRUE}; see also \code{\link{Util_nstud_wide}}. \code{1} by default. Please notice that boundaries are included in the acceptance interval.}

\item{InnerAreas}{Logical. Whether the percentage of schools belonging to inner/internal areas must be included (see \code{\link{Get_InnerAreas}}). \code{TRUE} by default.}

\item{ord_InnerAreas}{Logical. If \code{check == TRUE} and \code{InnerAreas == TRUE}, whether the Inner areas classification should be treated as an ordinal variable rather than as a categorical one (see \code{\link{Get_InnerAreas}} for the classification). \code{FALSE} by default.}

\item{nstud_check}{Logical. If \code{nstud == TRUE}, whether to check the students number availability across all school included in the school registries (see \code{\link{Util_Check_nstud_availability}}). \code{TRUE} by default.}

\item{nstud_check_registry}{Character. If \code{nstud == TRUE} and \code{nstud_check == TRUE}, the school registries whose availability has to be checked. Either \code{"Registry_from_buildings"} (buildings registry), \code{"Registry_from_registry"} (proper registry), \code{"Any"} or \code{"Both"}. \code{"Any"} by default.}

\item{BroadBand_impute_missing}{Logical. Whether the schools not included in the Broadband dataset must be considered in the total of schools (i.e. the denominator of the Broadband availability indicator). \code{TRUE} by default.}

\item{Date}{Character or Date. The threshold date to broadband activation to consider it activated for a school, i.e. the date before which the works of broadband activation must be finished in order to consider a school as provided with the broadband. By default, September 1st at the beginning of the school year.}

\item{NA_autoRM}{Logical. Either \code{TRUE}, \code{FALSE} or \code{NULL}. If \code{TRUE}, the values missing in a single dataset are automatically deleted from the final DB. If \code{FALSE}, the missing observations are kept automatically. If \code{NULL}, the choice is left to the user by an interactive menu. \code{NULL} by default.}

\item{input_Invalsi_IS}{Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}.
If \code{Invalsi == TRUE}, the raw Invalsi survey data, obtained as output of the \code{\link{Get_Invalsi_IS}} function.
If \code{NULL}, it will be downloaded automatically, but not saved in the global environment.
\code{NULL} by default}

\item{input_Registry}{Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}.
The school registry corresponding to the year in scope, obtained as output of the function \code{\link{Get_Registry}}.
If \code{NULL}, it will be downloaded automatically, but not saved in the global environment.
\code{NULL} by default}

\item{input_SchoolBuildings}{Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}. If \code{SchoolBuildings == TRUE}, the raw school buildings dataset obtained as output of the function \code{\link{Get_DB_MIUR}}.
If \code{NULL}, it will be downloaded automatically but not saved in the global environment. \code{NULL} by default.}

\item{input_nstud}{Object of class \code{list}, including two objects of class \code{tbl_df}, \code{tbl} and \code{data.frame}.
If \code{nstud == TRUE}, the students and classes counts, obtained as output of the function \code{\link{Get_nstud}} with default \code{filename} parameter.
If \code{NULL}, the function will download it automatically but it will not be saved in the global environment. \code{NULL} by default.}

\item{input_School2mun}{Object of class \code{list} with elements of class \code{tbl_df}, \code{tbl} and \code{data.frame}.
If \code{nstud == TRUE}, the mapping from school codes to municipality (and province) codes. Needed only if \code{nstud_check == TRUE}, obtained as output of the function \code{\link{Get_School2mun}}.
If \code{NULL}, it will be downloaded automatically, but not saved in the global environment. \code{NULL} by default.}

\item{input_AdmUnNames}{Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}, obtained as output of the function \code{\link{Get_AdmUnNames}}.
If necessary, the ISTAT file including all the codes and the names of the administrative units for the year in scope. Required either if \code{nstud == TRUE & nstud_check == TRUE}, or if \code{SchoolBuildings == TRUE}, \code{input_SchoolBuildings} is not provided, and the school year is one of 2015/16, 2017/18 or 2018/19.
If \code{NULL}, it will be downloaded automatically, but not saved in the global environment. \code{NULL} by default.}

\item{input_InnerAreas}{Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}.
If \code{InnerAreas == TRUE}, the classification of peripheral municipalities, obtained as output of the function \code{\link{Get_InnerAreas}}.
If \code{NULL}, it will be downloaded automatically, but not saved in the global environment.
\code{NULL} by default}

\item{input_teachers4student}{Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}. If \code{nteachers == TRUE} and \code{nstud == TRUE}, the number of teachers for students by province. Please notice that
this object cannot be considered a substitute for the number of students by class since it provides no information on the number of schools in single educational grades but only at the school order level.
Obtained as output of the function \code{\link{Group_teachers4stud}}.
If \code{NULL}, it will be downloaded automatically, but not saved in the global environment.
\code{NULL} by default.}

\item{input_nteachers}{Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}. If \code{nteachers == TRUE}, the number of teachers by province, obtained as output of the function \code{\link{Get_nteachers_prov}}. If \code{NULL}, it will be downloaded automatically, but not saved in the global environment.
\code{NULL} by default}

\item{input_BroadBand}{Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}. If \code{BroadBand == TRUE}, the raw Broadband connection dataset obtained as output of the function \code{\link{Get_BroadBand}}.
If \code{NULL}, it will be downloaded automatically but not saved in the global environment. \code{NULL} by default.}

\item{autoAbort}{Logical. In case any data must be retrieved, whether to automatically abort the operation and return NULL in case of missing internet connection or server response errors. \code{FALSE} by default.}
}
\value{
An object of class \code{tbl_df}, \code{tbl} and \code{data.frame}.
}
\description{
This function generates a single data frame of school system data from a custom selection of the available datasets. It allows the user to aggregate the desired datasets, when available, among these:
\itemize{
  \item Invalsi census survey
  \item School buildings
  \item Number of students and school classes
  \item Number of teachers
  \item Broadband connection availability
}


To save time, ready-made input data can be plugged in; otherwise they will be downloaded automatically but not saved in the global environment.
When a new dataset is joined to the existing ones, some observations in this dataset may be missing. In this case, by default, the choice between keeping as many observational units as possible and removing units with missing variables is left to the user.
}
\examples{



DB23_prov <- Set_DB(Year = 2023, level = "NUTS-3",Invalsi_grade = c(5, 8, 13),
      Invalsi_subj = "Italian",nteachers = FALSE, BroadBand = FALSE,
      SchoolBuildings_count_missing = FALSE,NA_autoRM= TRUE,
      input_SchoolBuildings = example_input_DB23_MIUR[, -c(11:18, 10:27)],
      input_Invalsi_IS = example_Invalsi23_prov,
      input_nstud = example_input_nstud23,
      input_InnerAreas = example_InnerAreas,
      input_School2mun = example_School2mun23,
      input_AdmUnNames = example_AdmUnNames20220630)


DB23_prov

summary(DB23_prov[, -c(22:62)])





}
\seealso{
\code{\link{Util_DB_MIUR_num}}, \code{\link{Group_DB_MIUR}}, \code{\link{Group_nstud}}, \code{\link{Util_Check_nstud_availability}}, \code{\link{Get_School2mun}}
for similar arguments.
}
