% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/variable_selection.R
\name{feature_selector}
\alias{feature_selector}
\title{Feature Selection Wrapper}
\usage{
feature_selector(
  dat_train,
  dat_test = NULL,
  x_list = NULL,
  target = NULL,
  pos_flag = NULL,
  occur_time = NULL,
  ex_cols = NULL,
  filter = c("IV", "PSI", "XGB", "COR"),
  cv_folds = 1,
  iv_cp = 0.01,
  psi_cp = 0.5,
  xgb_cp = 0,
  cor_cp = 0.98,
  breaks_list = NULL,
  hopper = FALSE,
  vars_name = TRUE,
  parallel = FALSE,
  note = TRUE,
  seed = 46,
  save_data = FALSE,
  file_name = NULL,
  dir_path = tempdir(),
  ...
)
}
\arguments{
\item{dat_train}{A data.frame with independent variables and target variable.}

\item{dat_test}{A data.frame of test data. Default is NULL.}

\item{x_list}{Names of independent variables.}

\item{target}{The name of target variable.}

\item{pos_flag}{The value of positive class of target variable, default: "1".}

\item{occur_time}{The name of the variable that represents the time at which each observation takes place.}

\item{ex_cols}{A list of excluded variables. Regular expressions can also be used to match variable names. Default is NULL.}

\item{filter}{The methods for selecting important and stable variables.}

\item{cv_folds}{Number of cross-validations. Default: 1.}

\item{iv_cp}{The minimum threshold of IV. 0 < iv_i; 0.01 to 0.1 usually work. Default: 0.01.}

\item{psi_cp}{The maximum threshold of PSI. 0 <= psi_i <= 1; 0.05 to 0.2 usually work. Default: 0.5.}

\item{xgb_cp}{Threshold of XGB feature's Gain. 0 <= xgb_cp <=1. Default is 1/number of independent variables.}

\item{cor_cp}{Threshold of correlation between features. 0 <= cor_cp <=1; 0.7 to 0.98 usually work. Default is 0.98.}

\item{breaks_list}{A table containing a list of splitting points for each independent variable. Default is NULL.}

\item{hopper}{Logical. Filtering screening. Default is FALSE.}

\item{vars_name}{Logical, output a list of filtered variables or table with detailed IV and PSI value of each variable. Default is TRUE.}

\item{parallel}{Logical, parallel computing. Default is FALSE.}

\item{note}{Logical. Outputs info. Default is TRUE.}

\item{seed}{Random number seed. Default is 46.}

\item{save_data}{Logical, save results in locally specified folder. Default is FALSE.}

\item{file_name}{The name for periodically saved results files. Default is "select_vars".}

\item{dir_path}{The path for periodically saved results files. Default is \code{tempdir()}.}

\item{...}{Other parameters.}
}
\value{
A list of selected features
}
\description{
\code{feature_selector} uses four different methods (IV, PSI, correlation, xgboost) to select important features. The correlation algorithm must be used with IV.
}
\examples{
feature_selector(dat_train = UCICreditCard[1:1000,c(2,8:12,26)],
                      dat_test = NULL, target = "default.payment.next.month",
                      occur_time = "apply_date", filter = c("IV", "PSI"),
                      cv_folds = 1, iv_cp = 0.01, psi_cp = 0.1, xgb_cp = 0, cor_cp = 0.98,
                      vars_name = FALSE,note = FALSE)
}
\seealso{
\code{\link{psi_iv_filter}}, \code{\link{xgb_filter}}, \code{\link{gbm_filter}}
}
