% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/outOfSample.R
\name{buildEvalSets}
\alias{buildEvalSets}
\title{Build a disjoint set partition for out-of-sample evaluation.}
\usage{
buildEvalSets(nRows, smallN = 100, ncross = 3)
}
\arguments{
\item{nRows}{scalar, >=1, number of rows to sample from.}

\item{smallN}{scalar; if nRows<=smallN, return a 1-holdout plan (nRows singletons for evaluation).}

\item{ncross}{scalar; if nRows>smallN, return an ncross-way cross validation plan (a partition into ncross disjoint sets).}
}
\value{
list of lists where the app portions of the sublists form a disjoint partition of seq_len(nRows) and each sublist has a train portion disjoint from its app portion.
}
\description{
Return a disjoint partition of seq_len(nRows).  Very useful for any sort of
nested model situation (such as data prep, stacking, or super-learning).
}
\examples{

# basic use: 200 rows is above the default smallN of 100,
# so per the argument documentation this requests a 3-way cross validation plan
buildEvalSets(200)

# longer example
# helper fns
# fit models using experiment plan to estimate out of sample behavior
fitModelAndApply <- function(trainData,applicaitonData) {
   # fit a linear model of y on x using only the training rows,
   # then return its predictions for the application rows
   relation <- y ~ x
   linearFit <- lm(formula = relation, data = trainData)
   predict(object = linearFit, newdata = applicaitonData)
}
simulateOutOfSampleTrainEval <- function(d,fitApplyFn) {
   # d: data frame to evaluate on
   # fitApplyFn: function(trainRows, appRows) returning one estimate per app row
   # returns: numeric vector with one out of sample estimate per row of d
   plan <- buildEvalSets(nrow(d))
   estimates <- numeric(nrow(d))
   for(fold in plan) {
     # each row is scored by a model that was fit without that row
     estimates[fold$app] <- fitApplyFn(d[fold$train, ], d[fold$app, ])
   }
   estimates
}

# run the experiment
set.seed(2352356)
# example data, with placeholder columns for the two sets of estimates
d <- data.frame(x=rnorm(5),y=rnorm(5))
d$outOfSampleEst <- NA
d$inSampleEst <- NA

# R^2 of an estimate: one minus residual sum of squares over total sum of squares
rSquared <- function(observed,estimate) {
   residualSS <- sum((observed-estimate)^2)
   totalSS <- sum((observed-mean(observed))^2)
   1-residualSS/totalSS
}

# fit model on all data
d$inSampleEst <- fitModelAndApply(d,d)
# compute in-sample R^2 (above zero, falsely shows a 
#   relation until we adjust for degrees of freedom)
rSquared(d$y,d$inSampleEst)

d$outOfSampleEst <- simulateOutOfSampleTrainEval(d,fitModelAndApply)
# compute out-sample R^2 (not positive, 
#  evidence of no relation)
rSquared(d$y,d$outOfSampleEst)

}

