##' @name multi
##' @title Multiple \code{coxph} models
##' @aliases multi.coxph
##' @rdname multi
##' @export
##' 
##' @include genSurv.R
##' @useDynLib survMisc
##' 
##' @param x An object of class \code{coxph}
##' @param ... Not implemented
##' @param maxCoef Maximum no. of coefficients
##' @param crit Information criterion \dfn{IC}
##' @param how Method used to fit models. If \code{how="all"} (the default), all subsets of the
##' given model will be fit
##' @param confSetSize Size of returned confidence size. Number represents a row in the set.
##' (Columns represent parameters/coefficients in the models).
##' @param maxiter Maximum no. of iterations to use (for cox fitter).
##' Needs to be integer and should not
##' normally need to be > 100.
##' @param bunch When using \code{how="evolve"}:
##' no. of models to screen per generation
##' @param mutRate Mutation rate for new models
##' (both asexual and sexual selection). Should be in range \eqn{0-1}.
##' @param sexRate Sexual reproduction rate. Should be in range \eqn{0-1}.
##' @param immRate Immigration rate. Should be in range \eqn{0-1}.
##' Also \code{sexRate} + \code{immRate} should
##' not be \eqn{> 1}.
##' @param deltaM Target for change in mean IC determining convergence
##' when \code{how="evolve"}.
##' The last mean IC (from the best \code{confSetSize} models screened) is
##' compared with that from the most recently fitted \code{bunch}.
##' @param deltaB Change in best IC determining convergence of evolution.
##' This typically converges faster
##' than \code{deltaB}.
##' @param conseq Consecutive generations allowed which are 'divergent'
##' by both of the above criteria.
##' Algorithm will stop after this no. is reached.
##' @param report If \code{report=TRUE} (the default),
##' print report to screen during fitting.
##' Gives current change in best and mean IC as well as object size of
##' fitted models.
##' 
##' @return A \code{data.table} with one row per model.
##' This is of \code{class} \code{multi.coxph}
##' which has it's own \code{plot} method.
##' Columns show the coefficients from the fitted model. Values of \eqn{0}
##' indicate coefficient was not included. The \code{data.table} is sorted by IC
##' and also gives a column for relative evidence weights.
##' These are generated from:
##' \deqn{ w_i = \exp (\frac{-IC_i - IC_{best}}{2})}{
##'  w[i] = exp[ (-IC[i] - IC[best] )/2] }
##' Where \eqn{IC_i}{IC[i]} is the information criterion for the given model, and
##' \eqn{IC_{best}}{IC[best]} is that for the best model yet fitted.
##' They are then scaled to sum to \eqn{1}.
##'
##' @details This is based loosely on \code{package:glmulti}
##' (although is admittedly less efficient).
##' A more detailed discussion of the issues involved in multiple model
##' fitting is presented in the reference paper describing
##' that package's implementation.
##' \cr \cr
##' It is designed for cases where there a large no. of candidate models
##' for a given dataset (currently only right-censored survival data).
##' First, the \code{model.matrix} for the given formula is constructed.
##' For those unfamiliar with \code{model.matrix}, a predictor given as
##' a \code{factor} is expanded to
##' it's design matrix, so that e.g. for 4 original \code{level}s
##' there will be 3 binary (\eqn{0/1}) columns.
##' Currently all levels of a factor are considered independently
##' when fitting models.
##' \cr
##' Thus there is one column for each coefficient in the original model.
##' \cr \cr
##' The original formula can include the following terms:
##' \code{offset}, \code{weight} and \code{strata}.
##' Other \emph{special} terms such as \code{cluster} are not currently supported.
##' The formula may contain interaction terms and other transformations.
##' \cr \cr
##' If \code{how="all"}, all possible combinations of these coefficients
##' are fitted (or up to \code{maxCoef} predictors if this is less).
##' \cr \cr
##' If \code{how="evolve"} the algorithm proceeds as follows:
##' \enumerate{
##'  \item Fit \code{bunch} random models and sort by IC
##'  \item Generate another \code{bunch} candidate models based on these.
##' \code{immRate} gives the proportion that will be completely
##' random new models.
##' \code{sexRate} gives the proportion that will be the
##' products of existing models.
##' These are a random combination of the first elements from model 1
##' and the last elements from model 2.
##' The sum of \code{immRate} and \code{sexRate} should thus be \eqn{<= 1}.
##'  \item Other models (asexual) will be selected from the existing pool of
##' fitted models with a likelihood inversely proportional to their IC
##' (i.e. lower IC - more likely).
##' Both these and those generated by sexual reproduction
##' have a chance of mutation (elements changing from \eqn{1} to \eqn{0} or vice versa)
##' given by \code{mutRate}.
##'  \item Fit new models (not already fitted).
##'  \item Proceed until model fitting
##' is 'divergent' \code{conseq} times then stop.
##' Divergent is here taken to mean that the targets for \emph{both}
##' \code{deltaM} and \code{deltaB} have not been met.
##' \cr
##' \code{deltaM} typically converges more slowly. Thus a large value of \code{deltaM} will
##' require new \code{bunch}es of models to be signifiantly better than the best
##' (size = \code{confSetSize}) existing candidates.
##' Negative values of \code{deltaM} (not typically recommended) are more permissive;
##' i.e. new models can be somewhat worse than those existing.
##' }
##' The models are returned in a \code{data.table},
##' with one row per model giving the fitted coefficients,
##' the IC for the model and the relative evidence weights.
##' 
##' @note 
##' The algorithm will tend to slow as the population of fitted models expands.
##' @seealso \code{\link{ic}}
##' @seealso \code{\link{plot.MultiCoxph}}
##' 
##' @references Calgano V, de Mazancourt C, 2010.
##' glmulti: An R Package for Easy Automated Model Selection with (Generalized) Linear Models.
##' \emph{Journal of Statistical Software}. \bold{34}(12):1-29.
##' \href{http://www.jstatsoft.org/v34/i12/paper}{Available at JSS}.
##' 
multi <- function(x, ...){
    UseMethod("multi")
}
##'
##' @rdname multi
##' @method multi coxph
##' @export
##' 
##' @examples
##' set.seed(1)
##' df1 <- genSurvDf(b=1, c=5, f=0, model=FALSE)
##' multi(coxph(Surv(t1, e) ~ ., data=df1), crit="aic")
##' \dontrun{
##' ### longer example
##' dt1 <- genSurvDt(b=1, c=30, f=0, model=FALSE)
##' multi(coxph(Surv(t1, e) ~ ., data=dt1),
##' maxCoef=8, crit="bic", how="evolve", deltaM=1, deltaB=1, conseq=10) }
##' 
multi.coxph <- function(x, ...,
                        maxCoef=5L,
                        crit=c("aic", "aicc", "bic"),
                        how=c("all", "evolve"),
                        confSetSize=100L,
                        maxiter=100L,
                        bunch=1000L,
                        mutRate=0.1,
                        sexRate=0.2,
                        immRate=0.3,
                        deltaM=1,
                        deltaB=1,
                        conseq=10L,
                        report=TRUE){
    stopifnot(class(x)=="coxph")
    stopifnot(sexRate + immRate <= 1)
    crit <- match.arg(crit)
    how <- match.arg(how)
### for R CMD check
    ic <- NULL
### taken from stats::lm()
    mf <- x$call
    m <- match(c("formula", "data"), names(mf), 0L)
    mf <- mf[c(1L, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1L]] <- as.name("model.frame")
    mf1 <- eval(mf, parent.frame())
### no. rows
    n1 <- dim(mf1)[1]
### get outcome
    stopifnot(attr(model.response(mf1), "type")=="right")
    y1 <- unclass(model.response(mf1, "numeric"))
### get time
    t1 <- as.double(y1[, 1])
### get strata
    if (any(grepl("strata", colnames(mf1)))){
        fac1 <- mf1[, grepl("strata", colnames(mf1))]
        strat1 <- as.integer(fac1)
### ### sort by strata then time
        sort1 <- order(strat1, t1)
        strat1 <- strat1[sort1]
### ### look for rows where strata changes
        newstrat1 <- as.integer(c(1 * (diff(as.numeric(strat1)) != 0), 1))
### ### remove strata from formula
        e1 <- sub("\\+.strata\\(.*?\\)", "", deparse(formula(x)))
        mf$formula <- as.formula(e1)
### ### refit model frame
        mf1 <- eval(mf, parent.frame())
    } else {
        newstrat1 <- as.integer(rep_len(0L, n1))
### ### sort by time only if no strata
        sort1 <- order(t1)
    }
### model terms
    mt <- attr(mf1, "terms")
    stopifnot(is.empty.model(mt)==FALSE)
### exclude intercept
    attr(mt, "intercept") <- 0
### sorted time and status
    t1 <- t1[sort1]
    status <- as.integer(y1[sort1, 2])
### no. events ( == 'effective' n for IC)
    ne1 <- sum(status)
### get offset and weights, if present
    if ("offset" %in% names(x)){
        offset <- x$offset[sort1]
        } else {
            offset <- rep_len(0, n1)
        }
    if ("weights" %in% names(x)){
        weights <- x$weights[sort1]
        } else {
            weights <- rep_len(1, n1)
        }
    method <- as.integer(x$method=="efron")
### ### get model matrix
    x1 <- model.matrix(mt, mf1, contrasts)
    x1 <- x1[sort1, ,drop=FALSE]
    np1 <- dim(x1)[2]
### no. predictors
    if (how=="all"){
        if (!np1>maxCoef){
            c2 <- combinat::hcube(rep(2L, np1))-1L
            c2 <- c2[-1, , drop=FALSE]
        } else {
### ### ### see end of file for .allCombo
            c2 <- .allCombo(long1=np1, max1s=maxCoef)
        }
### ### get coefficients and likelihoods
        l1 <- .getCoefs(c2=c2,
                        maxiter=maxiter, t1=t1, status=status,
                        x1, offset=offset, weights=weights,
                        newstrat1=newstrat1, method=method,
                        crit=crit, ne1=ne1)
### ### coefficients and ics
        res1 <- data.table(l1[[1]])
        setnames(res1, colnames(x1))
        res1[, "ic" := l1[[2]]]
### ### relative evidence weights
    }
###
    if (how=="evolve"){
### ### random sample
        c2 <- matrix(sample(c(0, 1), size=np1 * bunch,
                            replace=TRUE, prob=c(bunch, maxCoef)),
                     nrow=bunch)
### ### remove those with more than max. no coefficients
        c2 <- c2[apply(c2, 1, sum) <= maxCoef, ,drop=FALSE]
        c2 <- unique(c2)
        dimnames(c2) <- list(seq_len(nrow(c2)), colnames(x1))
### ### coefficients
        l1 <- .getCoefs(c2=c2,
                        maxiter=maxiter, t1=t1, status=status,
                        x1, offset=offset, weights=weights,
                        newstrat1=newstrat1, method=method,
                        crit=crit, ne1=ne1)
### ### current best and average
        cb1 <- min(l1[[2]])
        ca1 <- mean(l1[[2]])
### ### population screened
        scr1 <- data.table(c2)
        scr1[, "ic" := l1[[2]]]
        setkey(scr1, ic)
        res1 <- data.table::data.table(l1[[1]])
        data.table::setnames(res1, colnames(x1))
        res1[, "ic" := l1[[2]] ]
###
### ### main loop
###
        countEnd <- 0L
        gen1 <- 1L
        while(countEnd <= conseq){
### ### generate next set
            c3 <- .genModels(bunch=bunch, np1=np1,
                             immRate=immRate,
                             mutRate=mutRate,
                             sexRate=sexRate,
                             scr1=scr1)
            c3 <- unique(c3)
### ### check if already fitted
            M1 <- data.table::setkey(data.table(c3))
            nao1 <- na.omit(M1[scr1, which=TRUE])
            if (length(nao1)) c3 <- c3[-nao1, ,drop=FALSE]
            l1 <- .getCoefs(c2=c3,
                            maxiter=maxiter, t1=t1, status=status,
                            x1, offset=offset, weights=weights,
                            newstrat1=newstrat1, method=method,
                            crit=crit, ne1=ne1)
### ### change in best and average
            chb1 <- cb1 - min(l1[[2]])
            cha1 <- ca1 - mean(l1[[2]])
### ### new best and mean
            if (dim(scr1)[1] < confSetSize){
                cb1 <- min(scr1[, ic])
                ca1 <- mean(scr1[, ic])
            } else {
                cb1 <- min(scr1[1:confSetSize, ic])
                ca1 <- mean(scr1[1:confSetSize, ic])
            }
### ### add to screened set
            scr1 <- rbindlist(list(scr1, data.table(cbind(c3, l1[[2]]))))
            scr1 <- unique(scr1)
            data.table::setkey(scr1, "ic")
            res1 <- data.table::rbindlist(list(res1,
                                               data.table::data.table(cbind(l1[[1]], l1[[2]]))))
            res1 <- unique(res1)
            if (report){
                print(paste("Best model:",
                            paste(colnames(scr1), scr1[1, ], sep="=",
                                  collapse=",")
                            ))
                print(paste("Change in best IC", chb1))
                print(paste("Current mean IC", ca1))
                print(paste("Change in mean IC", cha1))
                gen1 <- gen1+1L
                print(paste("Generation :", gen1))
                print("Size of screened object:")
                print(object.size(scr1), units="Kb")
            }
### ### check for convergence
            if(chb1 < deltaB & cha1 < deltaM) countEnd <- countEnd+1
        }
    }
### get relative evidence weights
    .getRelEvW <- function(ic, bestIc){ exp( - (ic - bestIc / 2)) }
    rew1 <- sapply(res1[, ic], .getRelEvW, bestIc=min(res1[, ic]))
    res1[, "weight" := rew1 / sum(rew1)]
    data.table::setkey(res1, ic)
    if (dim(res1)[1] > confSetSize) res1 <- res1[1:confSetSize, ]
    class(res1) <- c("multi.coxph", "data.table", "data.frame")
    attr(res1, "crit") <- crit
    attr(res1, "how") <- how
    if (how=="evolve") attr(res1, "generations") <- gen1
    return(res1)
}
###----------------------------------------
.getCox <- function(hypo=hypo,
                    maxiter=maxiter, t1=t1, status=status,
                    wx, offset=offset, weights=weights,
                    newstrat1=newstrat1, method=method, i=i
                    ){
### to keep
### k1 <- seq_along(hypo)[as.logical(hypo)]
### wx = working copy of x
    r1 <- .Call("cf6",
                maxiter,
                t1,
                status,
                wx,
                offset,
                weights,
                newstrat1,
                method,
                1e-09,  1e-12,
                rep_len(0, sum(hypo)), 1L,
                PACKAGE="survMisc")
### last 1L indicates use rescaling
### (more stable numerically)
    if (r1[[8]] > 100) warning(paste("Row", i, "did not converge"))
    hypo[as.logical(hypo)] <- r1[[1]]
    l1 <- r1[[5]][2]
    return(list(hypo, l1))
}
###----------------------------------------
.getCoefs <- function(c2=c2,
                      maxiter=maxiter, t1=t1, status=status,
                      x1, offset=offset, weights=weights,
                      newstrat1=newstrat1, method=method,
                      crit=crit, ne1=ne1){
### copy to hold coefficients
    co1 <- c2
    nr1 <- dim(c2)[1]
    ic1 <- rep_len(0.1, nr1)
    for (i in seq_len(nr1)){
        hypo <- c2[i, ,drop=FALSE]
        r1 <- .getCox(hypo=hypo,
                      maxiter=maxiter,
                      t1=t1, status=status,
                      wx=x1[, (seq_along(hypo)[as.logical(hypo)]), drop=FALSE],
                      offset=offset, weights=weights,
                      newstrat1=newstrat1,
                      method=method, i=i
                      )
        co1[i, ] <- r1[[1]]
        ic1[i] <- .getIc(r1[[2]], crit=crit, edf=sum(hypo), ne1=ne1)
    }
    return(list(co1, ic1))
}
###----------------------------------------
.getIc <- function(l1, crit, edf, ne1){
    switch(crit,
           "aic" = 2 * edf - (2 * l1),
           "aicc" = ( 2 * edf - (2 * l1) ) + ( 2*edf*(edf+1) / (ne1-edf-1) ),
           "bic" = log(ne1) * edf - (2 * l1)
           )
}
###----------------------------------------
.genModels <- function(bunch=bunch, np1=np1,
                       immRate=immRate,
                       mutRate=mutRate,
                       sexRate=sexRate,
                       scr1=scr1){
    ic <- NULL
    t1 <- matrix(0L, nrow=bunch, ncol=np1)
### new immigrants
    ni1 <- as.integer(immRate * dim(t1)[1])
    t1[(dim(t1)[1]-ni1):dim(t1)[1], ] <- sample.int(2,
                                              size=np1 * (ni1 + 1),
                                              replace=TRUE)-1
### asexual
### sample from existing with probability relative to IC
    n1 <- dim(t1)[1]-ni1-1
    t1[1:n1, ] <- apply(scr1[, ], 2, sample,
                        size=n1, prob=(1/scr1[, ic]), replace=TRUE)[, (-np1-1)]
### mutations
    f1 <- function(x) if(x==0) 1 else 0
    muts1 <- sample.int(length(t1), size=mutRate * length(t1))
    t1[muts1] <- sapply(t1[muts1], f1)
### new sexual formulas
    ns1 <- sexRate * dim(t1)[1]
    t2 <- t3 <- t1[sample.int(dim(t1)[1]-ni1, size=ns1), ]
    for (i in 1:dim(t2)[1]){
### ### sample column then row
        sc1 <- sample.int(dim(t2)[2]-1, size=1)
        sr1 <- sample.int(dim(t2)[1], size=1)
        t3[i, ] <- c(t2[sr1, 1:sc1], t2[sr1, (sc1+1):dim(t2)[2]])
    }
### with mutations
    muts2 <- sample.int(length(t3), size=ceiling(mutRate * length(t3)))
    t3[muts2] <- sapply(t3[muts2], f1)
    t1[(dim(t1)[1]-ni1-ns1+1):(dim(t1)[1]-ni1), ] <- t3
    return(t1)
}
###----------------------------------------
.allCombo <- function(long1, max1s){
    do.call(rbind, lapply(1:max1s, function(num1) {
        t(apply(combn(long1, num1), 2, function(x) {
            col = rep(0L, long1)
            col[x] = 1L
            col
        }))
    }))
}
