toscca-vignette

library(toscca)
library(ggplot2)

Canonical Correlation Analysis

We use the method described in the paper, Thresholded Ordered Sparse Canonical Correlation Analysis (TOSCCA), to uncover the underlying processes linking the data.

# Pre-process both data views with standardVar() before fitting.
X <- standardVar(X0)
Y <- standardVar(Y0)

# Model settings.
K      <- 4             # number of components to be estimated
nonz_x <- rep(100, K)   # number of nonzero variables for X (one per component)
nonz_y <- rep(100, K)   # number of nonzero variables for Y (one per component)
init   <- "uniform"     # type of initialisation

# Fit TOSCCA for all K components, with silent = TRUE and toPlot = FALSE.
cca_toscca <- toscca(
  X, Y, nonz_x, nonz_y, K, init,
  combination = FALSE,
  silent = TRUE,
  toPlot = FALSE
)

# Call cpev.toscca() on X with the first n_comp columns of the estimated
# alpha weights, for n_comp = 1, ..., K.
cpev_toscca <- sapply(
  seq_len(K),
  function(n_comp) cpev.toscca(X, cca_toscca$alpha[, seq_len(n_comp)])
)

# perm_toscca = perm.toscca(X, Y, nonz_x, nonz_y, K = K, init, draws = 100, cancor = cca_toscca$cancor)

tosccamm

Estimate the canonical weights and latent paths for \(K\) components.

# Results of tosccamm() are collected here, one list element per component.
res_k <- list()

# Working copies of the data; for every component after the first they are
# overwritten with residualised versions.
X.temp <- XX2
Y.temp <- YY2

# NOTE: the loop currently runs for k = 1 only. The k > 1 branch is kept so
# that further components can be estimated by raising the upper bound.
for (k in seq_len(1)) {
  if (k > 1) {
    # Residualise for subsequent components. The first two columns are carried
    # over unchanged (presumably the id and time columns used in lmeformula --
    # TODO confirm); the remaining columns are residualised using the weights
    # of the previous component.
    X.temp <- data.frame(
      X.temp[, c(1, 2)],
      toscca::residualisation(
        as.matrix(X.temp[, -c(1, 2)]),
        res_k[[k - 1]]$alpha,
        type = "basic"
      )
    )
    Y.temp <- data.frame(
      Y.temp[, c(1, 2)],
      toscca::residualisation(
        as.matrix(Y.temp[, -c(1, 2)]),
        res_k[[k - 1]]$beta,
        type = "basic"
      )
    )

    # Number of nonzero weights in the previous component. sum() is used
    # rather than table(...)[2]: table() has a single cell when the weights
    # are all zero or all nonzero, so indexing its second cell gives NA.
    # NOTE(review): these counts are not passed to tosccamm() below.
    nz_a_gen <- as.numeric(sum(res_k[[k - 1]]$alpha != 0, na.rm = TRUE))
    nz_b_gen <- as.numeric(sum(res_k[[k - 1]]$beta != 0, na.rm = TRUE))
  }

  # NOTE(review): nonz_a and nonz_b are not defined in this section; they are
  # expected to be set earlier in the vignette.
  res_k[[k]] <- tosccamm(
    X.temp, Y.temp,
    folds = 2,
    nonzero_a = nonz_a, nonzero_b = nonz_b,
    model = "lme",
    lmeformula = " ~ 0 + poly(time,3) + (1|id)",
    silent = TRUE
  )
}

Results

Latent paths for \(k=1\) and \(k=2\)

Figures 3.a and 3.b in the manuscript.