We use the method described in the paper, Thresholded Ordered Sparse Canonical Correlation Analysis (TOSCCA), to uncover the underlying processes linking the data.
# Pass the raw inputs X0 and Y0 through standardVar() before fitting.
X <- standardVar(X0)
Y <- standardVar(Y0)

# Model settings.
K <- 4 # number of components to be estimated
nonz_x <- rep(100, K) # number of nonzero variables for X
nonz_y <- rep(100, K) # number of nonzero variables for Y
init <- "uniform" # type of initialisation

# Fit TOSCCA with one sparsity level per component; printing and plotting
# are switched off via silent = TRUE and toPlot = FALSE.
cca_toscca <- toscca(
  X, Y, nonz_x, nonz_y, K, init,
  combination = FALSE, silent = TRUE, toPlot = FALSE
)

# For every k in 1..K, evaluate cpev.toscca() on X using the first k columns
# of the estimated alpha weights.
cpev_toscca <- sapply(
  seq_len(K),
  function(n_comp) cpev.toscca(X, cca_toscca$alpha[, seq_len(n_comp)])
)
# perm_toscca = perm.toscca(X, Y, nonz_x, nonz_y, K = K, init, draws = 100, cancor = cca_toscca$cancor)

Estimate the canonical weights and latent paths for \(K\) components.
# Fit the mixed-model variant (tosccamm) component by component; the fit for
# component k is stored in res_k[[k]].
res_k <- list()
X.temp <- XX2
Y.temp <- YY2
# NOTE(review): the loop covers k = 1 only, so the k > 1 branch below is never
# reached as written -- confirm whether all K components were intended.
for (k in 1:1) {
  if (k > 1) {
    # residualise for subsequent components: columns 1-2 are carried over
    # unchanged (presumably id and time, as used in lmeformula -- TODO confirm)
    # and the remaining columns are residualised against the previous weights
    X.temp <- data.frame(
      X.temp[, c(1, 2)],
      toscca::residualisation(as.matrix(X.temp[, -c(1, 2)]), res_k[[k - 1]]$alpha, type = "basic")
    )
    Y.temp <- data.frame(
      Y.temp[, c(1, 2)],
      toscca::residualisation(as.matrix(Y.temp[, -c(1, 2)]), res_k[[k - 1]]$beta, type = "basic")
    )
    # Number of nonzero weights in the previous component. sum() yields 0 when
    # no weight is nonzero and the full length when all are, whereas
    # table(...)[2] gave NA in both of those cases.
    # NOTE(review): these counts are not passed to tosccamm() below, which uses
    # nonz_a / nonz_b -- confirm which sparsity levels are intended.
    nz_a_gen <- as.numeric(sum(res_k[[k - 1]]$alpha != 0, na.rm = TRUE))
    nz_b_gen <- as.numeric(sum(res_k[[k - 1]]$beta != 0, na.rm = TRUE))
  }
  res_k[[k]] <- tosccamm(
    X.temp, Y.temp,
    folds = 2,
    nonzero_a = nonz_a, nonzero_b = nonz_b,
    model = "lme", lmeformula = " ~ 0 + poly(time,3) + (1|id)", silent = TRUE
  )
}

Figures 3.a and 3.b in manuscript.