1 Preamble

Dependencies:

suppressPackageStartupMessages(library(plantmix))
suppressPackageStartupMessages(library(ggplot2))

2 Simulate data

set.seed(12345)

2.1 Simulate the genotypes

2.1.1 Set the dimensions

levSpecies <- c("S1", "S2")

nbGenos <- c("S1" = 500, "S2" = 500)
levGenos <- list(
  "S1" = sprintf(
    fmt = paste0("gS1_%0", floor(log10(nbGenos["S1"])) + 1, "i"),
    1:nbGenos["S1"]
  ),
  "S2" = sprintf(
    fmt = paste0("gS2_%0", floor(log10(nbGenos["S2"])) + 1, "i"),
    1:nbGenos["S2"]
  )
)

nbSnps <- c("S1" = 1000, "S2" = 1000)
levSnps <- list(
  "S1" = sprintf(
    fmt = paste0("sS1_%0", floor(log10(nbSnps["S1"])) + 1, "i"),
    1:nbSnps["S1"]
  ),
  "S2" = sprintf(
    fmt = paste0("sS2_%0", floor(log10(nbSnps["S2"])) + 1, "i"),
    1:nbSnps["S2"]
  )
)

2.1.2 SNP genotypes

nb_pops <- 10
weak_div_pops <- diag(nb_pops)
weak_div_pops[upper.tri(weak_div_pops)] <- 0.9
weak_div_pops[lower.tri(weak_div_pops)] <- weak_div_pops[upper.tri(weak_div_pops)]
snpGenos <- lapply(levSpecies, function(species) {
  tmp <- rep(nbGenos[species] / nb_pops, nb_pops - 1)
  tmp <- c(tmp, nbGenos[species] - sum(tmp))
  simulGenosDoseStruct(
    nb_genos = tmp,
    nb_snps = nbSnps[species],
    div_pops = weak_div_pops,
    geno_IDs = levGenos[[species]],
    snp_IDs = levSnps[[species]]
  )
})
names(snpGenos) <- levSpecies
sapply(snpGenos, dim)
##        S1   S2
## [1,]  500  500
## [2,] 1000 1000
snpGenos$S1[1:3, 1:4]
##         sS1_0001 sS1_0002 sS1_0003 sS1_0004
## gS1_001        0        1        1        1
## gS1_002        1        1        2        1
## gS1_003        0        0        1        0
table(snpGenos$S1)
## 
##      0      1      2 
## 154021 201021 144958

2.1.3 Additive genetic relationships

For simplicity, the first estimator of VanRaden (2008) is used, that assumes linkage equilibrium and Hard-Weinberg equilibrium.

snpAFs <- lapply(snpGenos, function(M) {
  colSums(M) / (2 * nrow(M))
})
GRMs_vr <- lapply(levSpecies, function(species) {
  GRM <- estimGRM(snpGenos[[species]], snpAFs[[species]])
  as.matrix(Matrix::nearPD(GRM)$mat)
})
names(GRMs_vr) <- levSpecies
species <- "S1"
GRM <- GRMs_vr[[species]]
image(Matrix(GRM), main = paste0("GRM for ", species))

hist(diag(GRM), main = paste0("GRM for ", species))

hist(GRM[upper.tri(GRM)], main = paste0("GRM for ", species))

2.2 Simulate the phenotypes

As in Salomon et al (2026), the design will be incomplete (sparse) but balanced, and tester-based, involving:

  • many genotypes of the first species, the focal one, whose breeding values will be statistically modeled as random, with kinship matrix \(K\);

  • and a small number of genotypes of the second species, the tester one, whose breeding values will be statistically modeled as fixed.

The yield data are generated according to the following equations:

  • intercrops: \(Y_{IC} = X_{IC} B_{IC} + Z_{DS_f} BV_f + Z_{D{\times}S} \, DBV{\times}SBV + E_{IC}\)

  • sole crops:

    • focal species: \(y_{SC_f} = X_{SC_f} \beta_{SC_f} + Z_{D_f} DBV_f + Z_{D_f} SIGV_f + e_{SC_f}\) where \(\beta_f\) only includes the contrasts for the “block” explanatory factor

    • tester species: \(y_{SC_t} = X_{SC_t} \beta_{SC_t} + e_{SC_t}\) where \(\beta_t\) includes the contrasts for the “block” and “DBV” explanatory factors (the “SIGV” explanatory factor for the tester species is ignored)

The parameter values correspond to cereal-legume mixtures such as winter wheat and pea, inspired from the papers of Moutier et al (2022) and Haug et al (2023).

nbGenosTrial <- c("S1" = 300, "S2" = 2)
levGenosTrial <- lapply(levSpecies, function(species) {
  sample(levGenos[[species]], nbGenosTrial[species])
})
names(levGenosTrial) <- levSpecies

GRMs_vr_trial <- list()
GRMs_vr_trial$S1 <- GRMs_vr$S1[levGenosTrial$S1, levGenosTrial$S1]
GRMs_vr_trial$S2 <- diag(nbGenosTrial["S2"]) # diag because modeled as fixed
dimnames(GRMs_vr_trial$S2) <- list(levGenosTrial$S2, levGenosTrial$S2)

set.seed(12345)
out <- simulDBVSBVinter(GRMs_vr_trial) # see ?simulDBVSBVinter for more details
names(out)
## [1] "truth"           "datW"            "datL"            "obsMC"          
## [5] "sowingDensities" "props"
tmp <- list2env(out, envir = environment())

2.2.1 Design sparsity

plantmix:::plotAllocSchemeInterMixDesign(datW)

2.2.2 Indices of plant mixtures

Several indices can be used to compare sole crops and intercrops. Below some of them are computed using the true breeding values, i.e., with neither block effects nor experimental errors, to give an idea of what the simulated data correspond to.

2.2.2.1 RYT (LER) and RYP

## Reformat and compute
is_mix <- which(datW$type == "IC")
true_RYTs <- list()
true_RYPs <- list()
for (idx in is_mix) {
  true_y1_IC <- as.vector(truth$mu[["S1"]]["IC"]) + datW$true_gen_yield_S1[idx]
  true_y2_IC <- as.vector(truth$mu[["S2"]]["IC"]) + datW$true_gen_yield_S2[idx]
  g1 <- as.character(datW$geno_S1[idx])
  g2 <- as.character(datW$geno_S2[idx])
  true_y1_SC <- as.vector(truth$mu[["S1"]]["SC"]) +
    datW$true_gen_yield_S1[datW$geno_S1 == g1 &
      is.na(datW$geno_S2)]
  true_y2_SC <- as.vector(truth$mu[["S2"]]["SC"]) +
    datW$true_gen_yield_S2[datW$geno_S2 == g2 &
      is.na(datW$geno_S1)]
  true_y2_SC <- unique(true_y2_SC)
  yields <- data.frame(
    SCcrop = c(true_y1_SC, true_y2_SC),
    intercrop = c(true_y1_IC, true_y2_IC),
    row.names = c(g1, g2)
  )
  tmp <- LER(yields)
  mixId <- paste0(g1, "-", g2)
  true_RYTs[[mixId]] <- c(
    "RY_S1" = as.numeric(tmp$pLER[1]),
    "RY_S2" = as.numeric(tmp$pLER[2]),
    "RYT" = tmp$LER
  )
  true_RYPs[[mixId]] <- c(
    "RYP_S1" = true_y1_IC /
      (props[["S1"]] * true_y1_SC),
    "RYP_S2" = true_y2_IC /
      (props[["S2"]] * true_y2_SC)
  )
}
true_RYTs <- data.frame(
  mix = names(true_RYTs),
  geno_S1 = sapply(strsplit(names(true_RYTs), "-"), `[`, 1),
  geno_S2 = sapply(strsplit(names(true_RYTs), "-"), `[`, 2),
  as.data.frame(do.call(rbind, true_RYTs)),
  stringsAsFactors = TRUE
)
str(true_RYTs)
## 'data.frame':    300 obs. of  6 variables:
##  $ mix    : Factor w/ 300 levels "gS1_001-gS2_301",..: 24 210 257 230 285 118 224 267 158 205 ...
##  $ geno_S1: Factor w/ 300 levels "gS1_001","gS1_002",..: 24 210 257 230 285 118 224 267 158 205 ...
##  $ geno_S2: Factor w/ 2 levels "gS2_191","gS2_301": 1 1 1 1 1 1 1 1 1 1 ...
##  $ RY_S1  : num  0.389 0.591 0.48 0.476 0.522 ...
##  $ RY_S2  : num  1.003 0.697 1.086 0.977 0.859 ...
##  $ RYT    : num  1.39 1.29 1.57 1.45 1.38 ...
summary(true_RYTs[, grep("RY_", colnames(true_RYTs))])
##      RY_S1            RY_S2       
##  Min.   :0.3219   Min.   :0.6613  
##  1st Qu.:0.4592   1st Qu.:0.8462  
##  Median :0.4925   Median :0.8918  
##  Mean   :0.4910   Mean   :0.8959  
##  3rd Qu.:0.5218   3rd Qu.:0.9500  
##  Max.   :0.6148   Max.   :1.1151
summary(true_RYTs[, grep("RYT", colnames(true_RYTs))])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.245   1.346   1.387   1.387   1.426   1.566
true_RYPs <- data.frame(
  mix = names(true_RYPs),
  geno_S1 = sapply(strsplit(names(true_RYPs), "-"), `[`, 1),
  geno_S2 = sapply(strsplit(names(true_RYPs), "-"), `[`, 2),
  as.data.frame(do.call(rbind, true_RYPs)),
  stringsAsFactors = TRUE
)
str(true_RYPs)
## 'data.frame':    300 obs. of  5 variables:
##  $ mix    : Factor w/ 300 levels "gS1_001-gS2_301",..: 24 210 257 230 285 118 224 267 158 205 ...
##  $ geno_S1: Factor w/ 300 levels "gS1_001","gS1_002",..: 24 210 257 230 285 118 224 267 158 205 ...
##  $ geno_S2: Factor w/ 2 levels "gS2_191","gS2_301": 1 1 1 1 1 1 1 1 1 1 ...
##  $ RYP_S1 : num  0.493 0.748 0.608 0.602 0.662 ...
##  $ RYP_S2 : num  4.76 3.31 5.16 4.64 4.08 ...
summary(true_RYPs[, grep("RYP", colnames(true_RYPs))])
##      RYP_S1           RYP_S2     
##  Min.   :0.4077   Min.   :3.141  
##  1st Qu.:0.5817   1st Qu.:4.020  
##  Median :0.6238   Median :4.236  
##  Mean   :0.6219   Mean   :4.255  
##  3rd Qu.:0.6610   3rd Qu.:4.512  
##  Max.   :0.7788   Max.   :5.297

if (FALSE) {
  ## using the RYT() function
  keys <- unique(paste0(datL$focal, " in ", datL$standID))
  tmp <- do.call(rbind, strsplit(keys, " in "))
  datLavg <- data.frame(
    key = keys,
    focal = tmp[, 1],
    standID = tmp[, 2],
    stringsAsFactors = TRUE
  )
  datLavg$type <- "IC"
  datLavg$type[as.character(datLavg$focal) == as.character(datLavg$standID)] <- "SC"
  datLavg$focal_species <- "S1"
  datLavg$focal_species[grep("^gS2_", datLavg$focal)] <- "S2"
  datLavg$prop <- 1
  datLavg$prop[datLavg$type == "IC" & datLavg$focal_species == "S1"] <- props["S1"]
  datLavg$prop[datLavg$type == "IC" & datLavg$focal_species == "S2"] <- props["S2"]
  for (i in 1:nrow(datLavg)) {
    idx <- which(datL$focal == datLavg$focal[i] &
      datL$standID == datLavg$standID[i])
    datLavg$focal_yield[i] <- mean(datL$focal_yield[idx])
  }
  true_RYTs2 <- RYT(datLavg, "standID", "focal", "prop", "focal_yield")
  true_RYTs2 <- droplevels(true_RYTs2[!is.na(true_RYTs2$RYT), ])
  true_RYTs2 <- droplevels(true_RYTs2[!duplicated(true_RYTs2$standID), ])
}

## Plot
ggplot(true_RYTs) +
  aes(x = RY_S1) +
  geom_histogram(color = "white", bins = 30) +
  geom_vline(
    xintercept = sowingDensities$S1["IC"] /
      sowingDensities$S1["SC"],
    col = "red", linewidth = 2
  ) +
  labs(
    title = "True relative yields (partial land-equivalent ratios) of species 1 for all mixtures",
    x = "RY (partial LER) of species 1"
  ) +
  theme_bw()


ggplot(true_RYTs) +
  aes(x = RY_S2) +
  geom_histogram(color = "white", bins = 30) +
  geom_vline(
    xintercept = sowingDensities$S2["IC"] /
      sowingDensities$S2["SC"],
    col = "red", linewidth = 2
  ) +
  labs(
    title = "True partial land-equivalent ratio of species 2 for all mixtures",
    x = "RY (partial LER) of species 2"
  ) +
  theme_bw()


ggplot(true_RYTs) +
  aes(x = geno_S2, y = RYT) +
  geom_violin(aes(fill = geno_S2), trim = FALSE, show.legend = FALSE) +
  geom_boxplot(width = 0.2) +
  labs(
    title = "True land-equivalent ratio for all mixtures"
  ) +
  theme_bw()


p <- ggplot(true_RYTs) +
  aes(x = RY_S1, y = RY_S2, color = geno_S2)
for (i in seq(0.75, 2, by = 0.25)) {
  if (i == 1) {
    p <- p + geom_abline(intercept = i, slope = -1, linetype = "solid", color = "black")
  } else {
    p <- p + geom_abline(intercept = i, slope = -1, linetype = "dotted", color = "black")
  }
}
p + geom_abline(intercept = 0, slope = 1, linetype = "dotted", color = "black") +
  geom_point(size = 2) +
  labs(
    title = "True relative yields (RYs, a.k.a. partial LERs)",
    x = "relative yield (partial LER) of species 1",
    y = "relative yiedl (partial LER) of species 2",
    color = "Tester"
  ) +
  ## guides(color="none") +
  scale_x_continuous(breaks = seq(0, 1.4, by = 0.1)) +
  scale_y_continuous(breaks = seq(0, 1.4, by = 0.1)) +
  coord_cartesian(xlim = c(0, 1.4), ylim = c(0, 1.4)) +
  theme(aspect.ratio = 1) +
  theme_bw()


ggplot(true_RYPs) +
  aes(x = RYP_S1, y = RYP_S2, color = geno_S2) +
  geom_abline(intercept = 0, slope = 1, linetype = "solid", color = "black") +
  geom_hline(yintercept = 1) +
  geom_vline(xintercept = 1) +
  geom_point(size = 2) +
  labs(
    title = "True relative yields per plant (RYPs)",
    x = "RYP of species 1",
    y = "RYP of species 2",
    color = "Tester"
  ) +
  ## guides(color="none") +
  scale_x_continuous(breaks = seq(0, 6.5, by = 1)) +
  scale_y_continuous(breaks = seq(0, 6.5, by = 1)) +
  coord_cartesian(xlim = c(0, 6.5), ylim = c(0, 6.5)) +
  theme(aspect.ratio = 1) +
  theme_bw()

2.2.2.2 RYM

## Reformat and compute
tmp <- datW[, c("geno_S1", "geno_S2", "true_yield_S1", "true_yield_S2")]
tmp$ID <- NA
tmp$props <- NA
tmp$true_yield <- NA
## sole crop of species 1:
idx <- which(!is.na(tmp$geno_S1) & is.na(tmp$geno_S2))
tmp$ID[idx] <- as.character(tmp$geno_S1[idx])
tmp$props[idx] <- "1"
tmp$true_yield[idx] <- tmp$true_yield_S1[idx]
## sole crop of species 2:
idx <- which(is.na(tmp$geno_S1) & !is.na(tmp$geno_S2))
tmp$ID[idx] <- as.character(tmp$geno_S2[idx])
tmp$props[idx] <- "1"
tmp$true_yield[idx] <- tmp$true_yield_S2[idx]
## intercrops of species 1 and 2:
idx <- which(!is.na(tmp$geno_S1) & !is.na(tmp$geno_S2))
sep <- "|"
tmp$ID[idx] <- as.character(paste0(
  tmp$geno_S1[idx], sep,
  tmp$geno_S2[idx]
))
prop1 <- props["S1"]
prop2 <- props["S2"]
stopifnot(prop1 + prop2 == 1)
prop1 <- round(prop1, 2)
prop2 <- 1 - prop1
tmp$props[idx] <- paste0(prop1, sep, prop2)
tmp$true_yield[idx] <- tmp$true_yield_S1[idx] + tmp$true_yield_S2[idx]
stopifnot(all(!is.na(tmp$ID)))
tmp$ID <- factor(tmp$ID)
tmp$props <- factor(tmp$props)
## keep only one yield (the true one) per modality
dupIDs <- table(as.character(tmp$ID))
(dupIDs <- names(dupIDs)[dupIDs > 1])
## [1] "gS2_191" "gS2_301"
for (dupID in dupIDs) {
  idx <- which(as.character(tmp$ID) == dupID)
  tmp <- droplevels(tmp[-idx[2:length(idx)], ])
}
rm(dupIDs)

tmp <- RYM(tmp,
  colIDstand = "ID", colIDcomps = "ID", colProps = "props",
  colY = "true_yield", sep = "|"
)
summary(tmp$RYM)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.8713  0.9730  1.0126  1.0182  1.0583  1.2095     302

## Plot
ggplot(tmp) +
  aes(x = RYM) +
  geom_histogram(na.rm = TRUE, bins = 30, color = "white") +
  geom_vline(xintercept = 1, linewidth = 2) +
  geom_vline(xintercept = mean(tmp$RYM, na.rm = TRUE), linewidth = 2, color = "red") +
  labs(title = "True relative yields of mixtures (RYMs)") +
  theme_bw()

3 Explore the data

In this section, an exploratory data analysis is done on the data including block effects and experimental errors, so that it can be easily applied on real data, too.

str(datW)
## 'data.frame':    604 obs. of  20 variables:
##  $ standID          : Factor w/ 602 levels "gS1_001","gS1_001+gS2_301",..: 47 351 567 253 419 471 89 513 459 289 ...
##  $ geno_S1          : Factor w/ 300 levels "gS1_001","gS1_002",..: 24 176 284 127 210 236 45 257 230 145 ...
##  $ geno_S2          : Factor w/ 2 levels "gS2_191","gS2_301": NA NA NA NA NA NA NA NA NA NA ...
##  $ type             : Factor w/ 2 levels "SC","IC": 1 1 1 1 1 1 1 1 1 1 ...
##  $ type2            : Factor w/ 3 levels "sole_S1","sole_S2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ block            : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
##  $ x                : int  6 23 16 19 29 21 4 23 21 11 ...
##  $ y                : int  9 6 2 10 4 2 7 3 4 3 ...
##  $ plot             : Factor w/ 604 levels "10A1","10A10",..: 575 143 61 88 200 119 426 140 121 14 ...
##  $ true_gen_yield_S1: num  -2.9044 -0.0841 -9.0603 -2.8195 12.7576 ...
##  $ true_gen_yield_S2: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ true_yield_S1    : num  61.2 64 55.1 61.3 76.9 ...
##  $ true_yield_S2    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ true_fix_yield_S1: num  64.1 64.1 64.1 64.1 64.1 ...
##  $ true_fix_yield_S2: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ true_rnd_yield_S1: num  -2.9044 -0.0841 -9.0603 -2.8195 12.7576 ...
##  $ true_rnd_yield_S2: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ yield_S1         : num  61 61.7 56.5 61.8 77 ...
##  $ yield_S2         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tot_yield        : num  61 61.7 56.5 61.8 77 ...
tapply(datW$type, datW$block, table)
## $A
## 
##  SC  IC 
## 152 150 
## 
## $B
## 
##  SC  IC 
## 152 150
ggplot(datL) +
  aes(x = block, y = focal_yield) +
  geom_violin(aes(fill = block)) +
  geom_boxplot(fill = "white", width = 0.2) +
  theme_bw() +
  facet_grid(focal_species ~ type)


is_mix <- datW$type == "IC"
subDatW <- droplevels(datW[is_mix, ])
ggplot(subDatW) +
  aes(x = yield_S1, y = yield_S2, color = geno_S1, shape = geno_S2) +
  geom_abline(intercept = seq(0, 200, by = 10), slope = -1, linetype = "dotted", color = "black") +
  geom_point(size = 2) +
  labs(
    title = "Partial yields in intercrop",
    x = "species 1 (in qt.ha-1)", y = "species 2 (in qt.ha-1)",
    shape = "Tester (species S2)"
  ) +
  guides(color = "none") +
  theme_bw()

3.1 Yield map

## Add the empty micro-plots:
coords <- data.frame(
  x = rep(sort(unique(datW$x)), each = length(unique(datW$y))),
  y = sort(unique(datW$y)),
  block = "A",
  plot = NA
)
coords$block[coords$x >= min(datW$x[datW$block != "A"])] <- "B"
coords$plot <- paste0(coords$x, coords$block, coords$y)
length(idx <- which(!coords$plot %in% as.character(datW$plot)))
## [1] 16
tmp <- as.data.frame(matrix(nrow = length(idx), ncol = ncol(datW)))
colnames(tmp) <- colnames(datW)
tmp[, c("x", "y", "block", "plot")] <- coords[idx, ]
datWSupp <- rbind(
  datW,
  as.data.frame(tmp)
)

## Plot
xranges <- do.call(rbind, tapply(datW$x, datW$block, range))
p <- ggplot(datWSupp) +
  aes(x = x, y = y) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  ) +
  scale_x_continuous(breaks = sort(unique(datW$x))) +
  scale_y_continuous(
    breaks = sort(unique(datW$y)),
    sec.axis = dup_axis()
  ) +
  guides(x = guide_axis(angle = 90)) +
  geom_tile(na.rm = TRUE) +
  geom_rect(aes(xmin = x - 0.5, xmax = x + 0.5, ymin = y - 0.5, ymax = y + 0.5),
    color = "white"
  ) +
  geom_text(
    x = sum(xranges[1, ]) / 2, y = 10.7, label = "Block A",
    hjust = 0, color = "black"
  ) +
  geom_text(
    x = sum(xranges[2, ]) / 2, y = 10.7, label = "Block B",
    hjust = 0, color = "black"
  ) +
  geom_vline(
    xintercept = max(datW$x[datW$block == "A"]),
    color = "black", linetype = "dashed", linewidth = 1
  )

p + aes(fill = type) +
  labs(title = "Types of microplots") +
  scale_fill_discrete()


scaleCols <- c("#CB2027", "#ffec1b", "#b3e93e", "#60BD68", "#059748")
scaleLim <- range(datW$tot_yield)
p + aes(fill = tot_yield) +
  labs(title = "Total yield for each microplot") +
  scale_fill_continuous(type = "viridis")

4 Infer the parameters

4.1 Using intercrops only

4.1.1 Prepare the inputs

idxIC <- which(!is.na(datW$geno_S1) & !is.na(datW$geno_S2))
datW_IC <- droplevels(datW[idxIC, ])

listY <- list(Y_IC = datW_IC[, c("yield_S1", "yield_S2")])
listX <- list(X_IC = model.matrix(~ 1 + block + geno_S2, datW_IC,
  contrasts.arg = list(
    "block" = "contr.sum",
    "geno_S2" = "contr.sum"
  )
))
listZ <- list(Z_DS_f = model.matrix(~ 0 + geno_S1, datW_IC))
colnames(listZ$Z_DS_f) <- gsub("^geno_S1", "", colnames(listZ$Z_DS_f))
listVCov <- list(K = GRMs_vr_trial$S1[
  levels(datW_IC$geno_S1),
  levels(datW_IC$geno_S1)
])

4.1.2 Fit the model

fitsTmb <- list()
i <- 1
for (REML in c(TRUE, FALSE)) {
  print(paste0("fit model with ", ifelse(REML, "REML", "ML"), "..."))
  st <- system.time(
    fitTmb <- fitDBVSBVinter(listY, listX, listZ, listVCov,
      lOptions = list(iter.max = 20, quantifUncertain = TRUE),
      REML = REML, verbose = 0
    )
  )
  print(st)
  fitsTmb[[i]] <- fitTmb
  i <- i + 1
  break # skip ML to speed-up
}
## [1] "fit model with REML..."
##    user  system elapsed 
##  43.658   0.040  43.735
for (i in seq_along(fitsTmb)) {
  fitTmb <- fitsTmb[[i]]
  p <- ggplot(fitTmb$trace) +
    aes(x = iter, y = objfn) +
    geom_point() +
    geom_line() +
    labs(
      title = "Optimization convergence",
      subtitle = paste0("REML=", fitTmb$REML)
    ) +
    theme_bw()
  print(p)
}

4.1.3 Checks

for (i in seq_along(fitsTmb)) {
  fitTmb <- fitsTmb[[i]]
  print(paste0("REML=", fitTmb$REML))

  print("Check fixed effects:")
  checks <- data.frame(
    param = rownames(fitTmb$B_IC),
    truth = c(truth$B_IC),
    fitTmb$B_IC,
    row.names = NULL,
    check.names = FALSE
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(checks)

  print("Check (co)variances of random genetic effects:")
  checks <- data.frame(
    param = rownames(fitTmb$vcov_BV_f),
    truth = c(
      truth$var_DBV["S1"],
      truth$var_SBV["S1"],
      truth$cor_DBV_SBV["S1"]
    ),
    fitTmb$vcov_BV_f,
    row.names = NULL,
    check.names = FALSE
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(checks)

  print("Check (co)variances of residual errors:")
  checks <- data.frame(
    param = rownames(fitTmb$vcov_E_IC),
    truth = c(truth$var_err_IC, truth$cor_err_IC),
    fitTmb$vcov_E_IC,
    row.names = NULL,
    check.names = FALSE
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(checks)

  print("Check random genetic effects of the focal species:")
  checks <- data.frame(
    param = c(
      rep(c("DBV", "SBV"), each = nrow(truth$BV$S1)),
      rep("BV_IC", length(truth$BV_IC$S1))
    ),
    geno = levels(datW_IC$geno_S1),
    truth = c(
      truth$BV$S1[levels(datW_IC$geno_S1), ],
      truth$BV_IC$S1[levels(datW_IC$geno_S1)]
    ),
    rbind(
      fitTmb$DBV_f[levels(datW_IC$geno_S1), ],
      fitTmb$SBV_IC_f[levels(datW_IC$geno_S1), ],
      fitTmb$BV_IC_f[levels(datW_IC$geno_S1), ]
    ),
    row.names = NULL,
    check.names = FALSE
  )
  checks$param <- factor(checks$param,
    levels = c("BV_IC", "DBV", "SBV")
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(tapply(1:nrow(checks), checks$param, function(idx) {
    cor(checks$truth[idx], checks$Estimate[idx])
  }))
  p <- ggplot(checks) +
    aes(x = Estimate, y = truth) +
    geom_hline(yintercept = 0, linetype = "dotted") +
    geom_vline(xintercept = 0, linetype = "dotted") +
    geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
    geom_point() +
    labs(
      title = "Results with intercrop-only data",
      subtitle = paste0("REML=", fitTmb$REML)
    ) +
    theme_bw() +
    facet_wrap(~param)
  print(p)
}
## [1] "REML=TRUE"
## [1] "Check fixed effects:"
##                           param      truth   Estimate Std. Error    z value
## 1 B_IC (Intercept) on species 1 32.0000000 32.3638098  0.2940937 110.045903
## 2      B_IC block1 on species 1 -0.8791112 -0.7986116  0.3314186  -2.409677
## 3    B_IC geno_S21 on species 1  0.3530473  0.7292304  0.3339525   2.183635
## 4 B_IC (Intercept) on species 2 27.0000000 26.8807304  0.1185591 226.728560
## 5      B_IC block1 on species 2  1.4664826  1.5104990  0.1363525  11.077899
## 6    B_IC geno_S21 on species 2 -0.9739075 -1.1812426  0.1380911  -8.554084
##     Pr(>|z^2|)         nBE
## 1 0.000000e+00   1.1369058
## 2 1.596666e-02  -9.1569310
## 3 2.898907e-02 106.5531880
## 4 0.000000e+00  -0.4417393
## 5 1.605964e-28   3.0014955
## 6 1.188089e-17  21.2889905
## [1] "Check (co)variances of random genetic effects:"
##          param  truth  Estimate Std. Error    z value   Pr(>|z^2|)        nBE
## 1    var_DBV_f 27.040 12.192495 4.42066333   2.758069 5.814390e-03 -54.909411
## 2 var_SBV_IC_f  5.408  2.474440 1.02626269   2.411117 1.590374e-02 -54.244829
## 3     cor_BV_f -0.900 -0.860585 0.08378915 -10.270841 9.536994e-25  -4.379442
## [1] "Check (co)variances of residual errors:"
##                    param     truth  Estimate Std. Error    z value   Pr(>|z^2|)
## 1 vars_E_IC of species 1  4.577666 21.318351  4.4215679   4.821446 1.425212e-06
## 2 vars_E_IC of species 2  0.975124  3.310185  0.9861155   3.356793 7.885224e-04
## 3               cor_E_IC -0.200000 -0.751424  0.0680906 -11.035649 2.571880e-28
##        nBE
## 1 365.7035
## 2 239.4630
## 3 275.7120
## [1] "Check random genetic effects of the focal species:"
##     BV_IC       DBV       SBV 
## 0.8508720 0.8959716 0.8761980

4.1.4 Parametric bootstrap

if (FALSE) { # slow
  system.time(
    pmTmb <- paramBoot4TMB(fitTmb, nb_boot = 5)
  )
  summary(do.call(rbind, pmTmb))
  fitTmb$sry_sdr[names(pmTmb[[1]]), ]
}

4.2 Using both sole crops and intercrops

4.2.1 Prepare the inputs

idxIC <- which(!is.na(datW$geno_S1) & !is.na(datW$geno_S2))
datW_IC <- droplevels(datW[idxIC, ])
idxSCf <- which(datL$type == "SC" & datL$focal_species == "S1")
datL_SC_f <- droplevels(datL[idxSCf, ])
idxSCt <- which(datL$type == "SC" & datL$focal_species == "S2")
datL_SC_t <- droplevels(datL[idxSCt, ])

listY <- list()
listY$Y_IC <- datW_IC[, c("yield_S1", "yield_S2")]
listY$y_SC_f <- datL_SC_f$focal_yield
listY$y_SC_t <- datL_SC_t$focal_yield
sapply(listY[-1], length)
## y_SC_f y_SC_t 
##    300      4

listX <- list()
listX$X_IC <- model.matrix(~ 1 + block + geno_S2,
  data = datW_IC,
  contrasts.arg = list(
    "block" = "contr.sum",
    "geno_S2" = "contr.sum"
  )
)
listX$X_SC_f <- model.matrix(~ 1 + block, datL_SC_f,
  contrasts.arg = list(block = "contr.sum")
)
listX$X_SC_t <- model.matrix(~ 1 + block + focal, datL_SC_t,
  contrasts.arg = list(
    block = "contr.sum",
    focal = "contr.sum"
  )
)

listZ <- list()
listZ$Z_DS_f <- model.matrix(~ 0 + geno_S1, datW_IC)
colnames(listZ$Z_DS_f) <- gsub("^geno_S1", "", colnames(listZ$Z_DS_f))
listZ$Z_D_f <- model.matrix(~ 0 + focal, datL_SC_f)
colnames(listZ$Z_D_f) <- gsub("^focal", "", colnames(listZ$Z_D_f))

listVCov <- list(K = GRMs_vr_trial$S1[
  levels(datW_IC$geno_S1),
  levels(datW_IC$geno_S1)
])

4.2.2 Fit the model

fitsTmb <- list()
i <- 1
for (REML in c(TRUE, FALSE)) {
  print(paste0("fit model with ", ifelse(REML, "REML", "ML"), "..."))
  st <- system.time(
    fitTmb <- fitDBVSBVinter(listY, listX, listZ, listVCov,
      lOptions = list(iter.max = 20),
      REML = REML, verbose = 0
    )
  )
  print(st)
  fitsTmb[[i]] <- fitTmb
  i <- i + 1
  break # skip ML to speed-up
}
## [1] "fit model with REML..."
##    user  system elapsed 
##  95.610   0.047  95.722
for (i in seq_along(fitsTmb)) {
  fitTmb <- fitsTmb[[i]]
  p <- ggplot(fitTmb$trace) +
    aes(x = iter, y = objfn) +
    geom_point() +
    geom_line() +
    labs(
      title = "Optimization convergence",
      subtitle = paste0("REML=", fitTmb$REML)
    ) +
    theme_bw()
  print(p)
}

4.2.3 Checks

for (i in seq_along(fitsTmb)) {
  fitTmb <- fitsTmb[[i]]
  print(paste0("REML=", fitTmb$REML))

  print("Check fixed effects:")
  checks <- data.frame(
    param = rownames(fitTmb$B_IC),
    truth = c(truth$B_IC),
    fitTmb$B_IC,
    row.names = NULL,
    check.names = FALSE
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(checks)
  checks <- data.frame(
    param = rownames(fitTmb$beta_SC_f),
    truth = obsMC$blObsContrs$S1[, "SC"],
    fitTmb$beta_SC_f,
    row.names = NULL,
    check.names = FALSE
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(checks)
  checks <- data.frame(
    param = rownames(fitTmb$beta_SC_t),
    truth = c(
      obsMC$blObsContrs$S2[, "SC"],
      obsMC$BVObsContrs$S2[-1, "SC", "DBV"]
    ),
    fitTmb$beta_SC_t,
    row.names = NULL,
    check.names = FALSE
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(checks)

  print("Check (co)variances of random genetic effects:")
  checks <- data.frame(
    param = c(
      rownames(fitTmb$vcov_BV_f),
      rownames(fitTmb$var_SIGV_f)
    ),
    truth = c(
      truth$var_DBV["S1"],
      truth$var_SBV["S1"],
      truth$cor_DBV_SBV["S1"],
      truth$var_SIGV["S1"]
    ),
    rbind(
      fitTmb$vcov_BV_f,
      fitTmb$var_SIGV_f
    ),
    row.names = NULL,
    check.names = FALSE
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(checks)

  print("Check (co)variances of residual errors:")
  checks <- data.frame(
    param = c(
      rownames(fitTmb$vcov_E_IC),
      rownames(fitTmb$var_err_SC_f),
      rownames(fitTmb$var_err_SC_t)
    ),
    truth = c(
      truth$var_err_IC, truth$cor_err_IC,
      truth$var_err_SC
    ),
    rbind(
      fitTmb$vcov_E_IC,
      fitTmb$var_err_SC_f,
      fitTmb$var_err_SC_t
    ),
    row.names = NULL,
    check.names = FALSE
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(checks)

  print("Check random genetic effects of the focal species:")
  checks <- data.frame(
    param = rep(c("DBV", "SBV", "BV_IC", "SIGV", "BV_SC"),
      each = nrow(truth$BV$S1)
    ),
    geno = levels(datW_IC$geno_S1),
    truth = c(
      truth$BV$S1[levels(datW_IC$geno_S1), ],
      truth$BV_IC$S1[levels(datW_IC$geno_S1)],
      truth$SIGVs$S1[levels(datW_IC$geno_S1)],
      truth$BV_SC$S1[levels(datW_IC$geno_S1)]
    ),
    rbind(
      fitTmb$DBV_f[levels(datW_IC$geno_S1), ],
      fitTmb$SBV_IC_f[levels(datW_IC$geno_S1), ],
      fitTmb$BV_IC_f[levels(datW_IC$geno_S1), ],
      fitTmb$SIGV_f[levels(datW_IC$geno_S1), ],
      fitTmb$BV_SC_f[levels(datW_IC$geno_S1), ]
    )
  )
  checks$param <- factor(checks$param,
    levels = c("BV_SC", "BV_IC", "DBV", "SBV", "SIGV")
  )
  checks$nBE <- normBiasError(checks$Estimate, checks$truth)
  print(tapply(1:nrow(checks), checks$param, function(idx) {
    cor(checks$truth[idx], checks$Estimate[idx])
  }))
  p <- ggplot(checks) +
    aes(x = Estimate, y = truth) +
    geom_hline(yintercept = 0, linetype = "dotted") +
    geom_vline(xintercept = 0, linetype = "dotted") +
    geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
    geom_point() +
    labs(
      title = "Results with both sole-crop and intercrop data",
      subtitle = paste0("REML=", fitTmb$REML)
    ) +
    theme_bw() +
    facet_wrap(~param)
  print(p)
}
## [1] "REML=TRUE"
## [1] "Check fixed effects:"
##                           param      truth   Estimate Std. Error    z value
## 1 B_IC (Intercept) on species 1 32.0000000 32.4375254 0.22225758 145.945642
## 2      B_IC block1 on species 1 -0.8791112 -0.9980429 0.25158582  -3.967008
## 3    B_IC geno_S21 on species 1  0.3530473  0.6294462 0.31967477   1.969021
## 4 B_IC (Intercept) on species 2 27.0000000 26.8649213 0.09629028 278.999310
## 5      B_IC block1 on species 2  1.4664826  1.5686429 0.11209897  13.993375
## 6    B_IC geno_S21 on species 2 -0.9739075 -1.1524156 0.13287865  -8.672692
##     Pr(>|z^2|)        nBE
## 1 0.000000e+00  1.3672668
## 2 7.278066e-05 13.5286218
## 3 4.895073e-02 78.2894929
## 4 0.000000e+00 -0.5002914
## 5 1.710955e-44  6.9663490
## 6 4.220239e-18 18.3290531
##                   param      truth  Estimate Std. Error    z value Pr(>|z^2|)
## 1 beta_SC_f (Intercept) 65.1400649 65.292049  0.2879992 226.709097  0.0000000
## 2      beta_SC_f block1 -0.8957405 -1.042209  0.4155246  -2.508177  0.0121356
##          nBE
## 1  0.2333187
## 2 16.3516902
##                   param     truth  Estimate Std. Error   z value   Pr(>|z^2|)
## 1 beta_SC_t (Intercept) 31.241459 31.241459  0.3280738 95.226937 0.0000000000
## 2      beta_SC_t block1  1.112205  1.112205  0.3280738  3.390105 0.0006986580
## 3      beta_SC_t focal1 -1.267077 -1.267077  0.3280738 -3.862170 0.0001123843
##            nBE
## 1 1.137179e-14
## 2 0.000000e+00
## 3 1.401933e-13
## [1] "Check (co)variances of random genetic effects:"
##          param  truth   Estimate Std. Error    z value    Pr(>|z^2|)
## 1    var_DBV_f 27.040 30.3383826  3.4546339   8.781939  1.606808e-18
## 2 var_SBV_IC_f  5.408  4.7672014  0.6683992   7.132267  9.872899e-13
## 3     cor_BV_f -0.900 -0.9021213  0.0317730 -28.392701 2.488483e-177
## 4   var_SIGV_f 13.520 20.8457794  5.4165216   3.848555  1.188168e-04
##           nBE
## 1  12.1981605
## 2 -11.8490872
## 3   0.2357053
## 4  54.1847591
## [1] "Check (co)variances of residual errors:"
##                    param     truth   Estimate Std. Error   z value   Pr(>|z^2|)
## 1 vars_E_IC of species 1  4.577666  6.0977068  1.6481894  3.699640 0.0002159058
## 2 vars_E_IC of species 2  0.975124  1.3634721  0.3981436  3.424573 0.0006157654
## 3               cor_E_IC -0.200000 -0.4012402  0.1504564 -2.666821 0.0076572374
## 4           var_err_SC_f 17.382857 10.1436226  4.2442067  2.389993 0.0168487132
## 5           var_err_SC_t  2.468571  0.4305296  1.9212347  0.224090 0.8226872425
##         nBE
## 1  33.20560
## 2  39.82550
## 3 100.62012
## 4 -41.64583
## 5 -82.55957
## [1] "Check random genetic effects of the focal species:"
##     BV_SC     BV_IC       DBV       SBV      SIGV 
## 0.8945796 0.8840782 0.9358907 0.9299767 0.7146412

5 Reference

See the article for more details:

6 Appendix

t1 <- proc.time()
t1 - t0
##    user  system elapsed 
## 159.298   0.243 159.617
print(sessionInfo(), locale = FALSE)
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Debian GNU/Linux 12 (bookworm)
## 
## Matrix products: default
## BLAS:   /usr/local/R/4.5.1/lib/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.11.0  LAPACK version 3.11.0
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] plantmix_1.0.3 lme4_1.1-37    Matrix_1.7-3   ggplot2_3.5.2  knitr_1.50    
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       jsonlite_2.0.0     dplyr_1.1.4        compiler_4.5.1    
##  [5] Rcpp_1.1.0         tidyselect_1.2.1   dichromat_2.0-0.1  jquerylib_0.1.4   
##  [9] splines_4.5.1      scales_1.4.0       boot_1.3-31        yaml_2.3.10       
## [13] fastmap_1.2.0      lattice_0.22-7     R6_2.6.1           labeling_0.4.3    
## [17] generics_0.1.4     igraph_2.1.4       rbibutils_2.3      MASS_7.3-65       
## [21] tibble_3.3.0       nloptr_2.2.1       minqa_1.2.8        TMB_1.9.21        
## [25] bslib_0.9.0        pillar_1.11.0      RColorBrewer_1.1-3 rlang_1.1.7       
## [29] cachem_1.1.0       xfun_0.52          sass_0.4.10        viridisLite_0.4.2 
## [33] cli_3.6.5          withr_3.0.2        magrittr_2.0.3     Rdpack_2.6.4      
## [37] digest_0.6.37      grid_4.5.1         lifecycle_1.0.5    nlme_3.1-168      
## [41] reformulas_0.4.4   vctrs_0.6.5        evaluate_1.0.4     glue_1.8.0        
## [45] farver_2.1.2       rmarkdown_2.29     tools_4.5.1        pkgconfig_2.0.3   
## [49] htmltools_0.5.8.1