## ----secret-header, echo=FALSE------------------------------------------------
# Fix the random seed so that repeated runs give the same results
set.seed(0)

## ----generate-trees-----------------------------------------------------------
library("TreeTools", quietly = TRUE, warn.conflicts = FALSE)

# Index of each tree to generate; the same index selects the tree's colour
treeNumbers <- 1:220

# A 220-step plasma palette, from which each tree takes the entry matching
# its index
spectrum <- viridisLite::plasma(220)
treeCols <- spectrum[treeNumbers]

# Build one tree per index
# NOTE(review): the second argument is presumably the number of tips per
# tree - confirm against the TreeTools documentation
trees <- as.phylo(treeNumbers, 8)

## ----calculate-distances------------------------------------------------------
library("TreeDist")
# Clustering information distance, computed over the whole set of trees
distances <- ClusteringInfoDistance(trees)

## ----projection---------------------------------------------------------------
# Classical multidimensional scaling of the distances, requesting twelve
# dimensions
projection <- cmdscale(d = distances, k = 12)

## ----plot-projection-2d, fig.asp = 1, fig.width = 3, fig.align='center'-------
# Scatterplot of the first two dimensions of the projection, one point per tree.
# The margins are removed for this figure only: the previous graphical
# parameters are saved and put back afterwards, so that zero margins do not
# clip the axes and labels of figures drawn later.
oldPar <- par(mar = rep(0, 4))
plot(projection,
     asp = 1, # Preserve aspect ratio - do not distort distances
     ann = FALSE, axes = FALSE, # Don't label axes: dimensions are meaningless
     col = treeCols, pch = 16
     )
par(oldPar)

## ----clustering, fig.align='center'-------------------------------------------
# Candidate numbers of clusters to evaluate
possibleClusters <- 2:10

# Partitioning around medoids: one clustering per candidate number of clusters
pamClusters <- lapply(possibleClusters, function (k) {
  cluster::pam(distances, k = k)
})

# Score each clustering by the mean of the third column of its silhouette
# matrix
pamSils <- vapply(pamClusters, function (pamCluster) {
  mean(cluster::silhouette(pamCluster)[, 3])
}, double(1))

# Keep the clustering with the highest mean silhouette value.
bestPam <- which.max(pamSils)
pamSil <- pamSils[bestPam]
# The component of a pam object is named `clustering`; spell it out in full
# rather than relying on `$` partial matching of `cluster`.
pamCluster <- pamClusters[[bestPam]]$clustering

# Hierarchical clustering of the same distances
hTree <- protoclust::protoclust(distances)

# Cut the tree once for each candidate number of clusters
hClusters <- lapply(possibleClusters, function (nClust) {
  cutree(hTree, k = nClust)
})

# Score each cut by the mean of the third column of its silhouette matrix
hSils <- vapply(hClusters, function (membership) {
  silhouettes <- cluster::silhouette(membership, distances)
  mean(silhouettes[, 3])
}, double(1))

# Keep the cut with the highest mean silhouette value
bestH <- which.max(hSils)
hSil <- hSils[bestH]
hCluster <- hClusters[[bestH]]

# Compare the two methods: mean silhouette value against number of clusters,
# sharing a vertical range that accommodates both series
plot(possibleClusters, pamSils,
     ylim = range(pamSils, hSils),
     xlab = 'Number of clusters', ylab = 'Silhouette coefficient')
points(possibleClusters, hSils, pch = 2)
legend('topright', legend = c('PAM', 'Hierarchical'), pch = 1:2)

## ----chosen-cluster-----------------------------------------------------------
# Work with the two-cluster hierarchical partition.  Its position in
# `hClusters` is looked up from `possibleClusters` instead of assuming that
# the candidates start at 2; the lookup fails with an error if 2 is absent.
cluster <- hClusters[[which(possibleClusters == 2)]]

## ----h-tree, fig.align='center'-----------------------------------------------
# Re-class the clustering as a plain 'hclust' object so that plot() dispatches
# on that class alone
class(hTree) <- 'hclust'
# Margins are removed for this figure only and restored afterwards
oldPar <- par(mar = c(0, 0, 0, 0))
plot(hTree, labels = FALSE, main = '')
# Mark each leaf position with the colour of the tree that occupies it.
# `treeCols` is indexed by tree, so it stays correct even if `treeNumbers`
# is not simply 1..n.
points(seq_along(trees), rep(1, length(trees)), pch = 16,
       col = treeCols[hTree$order])
par(oldPar)

## ----consensus, fig.align='center'--------------------------------------------
# Draw the consensus tree of each of the two clusters side by side.
# The two-panel layout and narrow margins apply to this figure only: the
# settings in force beforehand are restored at the end so that they do not
# carry over into later figures.
oldPar <- par(mfrow = c(1, 2), mar = rep(0.2, 4))
# Colour each cluster by the palette entry at the mean tree number of its
# members.  The mean is generally fractional; trunc() makes explicit the
# truncation that a fractional subscript undergoes anyway.
col1 <- spectrum[trunc(mean(treeNumbers[cluster == 1]))]
col2 <- spectrum[trunc(mean(treeNumbers[cluster == 2]))]
plot(consensus(trees[cluster == 1]), edge.color = col1, edge.width = 2,
     tip.color = col1)
plot(consensus(trees[cluster == 2]), edge.color = col2, edge.width = 2,
     tip.color = col2)
par(oldPar)

## ----how-many-dims, fig.align='center'----------------------------------------
# Quality of the projection when only its first k dimensions are retained.
# Iterate over the columns actually present in `projection` rather than a
# hard-coded 1:12: cmdscale() returns fewer columns than requested when not
# enough eigenvalues are positive, and a fixed range would then index past
# the last column.
txc <- vapply(seq_len(ncol(projection)), function (k) {
  # drop = FALSE keeps a one-column matrix when k == 1
  newDist <- dist(projection[, seq_len(k), drop = FALSE])
  ProjectionQuality(distances, newDist, 10)[['TxC']]
}, double(1))
plot(txc, xlab = 'Dimension')
# Reference line at 0.9
abline(h = 0.9, lty = 2)

## ----calculate-MST------------------------------------------------------------
# Edges of the minimum spanning tree of the distances.  Each row of the result
# is used further down to index the rows of `projection` when drawing an edge.
mstEnds <- MSTEdges(distances)

## ----plot-projection-5d, fig.asp = 1, fig.align='center'----------------------
# Layout for the pairwise scatterplots of five dimensions: a 4 x 4 grid whose
# lower triangle (diagonal included) holds panels 1-10, numbered row by row.
plotSeq <- matrix(0, 4, 4)
plotSeq[upper.tri(plotSeq, diag = TRUE)] <- seq_len(10)
plotSeq <- t(plotSeq)
# The three cells just above the diagonal hold panels 11-13
plotSeq[cbind(1:3, 2:4)] <- 11:13
layout(plotSeq)
par(mar = rep(0.1, 4))

# Every panel shares the same axis limits: the range of the whole projection
axisRange <- range(projection)

# One panel per pair of dimensions: dimension j across, dimension i up
for (i in 2:5) {
  for (j in seq_len(i - 1)) {
    xCoord <- projection[, j]
    yCoord <- projection[, i]

    # Empty, framed panel
    plot(xCoord, yCoord, type = 'n', asp = 1,
         ann = FALSE, axes = FALSE, frame.plot = TRUE,
         xlim = axisRange, ylim = axisRange)

    # Minimum spanning tree: one line per row of `mstEnds`
    for (edge in seq_len(nrow(mstEnds))) {
      segment <- mstEnds[edge, ]
      lines(xCoord[segment], yCoord[segment], col = "#bbbbbb", lty = 1)
    }

    # Trees, coloured individually
    points(xCoord, yCoord, pch = 16, col = treeCols)

    # Outline each cluster with the convex hull of its members
    for (clI in unique(cluster)) {
      inCluster <- cluster == clI
      clusterX <- xCoord[inCluster]
      clusterY <- yCoord[inCluster]
      hull <- chull(clusterX, clusterY)
      polygon(clusterX[hull], clusterY[hull], border = '#54de25bb',
              lty = 1, lwd = 2)
    }
  }
}
# Annotate dimensions: each of the next three panels is an empty plot that
# carries only a text label
for (dimension in 2:4) {
  plot(0, 0, type = 'n', ann = FALSE, axes = FALSE)
  text(0, 0, paste('Dimension', dimension))
}
# Return to a single-panel layout, so that figures drawn afterwards are not
# squeezed into the first cell of a multi-panel grid
layout(1)

## ----pid, fig.asp = 1, fig.width = 4, fig.align = 'center', echo = FALSE------
# Repeat the analysis with the phylogenetic information distance: project
# into six dimensions and cut a hierarchical clustering into two clusters.
library('TreeDist')
pid_distances <- PhylogeneticInfoDistance(trees)
pid_projection <- cmdscale(pid_distances, k = 6)
pid_cluster <- cutree(protoclust::protoclust(pid_distances), k = 2)

# Margins are removed for this figure only and restored afterwards
oldPar <- par(mar = rep(0, 4))
plot(pid_projection, ann = FALSE, axes = FALSE, asp = 1,
     col = treeCols, pch = 16)
# NOTE(review): the second argument presumably asks MSTEdges() to draw the
# spanning tree at the supplied x / y coordinates - confirm against its
# documentation
MSTEdges(pid_distances, TRUE, pid_projection[, 1], pid_projection[, 2],
         col = "#bbbbbb", lty = 1)
# Outline each cluster with the convex hull of its members, taking the cluster
# labels from the clustering itself rather than a fixed 1:2
for (clI in sort(unique(pid_cluster))) {
  inCluster <- pid_cluster == clI
  clusterX <- pid_projection[inCluster, 1]
  clusterY <- pid_projection[inCluster, 2]
  hull <- chull(clusterX, clusterY)
  polygon(clusterX[hull], clusterY[hull], lty = 1, lwd = 2,
          border = '#54de25bb')
}
par(oldPar)

