Subtrees as triangles with ggtree

To summarise the information displayed with a tree it’s sometimes useful to reduce the tree. With ggtree we can collapse nodes with the collapse() function but, I’m not really satisfied. Triangles are often used to group tips with similar features and I would like to do the same thing with ggtree. I quickly searched and didn’t find how to do it, so I came up with my own solution. It’s also an example on how we can use ggplot2 and ggtree to display more information on a tree.

library(purrr)
library(ape)
library(ggtree)
library(ggplot2)
library(dplyr)
library(tidytree)

How to replace one group with one triangle?

Let’s start with a random tree.

# random tree
set.seed(1234)
test_tree <- rtree(20)

test_tree
## 
## Phylogenetic tree with 20 tips and 19 internal nodes.
## 
## Tip labels:
## 	t6, t15, t14, t18, t17, t8, ...
## 
## Rooted; includes branch lengths.
# display the tree wit the internal node numbers to choose internal nodes to collapse
ggtree(test_tree) +
    geom_label(mapping = aes(label = node), size = 4)

I’m choosing the node 26 for this example. Let’s see what collapse() from does.

node_to_collapse <-  26

collapsed_tree <- ggtree(test_tree) %>% ggtree::collapse(node = node_to_collapse)
collapsed_tree + geom_point2(
    mapping = aes(subset=(node == node_to_collapse)),
    size = 5,
    shape = 23,
    fill = "steelblue"
) +
  geom_tiplab(
    mapping = aes(subset=(node == node_to_collapse), label = node), 
    offset = 0.05
)

So what I want to do is to replace a subtree by a triangle. We can find all the information to draw this triangle in the plotted tree object. By using the fortify() function from ggtre package.

I need to create a new data_frame with the right coordinates to add a triangle with geom_polygon() (vanilla ggplot).

test_tree %>%
    ggtree::fortify()
## # A tibble: 39 x 9
##    parent  node branch.length label isTip     x     y branch angle
##     <int> <int>         <dbl> <chr> <lgl> <dbl> <dbl>  <dbl> <dbl>
##  1     27     1        0.502  t6    TRUE   3.77    18   3.52   324
##  2     28     2        0.485  t15   TRUE   4.43    19   4.19   342
##  3     28     3        0.244  t14   TRUE   4.19    20   4.07   360
##  4     26     4        0.765  t18   TRUE   3.71    17   3.32   306
##  5     25     5        0.0738 t17   TRUE   2.39    16   2.36   288
##  6     24     6        0.310  t8    TRUE   2.32    15   2.16   270
##  7     29     7        0.505  t16   TRUE   2.58    11   2.33   198
##  8     31     8        0.494  t4    TRUE   3.23    13   2.98   234
##  9     31     9        0.751  t13   TRUE   3.49    14   3.11   252
## 10     30    10        0.175  t5    TRUE   2.41    12   2.32   216
## # … with 29 more rows
node_to_collapse_offsprings <- test_tree %>%
    ggtree::fortify() %>%
    tidytree::offspring(.node = node_to_collapse)

node_to_collapse_offsprings_tips <- node_to_collapse_offsprings %>%
    filter(!is.na(label)) %>%
    pull(node)

Now the coordinates of the nodes to replace by the triangle.

# define one endpoint at the position of the "collapsed" node
node_to_collapse_xy <- test_tree %>%
    ggplot2::fortify() %>%
    as_tibble() %>%
    dplyr::filter(node == node_to_collapse) %>%
    dplyr::select(x, y) 

node_to_collapse_xy
## # A tibble: 1 x 2
##       x     y
##   <dbl> <dbl>
## 1  2.94  17.9
# this will define the 2 other endpoints
tips_to_collapse_xy <- test_tree %>%
    ggplot2::fortify() %>%
    as_tibble() %>%
    dplyr::filter(node %in% node_to_collapse_offsprings_tips) %>%
    dplyr::select(x, y) %>%
    summarise(xmax = max(x), xmin = min(x), ymax = max(y), ymin = min(y)) 

tips_to_collapse_xy 
## # A tibble: 1 x 4
##    xmax  xmin  ymax  ymin
##   <dbl> <dbl> <dbl> <dbl>
## 1  4.43  3.71    20    17
library(ggrepel)

ggtree(test_tree) +
  geom_point(data = node_to_collapse_xy, color = "red") +
  ggrepel::geom_label_repel(data = node_to_collapse_xy, label = "endpoint 1", point.padding = 0.4) +
  geom_rect(
    data = tips_to_collapse_xy, 
    mapping = aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    color = "darkgrey",
    alpha = 0,
    linetype = "dotted",
    inherit.aes = FALSE
  ) +
  annotate(geom = "point", x = tips_to_collapse_xy$xmax, y = tips_to_collapse_xy$ymin, color = "red") +
  annotate(geom = "label_repel", x = tips_to_collapse_xy$xmax, y = tips_to_collapse_xy$ymin, label = "endpoint 2", point.padding = 0.5) +
  annotate(geom = "point", x = tips_to_collapse_xy$xmax, y = tips_to_collapse_xy$ymax, color = "red") +
  annotate(geom = "label_repel", x = tips_to_collapse_xy$xmax, y = tips_to_collapse_xy$ymax, label = "endpoint 3", point.padding = 0.5) 

The dataframe with the positions of the 3 endpoints

triange_df <- tibble(
    node = rep(node_to_collapse, 3), # index
    x = c(node_to_collapse_xy$x, tips_to_collapse_xy$xmax, tips_to_collapse_xy$xmax),
    y = c(node_to_collapse_xy$y, tips_to_collapse_xy$ymin, tips_to_collapse_xy$ymax)
)

triange_df
## # A tibble: 3 x 3
##    node     x     y
##   <dbl> <dbl> <dbl>
## 1    26  2.94  17.9
## 2    26  4.43  17  
## 3    26  4.43  20

We can now plot our triangle over the tree. The next step will be to trim the tree to hide the collapsed nodes.

ggtree(test_tree) +
    geom_polygon(
      data = triange_df, 
      mapping = aes(group = node),
      alpha = 0.5
    ) 

After transformation of the phylo object to a tibble, and modified, ggtree is still able to plot it.

tree_without_collapsed_nodes <- test_tree %>%
  fortify() %>% 
  as_tibble() %>%
  filter(! node %in% node_to_collapse_offsprings$node)

tree_without_collapsed_nodes
## # A tibble: 33 x 9
##    parent  node branch.length label isTip     x     y branch angle
##     <int> <int>         <dbl> <chr> <lgl> <dbl> <dbl>  <dbl> <dbl>
##  1     25     5        0.0738 t17   TRUE   2.39    16   2.36   288
##  2     24     6        0.310  t8    TRUE   2.32    15   2.16   270
##  3     29     7        0.505  t16   TRUE   2.58    11   2.33   198
##  4     31     8        0.494  t4    TRUE   3.23    13   2.98   234
##  5     31     9        0.751  t13   TRUE   3.49    14   3.11   252
##  6     30    10        0.175  t5    TRUE   2.41    12   2.32   216
##  7     34    11        0.317  t19   TRUE   2.88     7   2.72   126
##  8     34    12        0.0137 t12   TRUE   2.58     8   2.57   144
##  9     35    13        0.706  t10   TRUE   3.47     9   3.11   162
## 10     35    14        0.308  t3    TRUE   3.07    10   2.91   180
## # … with 23 more rows

Et voilà !

 ggtree(tree_without_collapsed_nodes) +
  geom_polygon(
    data = triange_df, 
    mapping = aes(group = node),
    color = "#333333"
  ) 

Replacing multiple subtrees with triangles

After that I wrote functions to make the process simplier and to be able to choose multiple tips to group. It can certainly be improved, for now the input is a vector of internal nodes from starting point of the triangles. The code can be modified to start from a table of tips and to name each groups, with the clade name for example.

get_offsprings <- function(node_to_collapse, phylo) {
  phylo %>%
    ggtree::fortify() %>%
    tidytree::offspring(.node = node_to_collapse) %>%
    dplyr::pull(node)
}

get_offspring_tips <- function(phylo, node_to_collapse) {
  phylo %>%
    ggtree::fortify() %>%
    tidytree::offspring(.node = node_to_collapse) %>%
    dplyr::filter(isTip) %>%
    dplyr::pull(node)
}

remove_collapsed_nodes <- function(phylo, nodes_to_collapse) {
  nodes <- purrr::map(nodes_to_collapse, get_offsprings, phylo = phylo) %>% unlist()
  phylo %>%
    ggtree::fortify() %>%
    tibble::as_tibble() %>%
    dplyr::filter(!node %in% nodes)
}

get_collapsed_offspring_nodes_coordinates <- function(phylo, nodes) {
  phylo %>%
    ggtree::fortify() %>%
    tibble::as_tibble() %>%
    dplyr::filter(node %in% nodes) %>%
    dplyr::summarise(xmax = max(x), xmin = min(x), ymax = max(y), ymin = min(y))
}

get_collapsed_node_coordinates <- function(phylo, node_to_collapse) {
  # todo: assert that node is scalar
  phylo %>%
    ggtree::fortify() %>%
    tibble::as_tibble() %>%
    dplyr::filter(node == node_to_collapse) %>%
    dplyr::select(x, y)
}

get_triangle_coordinates_ <- function(node, phylo, mode = c("max", "min", "mixed")) {
  mode <- match.arg(mode)
  # for one
  tips_to_collapse <- get_offspring_tips(phylo, node)
  node_to_collapse_xy <- get_collapsed_node_coordinates(phylo, node)
  tips_to_collapse_xy <- get_collapsed_offspring_nodes_coordinates(phylo, tips_to_collapse)

  triange_df <- mode %>% switch(
    max = dplyr::tibble(
      x = c(node_to_collapse_xy$x, tips_to_collapse_xy$xmax, tips_to_collapse_xy$xmax),
      y = c(node_to_collapse_xy$y, tips_to_collapse_xy$ymin, tips_to_collapse_xy$ymax)
    ),
    min = tibble(
      x = c(node_to_collapse_xy$x, tips_to_collapse_xy$xmin, tips_to_collapse_xy$xmin),
      y = c(node_to_collapse_xy$y, tips_to_collapse_xy$ymin, tips_to_collapse_xy$ymax)
    ),
    mixed = tibble(
      x = c(node_to_collapse_xy$x, tips_to_collapse_xy$xmin, tips_to_collapse_xy$xmax),
      y = c(node_to_collapse_xy$y, tips_to_collapse_xy$ymin, tips_to_collapse_xy$ymax)
    )
  )
  return(triange_df)
}

get_triangle_coordinates <- function(phylo, nodes, mode = c("max", "min", "mixed")) {
  mode <- match.arg(mode)
  purrr::map(nodes, get_triangle_coordinates_, phylo = phylo, mode = mode) %>%
    dplyr::bind_rows(.id = "node_collapsed")
}

In the previous example I picked the maximum values but that can be changed by specifying a different “mode”: “max”, “min” and “mixed” (it takes a the minimum x and y values for endpoint 2 and the maximums for endpoint 3).

nodes_to_collapse <- c(26, 31, 34, 22)

collapsed_tree_df <- test_tree %>%
    remove_collapsed_nodes(nodes = nodes_to_collapse)

triangles_df <- test_tree %>%
    get_triangle_coordinates(nodes_to_collapse)

modes <- c('max', 'min', 'mixed')

triangles_mode_df <- modes %>%
    purrr::map(function(mode) get_triangle_coordinates(phylo = test_tree, nodes = nodes_to_collapse, mode = mode)) %>%
    set_names(modes) %>%
    bind_rows(.id = "mode")

ggtree(collapsed_tree_df) +
    geom_treescale() +
    geom_polygon(
      data = triangles_mode_df, 
      mapping = aes(group = node_collapsed, fill = node_collapsed), 
      color = "#333333"
    ) +
    facet_grid(. ~ mode) +
    scale_fill_brewer(palette = "Set1") +
    theme(
        strip.background = element_blank()
    )

I hope it will be useful, here is a gist. Edit: not up-to-date

Session info

## R version 3.6.0 (2019-04-26)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_IE.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_IE.UTF-8        LC_COLLATE=en_IE.UTF-8    
##  [5] LC_MONETARY=en_IE.UTF-8    LC_MESSAGES=en_IE.UTF-8   
##  [7] LC_PAPER=en_IE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_IE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggrepel_0.8.1  tidytree_0.2.4 dplyr_0.8.1    ggplot2_3.1.1 
## [5] ggtree_1.16.1  ape_5.3        purrr_0.3.2   
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1         RColorBrewer_1.1-2 compiler_3.6.0    
##  [4] pillar_1.4.1       plyr_1.8.4         tools_3.6.0       
##  [7] zeallot_0.1.0      digest_0.6.19      jsonlite_1.6      
## [10] evaluate_0.14      tibble_2.1.3       nlme_3.1-140      
## [13] gtable_0.3.0       lattice_0.20-38    pkgconfig_2.0.2   
## [16] rlang_0.3.4        cli_1.1.0          rvcheck_0.1.3     
## [19] yaml_2.2.0         parallel_3.6.0     blogdown_0.13.1   
## [22] xfun_0.7           treeio_1.8.1       withr_2.1.2       
## [25] stringr_1.4.0      knitr_1.23         vctrs_0.1.0       
## [28] grid_3.6.0         tidyselect_0.2.5   glue_1.3.1        
## [31] R6_2.4.0           fansi_0.4.0        rmarkdown_1.13    
## [34] bookdown_0.11      reshape2_1.4.3     tidyr_0.8.3       
## [37] magrittr_1.5       backports_1.1.4    scales_1.0.0      
## [40] htmltools_0.3.6    assertthat_0.2.1   colorspace_1.4-1  
## [43] labeling_0.3       utf8_1.1.4         stringi_1.4.3     
## [46] lazyeval_0.2.2     munsell_0.5.0      crayon_1.3.4
Jean Manguy, PhD
Research co-ordinator

Related