suppressPackageStartupMessages({
library(tidyverse)
library(cowplot)
library(aplot)
library(ggtree)
})
theme_set(theme_cowplot())
options(repr.plot.width=15,repr.plot.height=9)

Clustered Heatmap#

There are many packages that generate clustered heatmaps on ggplot2, but none of them are composable.

This notebook illustrates how to make a clustered heatmap in a composable way, using the aplot package.

code#

insert_generic <- function(data, plot, side, size=1) {
    if(side == 'top') {
        insert_top(data, plot, height=size)
    } else if(side == 'bottom') {
        insert_bottom(data, plot, height=size)
    } else if(side == 'left') {
        insert_left(data, plot, width=size)
    } else if(side == 'right') {
        insert_right(data, plot, width=size)
    } else {
        stop('unknown side')
    }
}
insert_hclust <- function(ggp,
                          cluster_by,
                          side=c('top','left'),
                          dist_method="euclidean",
                          hclust_method='complete',
                          size=0.1) {

    plot <- ggp
    if('aplot' %in% class(ggp)) {
        ggp <- plot$plotlist[[1]]
    }

    mdata <-
        ggp$data |>
        mutate(!!!ggp$mapping) |>
        pivot_wider(names_from = 'x', values_from = {{cluster_by}}, id_cols=y) |>
        column_to_rownames('y') |>
        as.matrix()

    reduce(side, function(p, s) {
        if(s %in% c('left','right')) {
            dd <-
            mdata |>
            dist(method=dist_method) |>
            hclust(method=hclust_method) |>
            ggtree(branch.length='none')
            if(s == 'right') { dd <- dd + scale_x_reverse() }
            p <- insert_generic(p, dd, side=s, size=size)
        }
        if(s %in% c('top','bottom')) {
            dd <-
            t(mdata) |>
            dist(method=dist_method) |>
            hclust(method=hclust_method) |>
            ggtree(branch.length='none')
            if(s == 'bottom') { dd <- dd + coord_flip() }
            else { dd <- dd + layout_dendrogram() }
            p <- insert_generic(p, dd, side=s, size=size)
        }
        return(p)
    }, .init=plot)
}
insert_annot <- function(ggp, side, var, size=0.02, ggextra=NULL) {
    plot <- ggp
    if('aplot' %in% class(ggp)) {
        ggp <- plot$plotlist[[1]]
    }

    rem_axis <- list(coord_cartesian(expand=0) , theme(
        line=element_blank(),
        rect=element_blank(),
        axis.text=element_blank(),
        axis.title = element_blank(),
        plot.margin = margin(t = 0, r = 0,b = 0,l = 0)
    ))

    if(side %in% c('top','bottom')) {
        p <- 
            ggplot(ggp$data, aes(x=!!ggp$mapping$x, y=1, fill={{var}})) + 
            geom_tile() + rem_axis + ggextra
    }
    if(side %in% c('left','right')) {
        p <- 
            ggplot(ggp$data, aes(y=!!ggp$mapping$y, x=1, fill={{var}})) + 
            geom_tile() + rem_axis + ggextra
    }
    insert_generic(data=plot, plot=p, side=side, size=size)
}

Basic Heatmap#

data <-
    rownames_to_column(mtcars, 'model') |>
    pivot_longer(names_to = 'var', values_to = 'value', -model) |>
    mutate(scaled.val = scale(value), .by=var)
head(data, 3)
A tibble: 3 × 4
modelvarvaluescaled.val
<chr><chr><dbl><dbl[,1]>
Mazda RX4mpg 21 0.150884824647657
Mazda RX4cyl 6-0.104987808575239
Mazda RX4disp160-0.570619818667904
(
    data |>
    ggplot(aes(y=model, x=var, fill=scaled.val)) +
    geom_tile() +
    scale_fill_viridis_c() +
    coord_cartesian(expand=0) +
    scale_y_discrete(position='right')
) |>
insert_hclust(cluster_by = scaled.val, dist_method = 'canberra', hclust_method = 'ward.D2')

Dendrogram on the alternate side#

(
    data |>
    ggplot(aes(y=model, x=var, fill=scaled.val)) +
    geom_tile() +
    scale_fill_viridis_c() +
    coord_cartesian(expand=0) +
    scale_x_discrete(position='top')
) |>
insert_hclust(cluster_by = scaled.val, side=c('right','bottom'), dist_method = 'canberra', hclust_method = 'ward.D2')

Heatmap with annotations#

data <-
    dist(mtcars, method='canberra') |>
    as.matrix() |>
    as.data.frame.table(responseName='canberra.dist') |>
    inner_join(rownames_to_column(mtcars,'model'), by=c('Var1'='model'))
head(data, 3)
A data.frame: 3 × 14
Var1Var2canberra.distmpgcyldisphpdratwtqsecvsamgearcarb
<chr><fct><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
1Mazda RX4 Mazda RX40.000000000000000021.061601103.902.62016.460144
2Mazda RX4 WagMazda RX40.069445450028971621.061601103.902.87517.020144
3Datsun 710 Mazda RX42.247355900873809122.84108 933.852.32018.611141
(
    data |>
    ggplot(aes(x=Var1, y=Var2, fill=canberra.dist)) +
    geom_tile() +
    coord_cartesian(expand=0) +
    scale_y_discrete(position='right') +
    theme(axis.text.x=element_text(angle=45,vjust=1,hjust=1)) +
    scale_fill_viridis_c()
) |>
insert_annot(side = 'top', var = hp, ggextra=scale_fill_viridis_c(option='inferno')) |>
insert_annot(side = 'top', var = mpg, ggextra=scale_fill_viridis_c(option='mako')) |>
insert_annot(side = 'top', var = factor(cyl)) |>
insert_hclust(cluster_by=canberra.dist)

scRNA Heatmap#

data <-
    read_tsv(system.file("extdata", "scRNA_dotplot_data.tsv.gz", package="aplot"), col_types='') |>
    mutate(expr_perc = 100 * cell_exp_ct/cell_ct) |>
    mutate(gene_mean=mean(count), .by=Gene)
head(data,3)
A tibble: 3 × 8
Geneclustercell_ctcell_exp_ctcountGroupexpr_percgene_mean
<chr><chr><dbl><dbl><dbl><chr><dbl><dbl>
MICAc0041789100.2890587840294733Neuron 21.780756342747730.0840202740097265
MICAc013532 820.0232163080407701Microglia 2.321630804077010.0840202740097265
MICAc0233201170.0354171573797353Fibroblast 3.524096385542170.0840202740097265
(
    data |>
    ggplot(aes(y=Gene, x=cluster, color=count, size=expr_perc)) +
    geom_point() +
    scale_size(limits=c(0.5, NA), range=c(0,5.5)) +
    scale_color_viridis_c(trans='pseudo_log') +
    scale_y_discrete(position = 'right') +
    theme(axis.ticks = element_blank(),axis.line = element_blank()) +
    labs(size='Expr %', x='', y='')
) |>
insert_annot(side='top', var=Group) |>
insert_annot(side='left', var=gene_mean, ggextra=scale_fill_viridis_c(option = 'inferno')) |>
insert_hclust(cluster_by=count, hclust_method='ward.D2')
Warning message:
"Removed 193 rows containing missing values or values outside the scale range (`geom_point()`)."
../../_images/a37a850ef45ec05bd67ffc48a359bb67a1e61f7d1348b38d53867004afedc06c.png

alternative: ggh4x#

see: https://teunbrand.github.io/ggh4x/articles/PositionGuides.html#dendrograms

library(ggh4x)
clst1 <- 
    scale(mtcars) |>
    dist() |>
    hclust("ward.D2")
clst2 <- 
    scale(mtcars) |>
    t() |>
    dist() |>
    hclust("ward.D2")

df <- 
    scale(mtcars) |>
    as_tibble(rownames = 'model') |>
    pivot_longer(names_to = 'vars', values_to = 'value', -model)

ggplot(df, aes(vars, model, fill = value)) +
    geom_raster() +
    scale_fill_viridis_c() +
    coord_cartesian(expand=0) +
    labs(y=NULL,x=NULL) +
    scale_y_dendrogram(hclust = clst1, guide = guide_dendro(label = FALSE)) +
    scale_x_dendrogram(hclust = clst2, position = 'top', guide = guide_dendro(label = FALSE)) +
    guides(y.sec = guide_axis(), x.sec=guide_axis())
Warning message:
"The S3 guide system was deprecated in ggplot2 3.5.0.
 It has been replaced by a ggproto system that can be extended."
../../_images/0a78ec607f9b8b92ce18775a4e6663a1ce4513150e3eabd6849cfbc32ff85a4f.png