【原】超详细的R语言热图之complexheatmap系列2

阿越就是我 2023-10-12 发布于上海

展开全文

我们继续学习complexheatmap包的内容，这是本系列的第2篇文章。

本篇主要学习热图分割，行名/列名设置，热图的小格子自定义。

本系列内容非常多，将通过多篇推文逐渐介绍，欢迎大家关注我的公众号：医学和生信笔记。

本系列是对ComplexeHeatmap包的中文翻译（1-6章），部分内容根据自己的理解有适当的改动，但总体不影响原文。如有不明之处，以原文为准。原文请见：https://jokergoo./ComplexHeatmap-reference/book/

2.6 行名/列名

默认显示，如果不想显示行名/列名，使用show_row_names和show_column_names参数

Heatmap(mat, name = "mat", show_row_names = F, show_column_names = F)

调整位置，使用row_names_side和column_names_side：

Heatmap(mat, name = "mat", row_names_side = "left", row_dend_side = "right", 
    column_names_side = "top", column_dend_side = "bottom")

调整行名/列名样式，使用row_names_gp和column_names_gp:

Heatmap(mat, name = "mat", row_names_gp = gpar(fontsize = 20), column_names_gp = gpar(col = c(rep("red", 10), rep("blue", 8))))

居中对齐：

Heatmap(mat, name = "mat", row_names_centered = TRUE, column_names_centered = TRUE)

旋转方向：

Heatmap(mat, name = "mat", column_names_rot = 45)

Heatmap(mat, name = "mat", column_names_rot = 45, column_names_side = "top",
    column_dend_side = "bottom")

行名/列名太长怎么办？也能调整：

mat2 = mat
rownames(mat2)[1] = paste(c(letters, LETTERS), collapse = "")
Heatmap(mat2, name = "mat", row_title = "default row_names_max_width")

Heatmap(mat2, name = "mat", row_title = "row_names_max_width as length of a*",
    row_names_max_width = max_text_width(
        rownames(mat2), 
        gp = gpar(fontsize = 12)
    ))

自定义行名/列名，可用于解决原始矩阵行名/列名不能有重复的问题，或使用特殊符号等：

# use a named vector to make sure the correspondance between row names and row labels is correct
row_labels = structure(paste0(letters[1:24], 1:24), names = paste0("row", 1:24))
column_labels = structure(paste0(LETTERS[1:24], 1:24), names = paste0("column", 1:24))
row_labels

##  row1  row2  row3  row4  row5  row6  row7  row8  row9 row10 row11 row12 row13 
##  "a1"  "b2"  "c3"  "d4"  "e5"  "f6"  "g7"  "h8"  "i9" "j10" "k11" "l12" "m13" 
## row14 row15 row16 row17 row18 row19 row20 row21 row22 row23 row24 
## "n14" "o15" "p16" "q17" "r18" "s19" "t20" "u21" "v22" "w23" "x24"

Heatmap(mat, name = "mat", row_labels = row_labels[rownames(mat)], 
    column_labels = column_labels[colnames(mat)])

Heatmap(mat, name = "mat", row_labels = expression(alpha, beta, gamma, delta, epsilon, 
    zeta, eta, theta, iota, kappa, lambda, mu, nu, xi, omicron, pi, rho, sigma))

2.7 热图分割

主要通过四个参数调整：

row_km
row_split
column_km
column_split

2.7.1 通过K-means方法分割

Heatmap(mat, name = "mat", row_km = 2)

Heatmap(mat, name = "mat", column_km = 3)

Heatmap(mat, name = "mat", row_km = 2, column_km = 3)

2.7.2 通过离散型变量分割

# split by a vector
Heatmap(mat, name = "mat", 
    row_split = rep(c("A", "B"), 9), column_split = rep(c("C", "D"), 12))

# split by a data frame
Heatmap(mat, name = "mat", 
    row_split = data.frame(rep(c("A", "B"), 9), rep(c("C", "D"), each = 9)))

# split on both dimensions
Heatmap(mat, name = "mat", row_split = factor(rep(c("A", "B"), 9)),
    column_split = factor(rep(c("C", "D"), 12)))

2.7.3 通过聚类树分割

Heatmap(mat, name = "mat", row_split = 2, column_split = 3)

dend = hclust(dist(mat))
dend = color_branches(dend, k = 2)
Heatmap(mat, name = "mat", cluster_rows = dend, row_split = 2)

split = data.frame(cutree(hclust(dist(mat)), k = 2), rep(c("A", "B"), 9))
Heatmap(mat, name = "mat", row_split = split)

2.7.4 切片顺序

默认情况下，当把row_split/column_split设置为类别变量（向量或数据框）或设置row_km/column_km时，会对切片的平均值使用聚类，以显示切片级别中的层次结构。在这种情况下，无法精确地控制切片的顺序，因为它是由切片的聚类控制的。但是可以将cluster_row_slices或cluster_column_slices设置为FALSE以关闭切片聚类，然后就可以精确地控制切片的顺序了。

如果没有切片聚类，则可以通过row_split/column_split中的每个变量的级别来控制每个切片的顺序（在这种情况下，每个变量应该是一个因子）。如果所有变量都是字符，则默认顺序为unique(row_split)或unique(column_split)。

Heatmap(mat, name = "mat", 
  row_split = rep(LETTERS[1:3], 6),
    column_split = rep(letters[1:6], 4))

# clustering is similar as previous heatmap with branches in some nodes in the dendrogram flipped
Heatmap(mat, name = "mat", 
  row_split = factor(rep(LETTERS[1:3], 6), levels = LETTERS[3:1]),
    column_split = factor(rep(letters[1:6], 4), levels = letters[6:1]))

# now the order is exactly what we set
Heatmap(mat, name = "mat", 
  row_split = factor(rep(LETTERS[1:3], 6), levels = LETTERS[3:1]),
    column_split = factor(rep(letters[1:6], 4), levels = letters[6:1]),
    cluster_row_slices = FALSE, 
    cluster_column_slices = FALSE)

2.7.5 分割标题

split = data.frame(rep(c("A", "B"), 9), rep(c("C", "D"), each = 9))
Heatmap(mat, name = "mat", row_split = split, row_title = "%s|%s")

map = c("A" = "aaa", "B" = "bbb", "C" = "333", "D" = "444")
Heatmap(mat, name = "mat", row_split = split, row_title = "@{map[ x[1] ]}|@{map[ x[2] ]}")

Heatmap(mat, name = "mat", row_split = split, row_title = "{map[ x[1] ]}|{map[ x[2] ]}")

Heatmap(mat, name = "mat", row_split = split, row_title = "%s|%s", row_title_rot = 0)

Heatmap(mat, name = "mat", row_split = 2, row_title = "cluster_%s")

Heatmap(mat, name = "mat", row_split = split, 
    row_title = c("top_slice", "middle_top_slice", "middle_bottom_slice", "bottom_slice"),
    row_title_rot = 0)

Heatmap(mat, name = "mat", row_split = split, row_title = "there are four slices")

ht = Heatmap(mat, name = "mat", row_split = split, row_title = "%s|%s")
# This row_title is actually a heatmap-list-level row title
draw(ht, row_title = "I am a row title")

Heatmap(mat, name = "mat", row_split = split, row_title = NULL)

2.7.6 分割的图形参数

# 默认情况下标题顶部没有空间，现在我们增加4pt的空间
ht_opt$TITLE_PADDING = unit(c(4, 4), "points")
Heatmap(mat, name = "mat", 
    row_km = 2, row_title_gp = gpar(col = c("red", "blue"), font = 1:2),
    row_names_gp = gpar(col = c("green", "orange"), fontsize = c(10, 14)),
    column_km = 3, column_title_gp = gpar(fill = c("red", "blue", "green"), font = 1:3),
    column_names_gp = gpar(col = c("green", "orange", "purple"), fontsize = c(10, 14, 8)))

2.7.7 分割宽度

Heatmap(mat, name = "mat", row_km = 3, row_gap = unit(5, "mm"))

Heatmap(mat, name = "mat", row_km = 3, row_gap = unit(c(2, 4), "mm"))

Heatmap(mat, name = "mat", row_km = 2, column_km = 3, border = TRUE)

Heatmap(mat, name = "mat", row_km = 2, column_km = 3, 
    row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), border = TRUE)

2.7.8 分割注释条

Heatmap(mat, name = "mat", row_km = 2, column_km = 3,
    top_annotation = HeatmapAnnotation(foo1 = 1:24, bar1 = anno_points(runif(24))),
    right_annotation = rowAnnotation(foo2 = 18:1, bar2 = anno_barplot(runif(18)))
)

2.8 光栅图（略）

2.9 自定义热图主体

2.9.1 `cell_fun`

用来调整每一个小格子，共有7个参数：

i: 行索引
j: 列索引
x: 小格子中心点横坐标
y: 小格子中心点纵坐标
width: 小格子宽度
height: 小格子高度
fill: 小格子填充色

最常见的用法是在热图中添加数字：

small_mat = mat[1:9, 1:9]
col_fun = colorRamp2(c(-2, 0, 2), c("green", "white", "red"))
Heatmap(small_mat, name = "mat", col = col_fun,
    cell_fun = function(j, i, x, y, width, height, fill) {
        grid.text(sprintf("%.1f", small_mat[i, j]), x, y, gp = gpar(fontsize = 10))
})

Heatmap(small_mat, name = "mat",  col = col_fun,
    cell_fun = function(j, i, x, y, width, height, fill) {
        if(small_mat[i, j] > 0)
            grid.text(sprintf("%.1f", small_mat[i, j]), x, y, gp = gpar(fontsize = 10))
})

可视化相关性矩阵：

cor_mat = cor(small_mat)
od = hclust(dist(cor_mat))$order
cor_mat = cor_mat[od, od]
nm = rownames(cor_mat)
col_fun = circlize::colorRamp2(c(-1, 0, 1), c("green", "white", "red"))
# `col = col_fun` here is used to generate the legend
Heatmap(cor_mat, name = "correlation", col = col_fun, rect_gp = gpar(type = "none"), 
    cell_fun = function(j, i, x, y, width, height, fill) {
        grid.rect(x = x, y = y, width = width, height = height, 
            gp = gpar(col = "grey", fill = NA))
        if(i == j) {
            grid.text(nm[i], x = x, y = y)
        } else if(i > j) {
            grid.circle(x = x, y = y, r = abs(cor_mat[i, j])/2 * min(unit.c(width, height)), 
                gp = gpar(fill = col_fun(cor_mat[i, j]), col = NA))
        } else {
            grid.text(sprintf("%.1f", cor_mat[i, j]), x, y, gp = gpar(fontsize = 10))
        }
    }, cluster_rows = FALSE, cluster_columns = FALSE,
    show_row_names = FALSE, show_column_names = FALSE)

画一个棋盘：

str = "B[cp];W[pq];B[dc];W[qd];B[eq];W[od];B[de];W[jc];B[qk];W[qn]
;B[qh];W[ck];B[ci];W[cn];B[hc];W[je];B[jq];W[df];B[ee];W[cf]
;B[ei];W[bc];B[ce];W[be];B[bd];W[cd];B[bf];W[ad];B[bg];W[cc]
;B[eb];W[db];B[ec];W[lq];B[nq];W[jp];B[iq];W[kq];B[pp];W[op]
;B[po];W[oq];B[rp];W[ql];B[oo];W[no];B[pl];W[pm];B[np];W[qq]
;B[om];W[ol];B[pk];W[qp];B[on];W[rm];B[mo];W[nr];B[rl];W[rk]
;B[qm];W[dp];B[dq];W[ql];B[or];W[mp];B[nn];W[mq];B[qm];W[bp]
;B[co];W[ql];B[no];W[pr];B[qm];W[dd];B[pn];W[ed];B[bo];W[eg]
;B[ef];W[dg];B[ge];W[gh];B[gf];W[gg];B[ek];W[ig];B[fd];W[en]
;B[bn];W[ip];B[dm];W[ff];B[cb];W[fe];B[hp];W[ho];B[hq];W[el]
;B[dl];W[fk];B[ej];W[fp];B[go];W[hn];B[fo];W[em];B[dn];W[eo]
;B[gp];W[ib];B[gc];W[pg];B[qg];W[ng];B[qc];W[re];B[pf];W[of]
;B[rc];W[ob];B[ph];W[qo];B[rn];W[mi];B[og];W[oe];B[qe];W[rd]
;B[rf];W[pd];B[gm];W[gl];B[fm];W[fl];B[lj];W[mj];B[lk];W[ro]
;B[hl];W[hk];B[ik];W[dk];B[bi];W[di];B[dj];W[dh];B[hj];W[gj]
;B[li];W[lh];B[kh];W[lg];B[jn];W[do];B[cl];W[ij];B[gk];W[bl]
;B[cm];W[hk];B[jk];W[lo];B[hi];W[hm];B[gk];W[bm];B[cn];W[hk]
;B[il];W[cq];B[bq];W[ii];B[sm];W[jo];B[kn];W[fq];B[ep];W[cj]
;B[bk];W[er];B[cr];W[gr];B[gk];W[fj];B[ko];W[kp];B[hr];W[jr]
;B[nh];W[mh];B[mk];W[bb];B[da];W[jh];B[ic];W[id];B[hb];W[jb]
;B[oj];W[fn];B[fs];W[fr];B[gs];W[es];B[hs];W[gn];B[kr];W[is]
;B[dr];W[fi];B[bj];W[hd];B[gd];W[ln];B[lm];W[oi];B[oh];W[ni]
;B[pi];W[ki];B[kj];W[ji];B[so];W[rq];B[if];W[jf];B[hh];W[hf]
;B[he];W[ie];B[hg];W[ba];B[ca];W[sp];B[im];W[sn];B[rm];W[pe]
;B[qf];W[if];B[hk];W[nj];B[nk];W[lr];B[mn];W[af];B[ag];W[ch]
;B[bh];W[lp];B[ia];W[ja];B[ha];W[sf];B[sg];W[se];B[eh];W[fh]
;B[in];W[ih];B[ae];W[so];B[af]"

str = gsub("\\n", "", str)
step = strsplit(str, ";")[[1]]
type = gsub("(B|W).*", "\\1", step)
row = gsub("(B|W)\\[(.).\\]", "\\2", step)
column = gsub("(B|W)\\[.(.)\\]", "\\2", step)

go_mat = matrix(nrow = 19, ncol = 19)
rownames(go_mat) = letters[1:19]
colnames(go_mat) = letters[1:19]
for(i in seq_along(row)) {
    go_mat[row[i], column[i]] = type[i]
}
go_mat[1:4, 1:4]

##   a   b   c   d  
## a NA  NA  NA  "W"
## b "W" "W" "W" "B"
## c "B" "B" "W" "W"
## d "B" "W" "B" "W"

Heatmap(go_mat, name = "go", rect_gp = gpar(type = "none"),
    cell_fun = function(j, i, x, y, w, h, col) {
        grid.rect(x, y, w, h, gp = gpar(fill = "#dcb35c", col = NA))
        if(i == 1) {
            grid.segments(x, y-h*0.5, x, y)
        } else if(i == nrow(go_mat)) {
            grid.segments(x, y, x, y+h*0.5)
        } else {
            grid.segments(x, y-h*0.5, x, y+h*0.5)
        }
        if(j == 1) {
            grid.segments(x, y, x+w*0.5, y)        
        } else if(j == ncol(go_mat)) {
            grid.segments(x-w*0.5, y, x, y)
        } else {
            grid.segments(x-w*0.5, y, x+w*0.5, y)
        }

        if(i %in% c(4, 10, 16) & j %in% c(4, 10, 16)) {
            grid.points(x, y, pch = 16, size = unit(2, "mm"))
        }
                
        r = min(unit.c(w, h))*0.45
        if(is.na(go_mat[i, j])) {
        } else if(go_mat[i, j] == "W") {
            grid.circle(x, y, r, gp = gpar(fill = "white", col = "white"))
        } else if(go_mat[i, j] == "B") {
            grid.circle(x, y, r, gp = gpar(fill = "black", col = "black"))
        }
    },
    col = c("B" = "black", "W" = "white"),
    show_row_names = FALSE, show_column_names = FALSE,
    column_title = "One famous GO game",
    heatmap_legend_param = list(title = "Player", at = c("B", "W"), 
        labels = c("player1", "player2"), border = "black")
)

2.9.2 `layer_fun`

用法差不多，但是更复杂

# code only for demonstration
Heatmap(..., layer_fun = function(j, i, x, y, w, h, fill) {...})
# or you can capitalize the arguments to mark they are vectors,
# the names of the argumetn do not matter
Heatmap(..., layer_fun = function(J, I, X, Y, W, H, F) {...})

col_fun = colorRamp2(c(-2, 0, 2), c("green", "white", "red"))
Heatmap(small_mat, name = "mat", col = col_fun,
    layer_fun = function(j, i, x, y, width, height, fill) {
        # since grid.text can also be vectorized
        grid.text(sprintf("%.1f", pindex(small_mat, i, j)), x, y, gp = gpar(fontsize = 10))
})

Heatmap(small_mat, name = "mat", col = col_fun,
    row_km = 2, column_km = 2,
    layer_fun = function(j, i, x, y, width, height, fill) {
        v = pindex(small_mat, i, j)
        grid.text(sprintf("%.1f", v), x, y, gp = gpar(fontsize = 10))
        if(sum(v > 0)/length(v) > 0.75) {
            grid.rect(gp = gpar(lwd = 2, fill = "transparent"))
        }
})

Heatmap(small_mat, name = "mat", col = col_fun,
    row_km = 2, column_km = 2,
    layer_fun = function(j, i, x, y, w, h, fill) {
        # restore_matrix() is explained after this chunk of code
        ind_mat = restore_matrix(j, i, x, y)
        for(ir in seq_len(nrow(ind_mat))) {
            # start from the second column
            for(ic in seq_len(ncol(ind_mat))[-1]) {
                ind1 = ind_mat[ir, ic-1] # previous column
                ind2 = ind_mat[ir, ic]   # current column
                v1 = small_mat[i[ind1], j[ind1]]
                v2 = small_mat[i[ind2], j[ind2]]
                if(v1 * v2 > 0) { # if they have the same sign
                    col = ifelse(v1 > 0, "darkred", "darkgreen")
                    grid.segments(x[ind1], y[ind1], x[ind2], y[ind2],
                        gp = gpar(col = col, lwd = 2))
                    grid.points(x[c(ind1, ind2)], y[c(ind1, ind2)], 
                        pch = 16, gp = gpar(col = col), size = unit(4, "mm"))
                }
            }
        }
    }
)

2.10 热图大小

heatmap_width和heatmap_height控制整个热图的大小（包括图例），width和height只控制热图主体的大小

Heatmap(mat, name = "mat", width = unit(8, "cm"), height = unit(8, "cm"))

Heatmap(mat, name = "mat", heatmap_width = unit(8, "cm"), heatmap_height = unit(8, "cm"))

sessionInfo()

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936 
## [2] LC_CTYPE=Chinese (Simplified)_China.936   
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C                              
## [5] LC_TIME=Chinese (Simplified)_China.936    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] seriation_1.3.0      dendsort_0.3.4       dendextend_1.15.1   
## [4] circlize_0.4.13      ComplexHeatmap_2.8.0
## 
## loaded via a namespace (and not attached):
##  [1] shape_1.4.6         GetoptLong_1.0.5    tidyselect_1.1.1   
##  [4] xfun_0.25           purrr_0.3.4         colorspace_2.0-2   
##  [7] vctrs_0.3.8         generics_0.1.0      viridisLite_0.4.0  
## [10] stats4_4.1.0        utf8_1.2.2          rlang_0.4.11       
## [13] pillar_1.6.2        glue_1.4.2          DBI_1.1.1          
## [16] BiocGenerics_0.38.0 RColorBrewer_1.1-2  registry_0.5-1     
## [19] matrixStats_0.60.0  foreach_1.5.1       lifecycle_1.0.0    
## [22] stringr_1.4.0       munsell_0.5.0       gtable_0.3.0       
## [25] GlobalOptions_0.1.2 codetools_0.2-18    evaluate_0.14      
## [28] knitr_1.33          IRanges_2.26.0      Cairo_1.5-12.2     
## [31] doParallel_1.0.16   parallel_4.1.0      fansi_0.5.0        
## [34] highr_0.9           Rcpp_1.0.7          scales_1.1.1       
## [37] S4Vectors_0.30.0    magick_2.7.3        gridExtra_2.3      
## [40] rjson_0.2.20        ggplot2_3.3.5       png_0.1-7          
## [43] digest_0.6.27       gclus_1.3.2         stringi_1.7.3      
## [46] dplyr_1.0.7         clue_0.3-59         tools_4.1.0        
## [49] magrittr_2.0.1      tibble_3.1.3        cluster_2.1.2      
## [52] crayon_1.4.1        pkgconfig_2.0.3     ellipsis_0.3.2     
## [55] viridis_0.6.1       assertthat_0.2.1    iterators_1.0.13   
## [58] TSP_1.1-10          R6_2.5.1            compiler_4.1.0