R语言画图——添加显著性检验标记

枫林秋2016 2023-09-10

展开全文

简单整理一下画图代码，方便以后使用

生成示例数据

set.seed(12345)
exprSet = matrix(rnorm(480,mean = 6),ncol = 120)
exprSet = round(exprSet,2)
rownames(exprSet) = paste0("Gene",1:4)
colnames(exprSet) = paste0("Sample",1:120)
exprSet[,50:80] = exprSet[,50:80] + 2
exprSet[,90:120] = exprSet[,90:120] + 4
exprSet[1:4,1:4]
#      Sample1 Sample2 Sample3 Sample4
#Gene1    6.59    6.61    5.72    6.37
#Gene2    6.71    4.18    5.08    6.52
#Gene3    5.89    6.63    5.88    5.25
#Gene4    5.55    5.72    7.82    6.82
pdat <- reshape2::melt(exprSet)
colnames(pdat) <- c("Gene","Sample","Expression")
pdat$Group = rep(rep(c("Control","Treat1","Treat2"),each = 40),each = 4)
head(pdat)
#   Gene  Sample Expression   Group
#1 Gene1 Sample1       6.59 Control
#2 Gene2 Sample1       6.71 Control
#3 Gene3 Sample1       5.89 Control
#4 Gene4 Sample1       5.55 Control
#5 Gene1 Sample2       6.61 Control
#6 Gene2 Sample2       4.18 Control
library(tidyverse)
df <- pdat %>%
      mutate(Group = as.factor(Group)) %>% 
      group_by(Group) %>%
      summarise(value_mean = mean(Expression),sd = sd(Expression),se = sd(Expression)/sqrt(n()))


library(ggplot2)
library(patchwork)
theme <- theme_bw(base_size = 20) + 
         theme(axis.line = element_line(colour = 'black'), 
               plot.title = element_text(hjust = 0.5),
               axis.title.x = element_blank(), 
               legend.title = element_blank(),
               legend.position = "none",
               legend.background = element_rect(fill = 'transparent'))
p1 <- ggplot(pdat, aes(x = Gene, y = Expression)) +
      geom_boxplot(aes(fill = Group),outlier.shape = NA) +
      scale_fill_manual(values = c("#2fa1dd", "#f87669", "#e6b707")) +
      scale_y_continuous(expand = c(0,0),limit = c(3,14)) +
      theme
p2 <- ggplot(pdat, aes(x = Group, y = Expression, fill = Group)) +
      geom_boxplot(aes(fill = Group),outlier.shape = NA) +
      scale_fill_manual(values = c("#2fa1dd", "#f87669", "#e6b707")) +
      scale_y_continuous(expand = c(0,0),limit = c(3,14)) +
      theme
p3 <- ggplot(df,aes(x = Group,y = value_mean)) +
      geom_errorbar(aes(ymax = value_mean + sd, ymin = value_mean - sd),width = 0.1,color = "grey30")+
      geom_col(width = 0.75,aes(fill = Group))+
      scale_y_continuous(expand = c(0,0),limit = c(0,12)) +
      scale_fill_brewer(palette="Blues") + 
      theme
p1 + p2 + p3 + plot_layout(width = c(3,1,1))

形式一：顶部标记

显著性统一标注在顶部上，不考虑两两分组多次比较。两组默认执行wilcox.test（paired = FALSE），三组及以上默认执行kruska-wallis。

library(ggpubr)
#p2 + stat_compare_means(size = 8,label.y = 12.5)
#p2 + stat_compare_means(method = "anova",size = 8,label.y = 12.5)
##"p.signif" (shows the significance levels), "p.format" (shows the formatted p value).
p2 + stat_compare_means(method = "anova",size = 8,label.y = 12.5)
p2 + stat_compare_means(method = "anova",size = 8,label = "p.signif",label.y = 12.5)
p2 + stat_compare_means(method = "anova",size = 8,label = "p.format",label.y = 12.5)
p2 + stat_compare_means(aes(group = Group,label = after_stat(p.format)),
                        method = "anova",size = 8,label.y = 12.5)

p1 + stat_compare_means(aes(group = Group,label = after_stat(p.signif)),size = 8,label.y = 12.5)
p1 + stat_compare_means(aes(group = Group),size = 6,label = "p.format",label.y = 12.5)
p1 + stat_compare_means(aes(group = Group,label = after_stat(p.format)),size = 8,label.y = 12.5)

形式二：两两之间分组比较

1、ggpubr

library(ggpubr)
my_comparisons = list(c("Control", "Treat1"), c("Treat1", "Treat2"), c("Control", "Treat2"))
p2 + stat_compare_means(comparisons = my_comparisons,method = "t.test",
                        map_signif_level=function(p)sprintf("p = %.2g", p),size = 8)
p2 + stat_compare_means(comparisons = my_comparisons,method = "t.test",label = "p.signif",size = 8)#label.x, label.y

2、ggsignif

p2 + geom_signif(comparisons = my_comparisons)
p2 + geom_signif(comparisons = my_comparisons,map_signif_level= function(p)sprintf("p = %.2g", p))
p2 + geom_signif(comparisons = my_comparisons,map_signif_level = T,y_position = c(12,13,14),size = 0.5,textsize = 8)

手动选择标记位置/标记内容：

p2 + geom_signif(y_position = c(12, 13),xmin = c(1, 2),xmax = c(2, 3),
                 annotations = c("****","****"),tip_length = 0.05,#横线两侧折线长度
                 size = 0.5,textsize = 8) +
     geom_signif(comparisons = list(c("Control", "Treat2")),
                 y_position = 14,tip_length = 0,vjust = 0,size = 0.5,textsize = 8,map_signif_level = T)
p2 + geom_signif(annotations = c("First", "Second"),
                 y_position = c(12, 13),xmin = c(1, 2),xmax = c(2, 3),textsize = 8)

TCGA数据示例：

library(ggpubr)
Data_summary <- as.data.frame(compare_means(Expression~Group, exprSet, method = "wilcox.test", 
                              paired = FALSE,group.by = "Cancer"))       
stat.test <- Data_summary[,-c(2,9)]
stat.test$xmin = c(1.5,3.5,5.5,7.5,9.5,12.5,14.5,16.5,18.5,20.5,22.5,26.5,
                   28.5,30.5,34.5,36.5,38.5,40.5,44.5,47.5,50.5) + 0.3  
stat.test$xmax = c(1.5,3.5,5.5,7.5,9.5,12.5,14.5,16.5,18.5,20.5,22.5,26.5,
                   28.5,30.5,34.5,36.5,38.5,40.5,44.5,47.5,50.5) + 1.7   
stat.test$p.signif[stat.test$p.signif == "ns"] <- NA
p3 <- p2 + geom_signif(xmin = stat.test$xmin,xmax = stat.test$xmax,
                       annotations = stat.test$p.signif,margin_top = 0.00,
                       y_position = 9.2,size = 0.65,textsize = 7.5,
                       tip_length = 0)#横线两侧折线长度
p3

3、rstatix

用rstatix自带函数计算显著性与添加显著性的横纵坐标位置，再用stat_pvalue_manual添加上去。

以下默认参数，手动计算时需要注意对应列名：

· y.position = "y.position"
· xmin = "group1"
· xmax = "group2"

library(rstatix)
stat.test <- pdat %>% 
  group_by(Gene) %>% 
  wilcox_test(Expression ~ Group) %>%
  adjust_pvalue() %>% 
  add_significance("p.adj") %>% 
  add_xy_position(x = "Gene")
data.frame(stat.test)
#    Gene      .y.    group1 group2 n1 n2 statistic        p     p.adj p.adj.signif y.position      groups x      xmin     xmax
#1  Gene1 Expression Control Treat1 40 40     275.5 4.60e-07 3.680e-06     ****    12.7452 Control, Treat1 1 0.7333333 1.000000
#2  Gene1 Expression Control Treat2 40 40     262.0 2.31e-07 2.079e-06     ****    13.5030 Control, Treat2 1 0.7333333 1.266667
#3  Gene1 Expression  Treat1 Treat2 40 40     401.0 1.26e-04 1.260e-04      ***    14.2608  Treat1, Treat2 1 1.0000000 1.266667
#4  Gene2 Expression Control Treat1 40 40     325.5 5.08e-06 2.540e-05     ****    11.3952 Control, Treat1 2 1.7333333 2.000000
#5  Gene2 Expression Control Treat2 40 40     134.0 1.51e-10 1.661e-09     ****    12.1530 Control, Treat2 2 1.7333333 2.266667
#6  Gene2 Expression  Treat1 Treat2 40 40     339.0 9.37e-06 3.748e-05     ****    12.9108  Treat1, Treat2 2 2.0000000 2.266667
#7  Gene3 Expression Control Treat1 40 40     291.5 1.02e-06 7.140e-06     ****    11.8352 Control, Treat1 3 2.7333333 3.000000
#8  Gene3 Expression Control Treat2 40 40     114.5 4.35e-11 5.220e-10     ****    12.5930 Control, Treat2 3 2.7333333 3.266667
#9  Gene3 Expression  Treat1 Treat2 40 40     343.5 1.14e-05 3.748e-05     ****    13.3508  Treat1, Treat2 3 3.0000000 3.266667
#10 Gene4 Expression Control Treat1 40 40     321.5 4.23e-06 2.538e-05     ****    12.7652 Control, Treat1 4 3.7333333 4.000000
#11 Gene4 Expression Control Treat2 40 40     192.5 5.19e-09 5.190e-08     ****    13.5230 Control, Treat2 4 3.7333333 4.266667
#12 Gene4 Expression  Treat1 Treat2 40 40     359.5 2.30e-05 4.600e-05     ****    14.2808  Treat1, Treat2 4 4.0000000 4.266667
stat.test$y.position <- stat.test$y.position - 1
p1 + stat_pvalue_manual(stat.test,label = "p.adj.signif",hide.ns = F,#label = "p" or label = "p.adj"
                        tip.length = 0.01,label.size = 8)

修改tip.length设置折线长度贴合图形：

library(rstatix)
stat.test <- pdat %>% wilcox_test(Expression ~ Group, ref.group = "Control") %>% 
             mutate(p.adj.signif = replace_na(p.adj.signif,""),across("p.adj.signif",str_replace,"ns","")) %>% 
             select(group1,group2,p.adj,p.adj.signif) %>% 
             left_join(.,df,by = c("group2" = "Group")) %>% 
             mutate(y.position = value_mean + sd + 0.3)
as.data.frame(stat.test)
#   group1 group2    p.adj p.adj.signif value_mean       sd        se  y.position
#1 Control Treat1 7.21e-22         ****   7.608062 1.220466 0.09648632   9.128529
#2 Control Treat2 3.06e-33         ****   9.185063 1.878471 0.14850619  11.363534
p4 <-  p3 + 
       stat_pvalue_manual(stat.test[1,],label = "p.adj.signif",
                          label.size = 6,tip.length = c(0.25,0.003),linetype = 2) +
       stat_pvalue_manual(stat.test[2,],label = "p.adj.signif",
                          label.size = 6,tip.length = c(0.15,0.003),linetype = 1)

4、手动添加 geom_text()

geom_text标记显著性

df2 <- merge(df,stat.test[,c("group2","p.adj","p.adj.signif")],by.x = "Group",by.y = "group2", all=TRUE)
df2$p.adj.signif[is.na(df2$p.adj.signif)] <- ""
#    Group value_mean       sd         se    p.adj p.adj.signif
#1 Control   6.141563 1.107652 0.08756761       NA             
#2  Treat1   7.608062 1.220466 0.09648632 7.21e-22         ****
#3  Treat2   9.185063 1.878471 0.14850619 3.06e-33         ****
p4 <- ggplot(df2,aes(x = Group,y = value_mean,fill = Group)) +
      geom_errorbar(aes(ymax = value_mean + sd, ymin = value_mean - sd),width = 0.1,color = "grey30") +
      geom_col(width = 0.75) +
      geom_text(aes(label = p.adj.signif,y = value_mean + sd*1.5),
                size = 8,color = "black",show.legend = FALSE) +
      scale_y_continuous(expand = c(0,0),limits = c(0,12.5)) +
      theme +
      scale_fill_brewer(palette = "Blues")

geom_segment绘制折线 + geom_text标记显著性

P <- P + #geom_line(data=segment,aes(x = xstart,y = ystart),cex=1, color = "black")+
     geom_segment(x = 4.25, y = 0.8, xend = 4.25, yend = 1.95, size = 1, color = "black") + 
     geom_segment(x = 4.15, y = 0.8, xend = 4.25, yend = 0.8, size = 1, color = "black") + 
     geom_segment(x = 4.15, y = 1.95, xend = 4.25, yend = 1.95, size = 1, color = "black") +
     geom_text(x = 4.3,y = 1.4,label = "***",size = 8,color = "black",angle = 270)