载入数据 1Sys.setlocale('LC_ALL','C') 2load(file = "F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata") 3head(data) 4require(cowplot) 5require(tidyverse) 6require(ggplot2) 7require(ggsci) 8require(ggpubr) 9mydata<-data %>% 10 ## 基因表达数据gather,gather的范围应调整 11 gather(key="gene",value="Expression",CCL14:TUBB3) %>% 12 ## 13 dplyr::select(ID,gene,Expression,everything()) 14head(mydata) ## 每个基因作为一个变量的宽数据
创建带有pvalue的箱线图1p <- ggboxplot(mydata, x = "group", y = "Expression", 2 color = "group", palette = "jama", 3 add = "jitter") 4# Add p-value 5p + stat_compare_means()
改变统计方法1# Change method 2p + stat_compare_means(method = "t.test")
统计学意义标注label="p.signif" p.format等 label.x标注位置
1p + stat_compare_means( label = "p.signif")
多组比较1# Default method = "kruskal.test" for multiple groups 2ggboxplot(mydata, x = "gene", y = "Expression", 3 color = "gene",add="jitter", palette = "jama")+ 4 stat_compare_means() 5 6# Change method to anova 7ggboxplot(mydata, x = "gene", y = "Expression", 8 color = "gene", add="jitter", palette = "jama")+ 9 stat_compare_means(method = "anova")
指定比较 1require(ggpubr) 2compare_means(Expression ~ gene, data = mydata) 3 4## 指定自己想要的比较 5# Visualize: Specify the comparisons you want 6my_comparisons <- list( c("CCL14", "HBA1"), c("HBA1", "CCL16"), c("CCL16", "TUBB3") ) 7ggboxplot(mydata, x = "gene", y = "Expression", 8 color = "group",add = "jitter", palette = "jama")+ 9 stat_compare_means(comparisons = my_comparisons)#+ # Add pairwise comparisons p-value 10 #stat_compare_means() # Add global p-value
指定参考组指定CCL14作为参考组与其它各组比较 ref.group
1compare_means(Expression ~ gene, data = mydata, ref.group = "CCL14", 2 method = "t.test") 3# Visualize 4mydata %>% 5 filter(group=="TNBC") %>% # 筛选TNBC数据 6ggboxplot( x = "gene", y = "Expression", 7 color = "gene",add = "jitter", palette = "nejm")+ 8 stat_compare_means(method = "anova")+ # Add global p-value 9 stat_compare_means(label = "p.signif", method = "t.test", 10 ref.group = "CCL14")
多基因分面按另外一个变量分组比较
1## 比较各个基因在TNBC与Normal表达 2compare_means( Expression ~ group, data = mydata, 3 group.by = "gene") 4# Box plot facetted by "gene" 5p <- ggboxplot(mydata, x = "group", y = "Expression", 6 color = "group", palette = "jco", 7 add = "jitter", 8 facet.by = "gene", short.panel.labs = FALSE) 9# Use only p.format as label. Remove method name. 10p + stat_compare_means(label = "p.format")
将pvalue换成星号1p + stat_compare_means(label = "p.signif", label.x = 1.5)
将各个图绘制在一张图中1p <- ggboxplot(mydata, x = "gene", y = "Expression", 2 color = "group", palette = "nejm", 3 add = "jitter") 4p + stat_compare_means(aes(group = group))
修改下pvalue展示的方式1# Show only p-value 2p + stat_compare_means(aes(group = group), label = "p.format")
用星号表示pvalue1# Use significance symbol as label 2p + stat_compare_means(aes(group = group), label = "p.signif")
配对样本比较要求x,y具有相同的样本数,进行一一配对比较
1head(ToothGrowth) 2compare_means(len ~ supp, data = ToothGrowth, 3 group.by = "dose", paired = TRUE) 4# Box plot facetted by "dose" 5p <- ggpaired(ToothGrowth, x = "supp", y = "len", 6 color = "supp", palette = "jama", 7 line.color = "gray", line.size = 0.4, 8 facet.by = "dose", short.panel.labs = FALSE) 9# Use only p.format as label. Remove method name. 10p + stat_compare_means(label = "p.format", paired = TRUE)
封装为函数命名为group_box 1head(mydata) 2group_box<-function(group=group,data=mydata){ 3 p <- ggboxplot(mydata, x = "gene", y = "Expression", 4 color = group, 5 palette = "nejm", 6 add = "jitter") 7p + stat_compare_means(aes(group = group)) 8} 9 10## 11group_box(group="PAM50",data = mydata)
封装为函数命名为group_box 1head(mydata) 2group_box<-function(group=group,data=mydata){ 3 p <- ggboxplot(mydata, x = "gene", y = "Expression", 4 color = group, 5 palette = "nejm", 6 add = "jitter") 7p + stat_compare_means(aes(group = group)) 8} 9 10## 11group_box(group="PAM50",data = mydata)
封装函数gene_box- 注意这时使用的应该是基因的宽数据,因为涉及到单个基因作为变量 1head(data) 2usedata<-data 3## 封装函数 4gene_box<-function(gene="CCL14",group="group",data=usedata){ 5p <- ggboxplot(data, x = group, y = gene, 6 ylab = sprintf("Expression of %s",gene), 7 xlab = group, 8 color = group, 9 palette = "nejm", 10 add = "jitter") 11p + stat_compare_means(aes(group = group)) 12}
1gene_box(gene="CCL14")
牛刀小试1gene_box(gene="CCL16",group="PAM50")
批量绘制目的功能:绘制任意基因,任意分组,批量绘制一气呵成了 封装函数+lapply批量绘制无敌 在lapply中的函数参数设置,不在原函数中,而是直接放置在lapply中 do.call中参数1为函数,+c()包含原函数的参数设置,同样参数设置不在原函数中
1require(gridExtra) 2head(data) 3 4## 需要批量绘制的基因名 5name<-colnames(data)[3:6] 6## 批量绘图 7p<-lapply(name,gene_box,group = "T_stage") 8## 组图 9do.call(grid.arrange,c(p,ncol=2))
|