分享

R绘图笔记 | 柱状图绘制

 生信笔记 2021-07-05

1.单数据系列柱状图

### Plot data
# Semicolon-separated expression table: one header line naming the five
# samples, then one line per gene. The header has one field fewer than the
# data lines, and row.names = 1 makes the first field of each data line
# (the gene name) the row name.
data <- "Sample1;Sample2;Sample3;Sample4;Sample5
gene1;2.6;2.9;2.1;4.5;2.2
gene2;20.8;9.8;7.0;3.7;19.2
gene3;10.0;11.0;9.2;12.4;9.6
gene4;9;3.3;10.3;11.1;10"
data <- read.table(text = data, header = TRUE, row.names = 1, sep = ";", quote = "")
data

## Expression of gene1 across the samples
# t(data) puts samples in rows; column 1 of the transpose is gene1.
data1 <- as.data.frame(t(data)[, 1])
names(data1) <- "gene1"
# Keep the sample names as a regular column so they can be mapped to the x axis.
data1$sample <- rownames(data1)
> data1
        gene1  sample
Sample1   2.6 Sample1
Sample2   2.9 Sample2
Sample3   2.1 Sample3
Sample4   4.5 Sample4
Sample5   2.2 Sample5

绘图:geom_bar用于绘制柱状图,ylim设置纵轴值范围,theme设置主题,axis.title设置坐标轴标题参数,axis.text设置坐标轴刻度标签参数。

# ggplot2 must be attached before the first ggplot() call.
library(ggplot2)

# Bar chart of gene1 per sample; stat = "identity" draws the values as given.
ggplot(data = data1, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity", width = 0.8, colour = "black", size = 0.25,
    fill = "#FC4E07", alpha = 1
  ) +
  ylim(0, max(data1$gene1)) +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "blue"),
    axis.text = element_text(size = 12, face = "plain", color = "red")
  )

可将数据进行排序后绘图。

# Sorting approach 1: data-frame based
library(dplyr)
# Sort rows by gene1, largest first.
data1.a <- arrange(data1, desc(gene1))
# Fix the factor levels to the sorted row order so the x axis follows it.
data1.a$sample <- factor(data1.a$sample, levels = data1.a$sample)
ggplot(data = data1.a, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity", width = 0.8, colour = "black", size = 0.25,
    fill = "#FC4E07", alpha = 1
  )

# Sorting approach 2: vector based
data1.b <- data1
# sort(..., index.return = TRUE) returns the sorted values ($x) together with
# the positions they came from ($ix). The result is named `sorted` so it does
# not mask base::order().
sorted <- sort(data1.b$gene1, index.return = TRUE, decreasing = TRUE)
# Use the sample names, taken in decreasing-gene1 order, as the factor levels.
data1.b$sample <- factor(data1.b$sample, levels = data1.b$sample[sorted$ix])
ggplot(data = data1.b, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity", width = 0.8, colour = "black", size = 0.25,
    fill = "black", alpha = 1
  )


将所有样本的基因表达值都绘制出来,position=position_dodge()表示柱子并排放置。也可以通过position_dodge()函数的width参数来改变数据序列间的间隔。

# melt() comes from reshape2, which must be attached before this first use.
library(reshape2)

# Add the gene names as a column, then reshape to long format:
# one row per (gene, sample) pair with columns gene / variable / value.
data2 <- data.frame(gene = rownames(data), data)
data2 <- melt(data2, id.vars = c("gene"))
# One bar per sample, placed side by side within each gene.
ggplot(data2, aes(x = gene, y = value)) +
  geom_bar(stat = "identity", position = position_dodge(), aes(fill = variable))

但是,通常我们是不这样作图的,而是取均值,加上误差线。

# Mean and standard deviation per gene
# sd is computed before `value` is overwritten with the mean; inside
# summarise() later expressions see earlier results, so the order matters.
data3 <- data2 %>%
  group_by(gene) %>%
  dplyr::summarise(sd = sd(value), value = mean(value))
data3 <- as.data.frame(data3)
> data3
   gene        sd value
1 gene1 0.9710819  2.86
2 gene2 7.5491721 12.10
3 gene3 1.2837445 10.44
4 gene4 3.1325708  8.74
# Mean expression per gene as bars, with +/- one standard deviation error bars.
ggplot(data3, aes(gene, value)) +
  geom_bar(aes(fill = gene), stat = "identity") +
  geom_errorbar(
    aes(ymin = value - sd, ymax = value + sd),
    width = 0.2,
    position = position_dodge(width = 0.75)
  ) +
  theme(
    axis.title = element_text(size = 15, face = "plain", colour = "black"),
    axis.text = element_text(size = 12, face = "plain", colour = "black")
  )

2.双序列图的绘制

library(reshape2)

# Two data series (CTRL and Drug) for three genes.
data4 <- data.frame(
  Gene = c("gene1", "gene2", "gene3"),
  CTRL = c(7.67, 4.02, 3.95),
  Drug = c(5.84, 6.45, 6.76),
  stringsAsFactors = FALSE
)
# colnames(data4) <- c("Gene", "CTRL", "Drug")
# Long format: one row per (Gene, series) pair.
data4 <- melt(data4, id.vars = "Gene")
data4
> data4
   Gene variable value
1 gene1     CTRL  7.67
2 gene2     CTRL  4.02
3 gene3     CTRL  3.95
4 gene1     Drug  5.84
5 gene2     Drug  6.45
6 gene3     Drug  6.76
# Side-by-side CTRL / Drug bars per gene, with the legend drawn inside the
# panel near the top-right corner.
ggplot(data4, aes(x = Gene, y = value, fill = variable)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(),
    colour = "black",
    width = 0.7,
    size = 0.25
  ) +
  scale_fill_manual(values = c("#A61CE6", "#E81CA4")) +
  ylim(0, 10) +
  theme(
    axis.title = element_text(size = 15, face = "plain", colour = "black"),
    axis.text = element_text(size = 12, face = "plain", colour = "black"),
    legend.title = element_text(size = 14, face = "plain", colour = "black"),
    legend.background = element_blank(),
    legend.position = c(0.88, 0.88)
  ) +
  ylab("Expression values")

按CTRL组排序。

# Same layout as the previous two-series chart, but with the genes ordered
# by their CTRL value (largest first).
data5 <- data.frame(
  Gene = c("gene1", "gene2", "gene3"),
  CTRL = c(8.67, 4.02, 6.95),
  Drug = c(5.84, 6.45, 6.76),
  stringsAsFactors = FALSE
)
# Factor levels follow decreasing CTRL, which fixes the x-axis order.
ctrl_rank <- order(data5[, "CTRL"], decreasing = TRUE)
data5$Gene <- factor(data5$Gene, levels = data5$Gene[ctrl_rank])
data5 <- melt(data5, id.vars = "Gene")

ggplot(data5, aes(x = Gene, y = value, fill = variable)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    position = position_dodge(),
    width = 0.7,
    size = 0.25
  ) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800")) +
  ylim(0, 10) +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", colour = "black"),
    axis.text = element_text(size = 12, face = "plain", colour = "black"),
    legend.title = element_text(size = 14, face = "plain", colour = "black"),
    legend.background = element_blank(),
    legend.position = c(0.88, 0.88)
  )

3.堆积柱状图

# brewer.pal() comes from RColorBrewer, which must be attached before use.
library(RColorBrewer)

# Expression of five genes in eight samples (wide format).
data6 <- data.frame(
  Gene = c("gene1", "gene2", "gene3", "gene4", "gene5"),
  sam1 = c(150, 1200, 1300, 2800, 2000),
  sam2 = c(400, 1100, 2300, 2900, 2700),
  sam3 = c(390, 1700, 3300, 3500, 4200),
  sam4 = c(300, 900, 1900, 2800, 3300),
  sam5 = c(130, 790, 1800, 3000, 4200),
  sam6 = c(100, 1300, 1900, 1800, 2700),
  sam7 = c(100, 1200, 1700, 1600, 2100),
  sam8 = c(150, 1100, 1300, 1280, 1300),
  stringsAsFactors = FALSE
)
# Long format: one row per (Gene, sample) pair.
data6 <- melt(data6, id.vars = "Gene")

# Stacked bars: one bar per sample, one segment per gene.
ggplot(data = data6, aes(variable, value, fill = Gene)) +
  geom_bar(
    stat = "identity", position = "stack", color = "black",
    width = 0.7, size = 0.25
  ) +
  # Colours 6 down to 2 of the 9-class YlOrRd palette, one per gene.
  scale_fill_manual(values = brewer.pal(9, "YlOrRd")[c(6:2)]) +
  ylim(0, 15000) +
  xlab("Sample") +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.85, 0.82)
  )

# Wide-format expression table (five genes x eight samples) used for the
# sorted stacked chart.
data7 <- data.frame(
  Gene = paste0("gene", 1:5),
  sam1 = c(150, 1200, 1300, 2800, 2000),
  sam2 = c(400, 1100, 2300, 2900, 2700),
  sam3 = c(390, 1700, 3300, 3500, 4200),
  sam4 = c(300, 900, 1900, 2800, 3300),
  sam5 = c(130, 790, 1800, 3000, 4200),
  sam6 = c(100, 1300, 1900, 1800, 2700),
  sam7 = c(100, 1200, 1700, 1600, 2100),
  sam8 = c(150, 1100, 1300, 1280, 1300),
  stringsAsFactors = FALSE
)
> data7
   Gene sam1 sam2 sam3 sam4 sam5 sam6 sam7 sam8
1 gene1  150  400  390  300  130  100  100  150
2 gene2 1200 1100 1700  900  790 1300 1200 1100
3 gene3 1300 2300 3300 1900 1800 1900 1700 1300
4 gene4 2800 2900 3500 2800 3000 1800 1600 1280
5 gene5 2000 2700 4200 3300 4200 2700 2100 1300
## Row totals (per gene), sorted ascending.
# $ix holds the row positions in sorted order. The name `sum` is kept because
# the later plotting code reads sum$ix.
sum <- sort(rowSums(data7[, 2:ncol(data7)]), index.return = TRUE)
# Column totals (per sample), sorted descending.
colsum <- sort(colSums(data7[, 2:ncol(data7)]), index.return = TRUE, decreasing = TRUE)
# Reorder the sample columns by decreasing total. The +1 skips the Gene
# column, which stays first.
data7 <- data7[, c(1, colsum$ix + 1)]
> data7
   Gene sam3 sam5 sam2 sam4 sam6 sam1 sam7 sam8
1 gene1  390  130  400  300  100  150  100  150
2 gene2 1700  790 1100  900 1300 1200 1200 1100
3 gene3 3300 1800 2300 1900 1900 1300 1700 1300
4 gene4 3500 3000 2900 2800 1800 2800 1600 1280
5 gene5 4200 4200 2700 3300 2700 2000 2100 1300
# Order the gene levels by ascending row total. sum$ix already lists the row
# positions in sorted order, so it is used as the index directly;
# order(sum$ix) would give the inverse permutation, which only matches when
# the rows happen to be sorted already.
data7$Gene <- factor(data7$Gene, levels = data7$Gene[sum$ix])
# Long format: one row per (Gene, sample) pair.
data7 <- melt(data7, id.vars = "Gene")

# Stacked bars with samples ordered by decreasing total.
ggplot(data = data7, aes(variable, value, fill = Gene)) +
  geom_bar(
    stat = "identity", position = "stack", color = "black",
    width = 0.7, size = 0.25
  ) +
  scale_fill_manual(values = brewer.pal(9, "YlOrRd")[c(6:2)]) +
  ylim(0, 15000) +
  xlab("Sample") +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.85, 0.82)
  )

4.百分比堆积柱形图

scale_fill_manual用于修改填充色。

# Percent-stacked bars: position = "fill" scales every bar to the same height
# so each segment shows that gene's share of the sample.
ggplot(data7, aes(x = variable, y = value, fill = Gene)) +
  geom_bar(
    stat = "identity",
    position = "fill",
    colour = "black",
    width = 0.8,
    size = 0.25
  ) +
  scale_fill_manual(values = brewer.pal(9, "GnBu")[7:2]) +
  xlab("Sample") +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", colour = "black"),
    axis.text = element_text(size = 12, face = "plain", colour = "black"),
    legend.title = element_text(size = 14, face = "plain", colour = "black"),
    legend.position = "right"
  )

5.不等宽柱形图

library(ggplot2)#install.packages("Cairo")library(Cairo)#install.packages("showtext")library(showtext)
# Variable-width bars: Scale is each bar's width, Count its height.
data8 <- data.frame(
  Name = paste0("Group", 1:5),
  Scale = c(35, 30, 20, 25, 15),
  Count = c(56, 37, 63, 57, 59)
)
# Left edge of each rectangle: total width of all preceding bars (0 for the
# first). Computed from the running total instead of a loop over `1:i-1`,
# which parses as (1:i) - 1 and only worked because index 0 is dropped.
data8$xmin <- cumsum(data8$Scale) - data8$Scale
# Right edge of each rectangle: running total of the widths.
data8$xmax <- cumsum(data8$Scale)
# x position of the data label: the centre of each rectangle.
data8$label <- data8$xmax - data8$Scale / 2
data8
> data8
    Name Scale Count xmin xmax label
1 Group1    35    56    0   35  17.5
2 Group2    30    37   35   65  50.0
3 Group3    20    63   65   85  75.0
4 Group4    25    57   85  110  97.5
5 Group5    15    59  110  125 117.5
# windowsFonts(myFont = windowsFont("微软雅黑"))
# Mapping a variable to a colour is done inside aes(); setting a fixed colour
# is done outside aes().
ggplot(data8) +
  # One rectangle per group: width from xmin/xmax, height from Count.
  geom_rect(
    aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = Count, fill = Name),
    colour = "black", size = 0.25
  ) +
  # Count value just above each bar.
  geom_text(aes(x = label, y = Count + 3, label = Count), size = 4, col = "black") +
  # Group name just below the axis baseline.
  geom_text(aes(x = label, y = -2.5, label = Name), size = 4, col = "black") +
  ylab("Count") +
  xlab("Group") +
  ylim(-5, 80) +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_line(colour = "grey60", size = .25, linetype = "dotted"),
    panel.grid.minor = element_line(colour = "grey60", size = .25, linetype = "dotted"),
    text = element_text(size = 15),
    plot.title = element_text(size = 15, hjust = .5), # family="myfont",
    legend.position = "none"
  )

6.径向柱形图

# Simulated data: 10 species x 5 genes. Each value is the gene's index (1-5)
# plus normal noise with standard deviation 0.5. No seed is set, so the values
# differ between runs.
data9 <- data.frame(
  species = rep(paste0("specie", 1:10), times = 5),
  gene = rep(paste0("gene", 1:5), each = 10),
  value = rep(1:5, each = 10) + rnorm(50, mean = 0, sd = 0.5)
)
head(data9)
> head(data9)
  species  gene     value
1 specie1 gene1 0.8178002
2 specie2 gene1 0.5365643
3 specie3 gene1 0.7836265
4 specie4 gene1 0.9158748
5 specie5 gene1 0.8929767
6 specie6 gene1 1.9134189
# One rotation angle per species label, evenly spaced from -20 to -340 degrees.
myAng <- seq(-20, -340, length.out = 10)

# Dodged bars drawn in polar coordinates (x mapped to the angle).
ggplot(data = data9, aes(species, value, fill = gene)) +
  geom_bar(
    stat = "identity", color = "black", position = position_dodge(),
    width = 0.7, size = 0.25
  ) +
  coord_polar(theta = "x", start = 0) +
  # The lower limit sits below zero, leaving an empty area at the centre.
  ylim(c(-3, 6)) +
  scale_fill_brewer() +
  theme_light() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "grey80", size = .25),
    axis.text.y = element_text(size = 12, colour = "black"),
    axis.line.y = element_line(size = 0.25),
    axis.text.x = element_text(size = 13, colour = "black", angle = myAng)
  )

coord_polar将直角坐标转化为极坐标。

参考资料:

1.R语言数据可视化之美,张杰/著

    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章