# 1. Single-series bar chart ----

library(ggplot2) # ggplot(), geom_bar(), ylim(), theme(), element_text()
library(dplyr)   # arrange(), desc() for sorting method 1

### Plot data
# Semicolon-separated text table. The header row has five sample names and
# every other row has six fields, so the first field of each row (the gene
# name) is used as the row name. The line breaks inside the string are
# required: read.table() treats each line as one record.
data <- "Sample1;Sample2;Sample3;Sample4;Sample5
gene1;2.6;2.9;2.1;4.5;2.2
gene2;20.8;9.8;7.0;3.7;19.2
gene3;10.0;11.0;9.2;12.4;9.6
gene4;9;3.3;10.3;11.1;10"
data <- read.table(
  text = data, header = TRUE, row.names = 1, sep = ";", quote = ""
)
data

## Expression of gene1 across the samples
# t(data)[, 1] is the first column of the transposed table, i.e. the gene1 row.
data1 <- as.data.frame(t(data)[, 1])
names(data1) <- "gene1"
data1$sample <- rownames(data1)
# > data1
#         gene1  sample
# Sample1   2.6 Sample1
# Sample2   2.9 Sample2
# Sample3   2.1 Sample3
# Sample4   4.5 Sample4
# Sample5   2.2 Sample5

# Plot: geom_bar() draws the bars, ylim() sets the range of the y axis and
# theme() adjusts the theme; axis.title styles the axis titles and axis.text
# styles the axis tick labels.
ggplot(data = data1, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity", width = 0.8, colour = "black", size = 0.25,
    fill = "#FC4E07", alpha = 1
  ) +
  ylim(0, max(data1$gene1)) +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "blue"),
    axis.text = element_text(size = 12, face = "plain", color = "red")
  )

# The data can be sorted before plotting.

# Sorting method 1: based on the data.frame
data1.a <- arrange(data1, desc(gene1))
# Fix the factor levels to the sorted row order so the x axis follows it
# instead of the default alphabetical order.
data1.a$sample <- factor(data1.a$sample, levels = data1.a$sample)
ggplot(data = data1.a, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity", width = 0.8, colour = "black", size = 0.25,
    fill = "#FC4E07", alpha = 1
  )

# Sorting method 2: based on a vector
data1.b <- data1
# index.return = TRUE makes sort() return a list: $x holds the sorted values
# and $ix the positions of those values in the input vector.
# NOTE(review): this variable is named `order`, like base::order(); calls to
# order(...) still resolve to the function, but the name is easy to misread.
order <- sort(data1.b$gene1, index.return = TRUE, decreasing = TRUE)
library(reshape2) # melt(); must be attached before its first use below

# Sorting method 2 (continued): use the sort index in order$ix to arrange the
# factor levels from the largest to the smallest gene1 value.
data1.b$sample <- factor(data1.b$sample, levels = data1.b$sample[order$ix])
ggplot(data = data1.b, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity", width = 0.8, colour = "black", size = 0.25,
    fill = "black", alpha = 1
  )

# Plot the expression values of every sample. position = position_dodge()
# places the bars side by side; position_dodge() can also be used to change
# the spacing between the data series.
data2 <- data.frame(gene = rownames(data), data)
data2 <- melt(data2, id.vars = c("gene"))
ggplot(data2, aes(x = gene, y = value)) +
  geom_bar(
    stat = "identity", position = position_dodge(), aes(fill = variable)
  )

# Usually, though, the data is not plotted this way: the mean is shown
# instead, with error bars added.

# Mean and standard deviation per gene.
# sd is computed before value is overwritten with the mean, so both
# statistics are taken from the raw values.
data3 <- data2 %>%
  group_by(gene) %>%
  dplyr::summarise(sd = sd(value), value = mean(value))
data3 <- as.data.frame(data3)
# > data3
#    gene        sd value
# 1 gene1 0.9710819  2.86
# 2 gene2 7.5491721 12.10
# 3 gene3 1.2837445 10.44
# 4 gene4 3.1325708  8.74

ggplot(data3, aes(x = gene, y = value)) +
  geom_bar(stat = "identity", aes(fill = gene)) +
  geom_errorbar(
    aes(ymin = value - sd, ymax = value + sd),
    width = 0.2, position = position_dodge(width = 0.75)
  ) +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black")
  )

# 2. Two-series bar chart ----

data4 <- data.frame(
  Gene = c("gene1", "gene2", "gene3"),
  CTRL = c(7.67, 4.02, 3.95),
  Drug = c(5.84, 6.45, 6.76),
  stringsAsFactors = FALSE
)
# colnames(data4) <- c("Gene","CTRL","Drug")
data4 <- melt(data4, id.vars = "Gene")
data4
# > data4
#    Gene variable value
# 1 gene1     CTRL  7.67
# 2 gene2     CTRL  4.02
# 3 gene3     CTRL  3.95
# 4 gene1     Drug  5.84
# 5 gene2     Drug  6.45
# 6 gene3     Drug  6.76

ggplot(data = data4, aes(Gene, value, fill = variable)) +
  geom_bar(
    stat = "identity", position = position_dodge(),
    color = "black", width = 0.7, size = 0.25
  ) +
  scale_fill_manual(values = c("#A61CE6", "#E81CA4")) +
  ylim(0, 10) +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.88, 0.88)
  ) +
  ylab("Expression values")

# Sort by the CTRL group.
data5 <- data.frame(
  Gene = c("gene1", "gene2", "gene3"),
  CTRL = c(8.67, 4.02, 6.95),
  Drug = c(5.84, 6.45, 6.76),
  stringsAsFactors = FALSE
)
# Order the Gene levels by decreasing CTRL value; this has to happen before
# melting, while CTRL is still a column of its own.
data5$Gene <- factor(
  data5$Gene,
  levels = data5$Gene[order(data5[, "CTRL"], decreasing = TRUE)]
)
data5 <- melt(data5, id.vars = "Gene")
# Two-series bar chart of data5, with genes in the factor-level order set on
# data5$Gene before this point.
ggplot(data = data5, aes(Gene, value, fill = variable)) +
  geom_bar(
    stat = "identity", color = "black",
    position = position_dodge(), width = 0.7, size = 0.25
  ) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800")) +
  ylim(0, 10) +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.88, 0.88)
  )

# 3. Stacked bar chart ----

# Wide table: one row per gene, one column per sample (sam1..sam8).
data6 <- data.frame(
  Gene = c("gene1", "gene2", "gene3", "gene4", "gene5"),
  sam1 = c(150, 1200, 1300, 2800, 2000),
  sam2 = c(400, 1100, 2300, 2900, 2700),
  sam3 = c(390, 1700, 3300, 3500, 4200),
  sam4 = c(300, 900, 1900, 2800, 3300),
  sam5 = c(130, 790, 1800, 3000, 4200),
  sam6 = c(100, 1300, 1900, 1800, 2700),
  sam7 = c(100, 1200, 1700, 1600, 2100),
  sam8 = c(150, 1100, 1300, 1280, 1300),
  stringsAsFactors = FALSE
)
# Reshape data6 from wide to long, keeping Gene as the identifier column; the
# sample columns are stacked into melt()'s variable/value pair.
data6 <- melt(data = data6, id.vars = "Gene")
# Stacked bar chart: one bar per sample, stacked by gene.
# brewer.pal() is called through its namespace because RColorBrewer is not
# attached by the visible part of this file.
ggplot(data = data6, aes(variable, value, fill = Gene)) +
  geom_bar(
    stat = "identity", position = "stack",
    color = "black", width = 0.7, size = 0.25
  ) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(9, "YlOrRd")[c(6:2)]) +
  ylim(0, 15000) +
  xlab("Sample") +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.85, 0.82)
  )

# Same wide table again, this time to be sorted before plotting.
data7 <- data.frame(
  Gene = c("gene1", "gene2", "gene3", "gene4", "gene5"),
  sam1 = c(150, 1200, 1300, 2800, 2000),
  sam2 = c(400, 1100, 2300, 2900, 2700),
  sam3 = c(390, 1700, 3300, 3500, 4200),
  sam4 = c(300, 900, 1900, 2800, 3300),
  sam5 = c(130, 790, 1800, 3000, 4200),
  sam6 = c(100, 1300, 1900, 1800, 2700),
  sam7 = c(100, 1200, 1700, 1600, 2100),
  sam8 = c(150, 1100, 1300, 1280, 1300),
  stringsAsFactors = FALSE
)
# > data7
#    Gene sam1 sam2 sam3 sam4 sam5 sam6 sam7 sam8
# 1 gene1  150  400  390  300  130  100  100  150
# 2 gene2 1200 1100 1700  900  790 1300 1200 1100
# 3 gene3 1300 2300 3300 1900 1800 1900 1700 1300
# 4 gene4 2800 2900 3500 2800 3000 1800 1600 1280
# 5 gene5 2000 2700 4200 3300 4200 2700 2100 1300

## Row sums (one total per gene), sorted ascending; $ix is the sort index.
# NOTE(review): this variable is named `sum`, like base::sum(); calls to
# sum(...) still resolve to the function, but the name is easy to misread.
sum <- sort(rowSums(data7[, 2:ncol(data7)]), index.return = TRUE)
# Column sums (one total per sample), sorted descending.
colsum <- sort(
  colSums(data7[, 2:ncol(data7)]),
  index.return = TRUE, decreasing = TRUE
)
# Reorder the sample columns by decreasing column total. colsum$ix indexes the
# sample columns only, hence the +1 to skip the leading Gene column.
data7 <- data7[, c(1, colsum$ix + 1)]
# > data7
#    Gene sam3 sam5 sam2 sam4 sam6 sam1 sam7 sam8
# 1 gene1  390  130  400  300  100  150  100  150
# 2 gene2 1700  790 1100  900 1300 1200 1200 1100
# 3 gene3 3300 1800 2300 1900 1900 1300 1700 1300
# 4 gene4 3500 3000 2900 2800 1800 2800 1600 1280
# 5 gene5 4200 4200 2700 3300 2700 2000 2100 1300

# Order the Gene levels by ascending row total. sum$ix already is the sort
# index (the row numbers in sorted order), so it is used directly;
# order(sum$ix) would be the inverse permutation, which matches only when the
# rows happen to be sorted already, as in this data.
data7$Gene <- factor(data7$Gene, levels = data7$Gene[sum$ix])
data7 <- melt(data7, id.vars = "Gene")
# brewer.pal() is called through its namespace because RColorBrewer is not
# attached by the visible part of this file.
ggplot(data = data7, aes(variable, value, fill = Gene)) +
  geom_bar(
    stat = "identity", position = "stack",
    color = "black", width = 0.7, size = 0.25
  ) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(9, "YlOrRd")[c(6:2)]) +
  ylim(0, 15000) +
  xlab("Sample") +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.85, 0.82)
  )

# 4. Percent stacked bar chart ----

# scale_fill_manual() changes the fill colours; position = "fill" scales every
# bar to the same total height.
ggplot(data = data7, aes(variable, value, fill = Gene)) +
  geom_bar(
    stat = "identity", position = "fill",
    color = "black", width = 0.8, size = 0.25
  ) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(9, "GnBu")[c(7:2)]) +
  xlab("Sample") +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.position = "right"
  )

# 5. Variable-width bar chart ----

library(ggplot2)
# install.packages("Cairo")
library(Cairo)
# install.packages("showtext")
library(showtext)

# Scale is the width of each bar and Count its height.
data8 <- data.frame(
  Name = paste0("Group", 1:5),
  Scale = c(35, 30, 20, 25, 15),
  Count = c(56, 37, 63, 57, 59)
)
# Bars are laid end to end, so each right edge is the running total of the
# widths. cumsum() replaces index loops that used `1:i-1`, which R parses as
# (1:i) - 1 rather than 1:(i - 1).
right_edge <- cumsum(data8$Scale)
# Left edge (start point) of each rectangle on the x axis
data8$xmin <- right_edge - data8$Scale
# Right edge (end point) of each rectangle on the x axis
data8$xmax <- right_edge
# x position of the data labels: the horizontal centre of each rectangle
data8$label <- right_edge - data8$Scale / 2
data8
# > data8
#     Name Scale Count xmin xmax label
# 1 Group1    35    56    0   35  17.5
# 2 Group2    30    37   35   65  50.0
# 3 Group3    20    63   65   85  75.0
# 4 Group4    25    57   85  110  97.5
# 5 Group5    15    59  110  125 117.5

# windowsFonts(myFont = windowsFont("微软雅黑"))
# Colour mappings are set inside aes(); fixed colours are set outside aes().
ggplot(data8) +
  geom_rect(
    aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = Count, fill = Name),
    colour = "black", size = 0.25
  ) +
  # Count value just above each bar
  geom_text(aes(x = label, y = Count + 3, label = Count), size = 4, col = "black") +
  # Group name just below the baseline
  geom_text(aes(x = label, y = -2.5, label = Name), size = 4, col = "black") +
  ylab("Count") +
  xlab("Group") +
  ylim(-5, 80) +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_line(
      colour = "grey60", size = .25, linetype = "dotted"
    ),
    panel.grid.minor = element_line(
      colour = "grey60", size = .25, linetype = "dotted"
    ),
    text = element_text(size = 15),
    plot.title = element_text(size = 15, hjust = .5), # family="myfont",
    legend.position = "none"
  )

# 6. Radial bar chart ----

# 10 species x 5 genes. Each value is the gene number (1..5) plus normal noise
# with sd 0.5; no seed is set, so the values differ from run to run.
data9 <- data.frame(
  species = rep(paste0("specie", c(1:10)), 5),
  gene = rep(paste0("gene", c(1:5)), each = 10),
  value = rep((1:5), each = 10) + rnorm(50, 0, .5)
)
head(data9)
# > head(data9)
#   species  gene     value
# 1 specie1 gene1 0.8178002
# 2 specie2 gene1 0.5365643
# 3 specie3 gene1 0.7836265
# 4 specie4 gene1 0.9158748
# 5 specie5 gene1 0.8929767
# 6 specie6 gene1 1.9134189

# One rotation angle per species label (10 values from -20 to -340 degrees),
# passed to axis.text.x below.
myAng <- seq(-20, -340, length.out = 10)
ggplot(data = data9, aes(species, value, fill = gene)) +
  geom_bar(
    stat = "identity", color = "black",
    position = position_dodge(), width = 0.7, size = 0.25
  ) +
  # coord_polar() converts the Cartesian coordinates to polar coordinates.
  coord_polar(theta = "x", start = 0) +
  ylim(c(-3, 6)) +
  scale_fill_brewer() +
  theme_light() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "grey80", size = .25),
    axis.text.y = element_text(size = 12, colour = "black"),
    axis.line.y = element_line(size = 0.25),
    axis.text.x = element_text(size = 13, colour = "black", angle = myAng)
  )

# Reference:
# 1. "R语言数据可视化之美", by 张杰 (Zhang Jie)
|
|