跋山涉水,翻山越岭,找到一个非常好用的GO分析R语言代码。 视频网址: https://www.bilibili.com/video/BV1a94y1f7sK/?spm_id_from=333.788&vd_source=c5d7403d1b2832c643e831d68ff1474c 作者的主页:https://space.bilibili.com/388487656 没办法,咱不是计算机专业的,怕这个超级好用的代码失传,先复制下来保存。 代码解说 library("org.Hs.eg.db") library("clusterProfiler") library("enrichplot") library("ggplot2") library("ggnewscale") library("enrichplot") library("DOSE") library(stringr) library是调用R语言包,如果没有上面的包,要先安装。 安装R语言包的R语言: if(!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("clusterProfiler") BiocManager::install("topGO") BiocManager::install("Rgraphviz") BiocManager::install("pathview") BiocManager::install("org.Mm.eg.db") Mm老鼠基因 BiocManager::install("org.Hs.eg.db") Hs人类基因包 install.packages("ggplot2") 特别注意:“”和""不同,要在R语言下英文输入引号。 安装这些个包包,要好久。 pvalueFilter=0.05 qvalueFilter=1 showNum=8 pvalueFilter不能超过0.05,相当于统计学P值,大了没有意义。 showNum=8,展示8个条目,=18就展示18个GO条目。 下面的target.txt,指的是文件必须命名为“target.txt”,并且要放在R语言的运行区域,比如我的R软件运行C盘我的文档,我就把文件放在我的文档。 并且,文件格式如下: rt=read.table("target.txt",sep="\t",check.names=F,header=F) genes=as.vector(rt[,1]) entrezIDs <- mget(genes, org.Hs.egSYMBOL2EG, ifnotfound=NA) entrezIDs <- as.character(entrezIDs) rt=cbind(rt,entrezID=entrezIDs) colnames(rt)=c("symbol","entrezID") rt=rt[is.na(rt[,"entrezID"])==F,] gene=rt$entrezID gene=unique(gene) 上面的Hs指的是人类基因。 colorSel="qvalue" if(qvalueFilter>0.05){ colorSel="pvalue" } all好像是BP(生物学过程)、CC(细胞功能)、MF(分子功能)3个基因功能。 kk=enrichGO(gene = gene,OrgDb = org.Hs.eg.db, pvalueCutoff =1, qvalueCutoff = 1, ont="all", readable =T) GO=as.data.frame(kk) GO=GO[(GO$pvalue<pvalueFilter & GO$qvalue<qvalueFilter),] write.table(GO,file="GO.xls",sep="\t",quote=F,row.names = F) if(nrow(GO)<30){ showNum=nrow(GO) } 输出格式为PDF pdf(file="GO_barplot.pdf",width = 9,height =7) bar=barplot(kk, drop = TRUE, showCategory =showNum,split="ONTOLOGY",color = colorSel) + facet_grid(ONTOLOGY~., scale='free')+scale_y_discrete(labels=function(x) stringr::str_wrap(x, width=60)) print(bar) dev.off() pdf(file="GO_bubble.pdf",width = 9,height =7) bub=dotplot(kk,showCategory = showNum, orderBy = "GeneRatio",split="ONTOLOGY", color = colorSel) + facet_grid(ONTOLOGY~., scale='free')+scale_y_discrete(labels=function(x) stringr::str_wrap(x, width=60)) print(bub) dev.off() showCategory,显示类别,我猜的。 pdf(file="GO_cnet.pdf",width = 10,height = 5) af=setReadable(kk, 'org.Hs.eg.db', 'ENTREZID') cnetplot(kk, showCategory = 10, categorySize="pvalue",circular = TRUE,colorEdge = TRUE,cex_label_category=0.65,cex_label_gene=0.6) dev.off() pdf(file="GO_net.pdf",width = 9,height = 7) x2 <- pairwise_termsim(kk) emapplot(x2,showCategory = 20,cex_label_category=0.65,color = "pvalue",layout ="nicely" ) dev.off() 代码原文 library("org.Hs.eg.db") library("clusterProfiler") library("enrichplot") library("ggplot2") library("ggnewscale") library("enrichplot") library("DOSE") library(stringr) pvalueFilter=0.05 qvalueFilter=1 showNum=8 rt=read.table("target.txt",sep="\t",check.names=F,header=F) genes=as.vector(rt[,1]) entrezIDs <- mget(genes, org.Hs.egSYMBOL2EG, ifnotfound=NA) entrezIDs <- as.character(entrezIDs) rt=cbind(rt,entrezID=entrezIDs) colnames(rt)=c("symbol","entrezID") rt=rt[is.na(rt[,"entrezID"])==F,] gene=rt$entrezID gene=unique(gene) colorSel="qvalue" if(qvalueFilter>0.05){ colorSel="pvalue" } kk=enrichGO(gene = gene,OrgDb = org.Hs.eg.db, pvalueCutoff =1, qvalueCutoff = 1, ont="all", readable =T) GO=as.data.frame(kk) GO=GO[(GO$pvalue<pvalueFilter & GO$qvalue<qvalueFilter),] write.table(GO,file="GO.xls",sep="\t",quote=F,row.names = F) if(nrow(GO)<30){ showNum=nrow(GO) } pdf(file="GO_barplot.pdf",width = 9,height =7) bar=barplot(kk, drop = TRUE, showCategory =showNum,split="ONTOLOGY",color = colorSel) + facet_grid(ONTOLOGY~., scale='free')+scale_y_discrete(labels=function(x) stringr::str_wrap(x, width=60)) print(bar) dev.off() pdf(file="GO_bubble.pdf",width = 9,height =7) bub=dotplot(kk,showCategory = showNum, orderBy = "GeneRatio",split="ONTOLOGY", color = colorSel) + facet_grid(ONTOLOGY~., scale='free')+scale_y_discrete(labels=function(x) stringr::str_wrap(x, width=60)) print(bub) dev.off() pdf(file="GO_cnet.pdf",width = 10,height = 5) af=setReadable(kk, 'org.Hs.eg.db', 'ENTREZID') cnetplot(kk, showCategory = 10, categorySize="pvalue",circular = TRUE,colorEdge = TRUE,cex_label_category=0.65,cex_label_gene=0.6) dev.off() pdf(file="GO_net.pdf",width = 9,height = 7) x2 <- pairwise_termsim(kk) emapplot(x2,showCategory = 20,cex_label_category=0.65,color = "pvalue",layout ="nicely" ) dev.off() 其他 备注:如有侵权,联系删除,反正我也看不懂,勉强会用一点点。 |
|