刷到了一个新鲜出炉(2023-08-10)的生物信息学数据挖掘文章:《Integrating bioinformatic resources to identify characteristics of rheumatoid arthritis-related usual interstitial pneumonia》
这个文章做了 Differentially expression analysis of GSE199152 ,这个数据集 GSE199152 (3 RA-UIP, 20 IPF-UIP patients and 4 non-UIP controls) ,然后就可视化了 DESeq2, EdgeR and Limma packages were used to filter up-regulated DEGs
也就是说,作者仅仅是关心里面的上调基因,然后针对交集基因列表去做功能富集。关于这个交集基因作者使用了一个韦恩图展示 ,如下所示:
DESeq2, EdgeR and Limma的交集 学徒作业做起码10个转录组测序数据 的表达量矩阵的 DESeq2, EdgeR and Limma的差异分析然后可视化他们的交集的汇总情况。
下面给出来一个示例代码:
step1:表达量矩阵整理# 魔幻操作,一键清空 rm(list = ls()) options(stringsAsFactors = F )library (data.table)library (airway,quietly = T ) data(airway) mat <- assay(airway) mat[1 :4 ,1 :4 ] keep_feature <- rowSums (mat > 1 ) > 1 ensembl_matrix <- mat[keep_feature, ] library (AnnoProbe) head(rownames(ensembl_matrix)) ids=annoGene(rownames(ensembl_matrix),'ENSEMBL' ,'human' ) head(ids) ids=ids[!duplicated(ids$SYMBOL),] ids=ids[!duplicated(ids$ENSEMBL),] symbol_matrix= ensembl_matrix[match(ids$ENSEMBL,rownames(ensembl_matrix)),] rownames(symbol_matrix) = ids$SYMBOL symbol_matrix[1 :4 ,1 :4 ] group_list = as.character(airway@colData$dex);group_list group_list=ifelse(group_list=='untrt' ,'control' ,'case' ) table(group_list) group_list = factor(group_list,levels = c('control' ,'case' )) save(symbol_matrix,group_list,file = 'symbol_matrix.Rdata' )
step2.1 : 使用DESeq2做差异分析library (limma)library (edgeR)library (DESeq2) load(file = 'symbol_matrix.Rdata' ) symbol_matrix[1 :4 ,1 :4 ] exprSet = symbol_matrix (colData <- data.frame(row.names=colnames(exprSet), group_list=group_list) ) dds <- DESeqDataSetFromMatrix(countData = exprSet, colData = colData, design = ~ group_list) dds <- DESeq(dds) res <- results(dds, contrast=c("group_list" , levels(group_list)[2 ], levels(group_list)[1 ])) resOrdered <- res[order(res$padj),] head(resOrdered) DEG =as.data.frame(resOrdered) DEG_deseq2 = na.omit(DEG) save(DEG_deseq2, file = 'DEG_deseq2.Rdata' )
step2.2 : 使用edgeR做差异分析 load(file = 'symbol_matrix.Rdata' ) symbol_matrix[1 :4 ,1 :4 ] exprSet = symbol_matrix d <- DGEList(counts=exprSet, group= group_list) keep <- rowSums(cpm(d)>1 ) >= 2 table(keep) d <- d[keep, , keep.lib.sizes=FALSE ] d$samples$lib.size <- colSums(d$counts) d <- calcNormFactors(d) d$samples dge=d design <- model.matrix(~0 +factor(group_list)) rownames(design)<-colnames(dge) colnames(design)<-levels(factor(group_list)) dge <- estimateGLMCommonDisp(dge,design) dge <- estimateGLMTrendedDisp(dge, design) dge <- estimateGLMTagwiseDisp(dge, design) fit <- glmFit(dge, design)# https://www./p/110861/ lrt <- glmLRT(fit, contrast=c(-1 ,1 )) nrDEG=topTags(lrt, n=nrow(dge)) DEG_edgeR = as.data.frame(nrDEG) head(DEG_edgeR) save(DEG_edgeR, file = 'DEG_edgeR.Rdata' )
step2.3 : 使用limma做差异分析 load(file = 'symbol_matrix.Rdata' ) symbol_matrix[1 :4 ,1 :4 ] exprSet = symbol_matrix design <- model.matrix(~0 +factor(group_list)) colnames(design)=levels(factor(group_list)) rownames(design)=colnames(exprSet) design dge <- DGEList(counts=exprSet) dge <- calcNormFactors(dge) logCPM <- cpm(dge, log=TRUE , prior.count=3 ) v <- voom(dge,design,plot=TRUE , normalize="quantile" ) fit <- lmFit(v, design) group_list g1=levels(group_list)[1 ] g2=levels(group_list)[2 ] con=paste0(g2,'-' ,g1) cat(con) cont.matrix=makeContrasts(contrasts=c(con),levels = design) fit2=contrasts.fit(fit,cont.matrix) fit2=eBayes(fit2) tempOutput = topTable(fit2, coef=con, n=Inf ) DEG_limma_voom = na.omit(tempOutput) head(DEG_limma_voom) save(DEG_limma_voom, file = 'DEG_limma_voom.Rdata' )