# 5. Differential expression analysis ------------------------------------
# Fits a limma linear model on `exp2` using `group_list`, annotates the
# result table with gene symbols (via `ids`) and ENTREZ IDs, labels each
# gene as up/down/not changed, and writes the result to disk.
# Relies on `exp2`, `group_list` and `ids` being defined earlier.

library(limma)
design <- model.matrix(~group_list)
fit <- lmFit(exp2, design)
fit <- eBayes(fit)
# coef = 2 selects the second column of the design matrix.
deg <- topTable(fit, coef = 2, number = Inf)
colnames(deg)

# Add a probe_id column: turn the row names into a regular column.
library(dplyr)
# Row names must be read from `deg` itself; `deg$probe_id` does not exist yet.
deg <- mutate(deg, probe_id = rownames(deg))
head(deg)

# Add the symbol column, then keep only the first row for each symbol.
deg <- inner_join(deg, ids, by = "probe_id")
head(deg)
deg <- deg[!duplicated(deg$symbol), ]

# Label up- and down-regulated genes.
# NOTE(review): the original comment said "more than 2-fold change counts as
# differential", which would correspond to a cutoff of 1 if logFC is on a
# log2 scale, but the value in use is 0.1 -- confirm the intended cutoff.
logFC_t <- 0.1
# A gene is treated as significant when its P value is below this threshold.
P.Value_t <- 0.01
k1 <- (deg$P.Value < P.Value_t) & (deg$logFC < -logFC_t)
k2 <- (deg$P.Value < P.Value_t) & (deg$logFC > logFC_t)
change <- ifelse(k1, "down", ifelse(k2, "up", "not"))
deg <- mutate(deg, change = change)
head(deg)
table(deg$change)

# Add the ENTREZID column, used for enrichment analysis.
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
# org.Hs.eg.db is the human annotation database.
s2e <- bitr(
  deg$symbol,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Hs.eg.db
)
deg <- inner_join(deg, s2e, by = c("symbol" = "SYMBOL"))
head(deg)

# Persist the annotated table together with the thresholds used above.
write.csv(deg, file = "mydata.csv", row.names = TRUE)
save(group_list, deg, logFC_t, P.Value_t, file = "mydata.Rdata")