分享

R语言GEO数据处理(六)

 勤劳的bee 2021-05-27

# 5. Differential expression analysis ------------------------------------------

library(limma)

# Design matrix built from group_list (defined earlier in the workflow):
# an intercept column plus one column per non-reference group level.
design <- model.matrix(~group_list)

# Fit a linear model to each row of exp2, then moderate the statistics with
# empirical Bayes.
fit <- lmFit(exp2, design)
fit <- eBayes(fit)

# coef = 2 selects the second column of the design matrix (the group effect);
# number = Inf returns every row instead of only the top hits.
deg <- topTable(fit, coef = 2, number = Inf)
colnames(deg)

# probe_id column: copy the row names into a regular column so the table can
# be joined on it.
library(dplyr)

# The row names live on deg itself. rownames(deg$probe_id) is NULL because the
# column does not exist yet, so no probe_id column would be created and the
# join below would fail.
deg <- mutate(deg, probe_id = rownames(deg))
head(deg)

# symbol column: attach the annotation from ids (presumably a probe_id-to-symbol
# table built earlier -- confirm), then keep only the first row per symbol.
deg <- inner_join(deg, ids, by = "probe_id")
head(deg)

deg <- deg[!duplicated(deg$symbol), ]

# Flag up- and down-regulated genes

# Thresholds are named logFC_t / P.Value_t because save() at the end of this
# section stores them under those names; with any other name save() stops with
# an "object not found" error.
# |logFC| > 1 corresponds to a more than 2-fold change, assuming exp2 is on the
# log2 scale -- confirm against the upstream normalisation step.
logFC_t <- 1
# P values strictly below 0.01 count as significant.
P.Value_t <- 0.01

k1 <- (deg$P.Value < P.Value_t) & (deg$logFC < -logFC_t)  # down-regulated
k2 <- (deg$P.Value < P.Value_t) & (deg$logFC > logFC_t)   # up-regulated

change <- ifelse(k1, "down", ifelse(k2, "up", "not"))

deg <- mutate(deg, change = change)
head(deg)

table(deg$change)

# ENTREZID column, used for enrichment analysis

library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)

# Map gene symbols to Entrez IDs using the human annotation database.
s2e <- bitr(
  deg$symbol,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Hs.eg.db
)

# inner_join keeps only the rows whose symbol has a match in s2e.
deg <- inner_join(deg, s2e, by = c("symbol" = "SYMBOL"))
head(deg)

write.csv(deg, file = "mydata.csv", row.names = TRUE)

# Persist the annotated table together with the grouping and the thresholds.
save(group_list, deg, logFC_t, P.Value_t, file = "mydata.Rdata")

    转藏 分享 献花(0)

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多