分享

学习WGCNA总结 | Public Library of Bioinformatics

 yjt2004us 2018-06-15
setwd('F:/WGCNA')
library(WGCNA)
options(stringsAsFactors = FALSE)
enableWGCNAThreads()
#1. 数据读入,处理和保存
fpkm<- read.csv('trans_counts.counts.matrix.TMM_normalized.FPKM.nozero.csv')
head(fpkm)
dim(fpkm)
names(fpkm)
datExpr0=as.data.frame(t(fpkm[,-c(1)]));
names(datExpr0)=fpkm$trans;
rownames(datExpr0)=names(fpkm)[-c(1)];
#data<-log10(date[,-1]+0.01)
gsg = goodSamplesGenes(datExpr0, verbose = 3);
gsg$allOK
sampleTree = hclust(dist(datExpr0), method = 'average')
#sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = 'Sample clustering to detect outliers', sub='', xlab='', cex.lab = 1.5,
     cex.axis = 1.5, cex.main = 2)
abline(h = 80000, col = 'red');
clust = cutreeStatic(sampleTree, cutHeight = 80000, minSize = 10)
table(clust)
keepSamples = (clust==1)
datExpr = datExpr0[keepSamples, ]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
save(datExpr, file = 'AS-green-FPKM-01-dataInput.RData')
#2. 选择合适的阀值
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Plot the results:
##sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab='Soft Threshold (power)',ylab='Scale Free Topology Model Fit,signed R^2',type='n',
     main = paste('Scale independence'));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col='red');
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col='red')
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab='Soft Threshold (power)',ylab='Mean Connectivity', type='n',
     main = paste('Mean connectivity'))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col='red')
#=====================================================================================
# 网络构建有两种方法,One-step和Step-by-step;
#  第一种:一步法进行网络构建
#=====================================================================================
#3. 一步法网络构建:One-step network construction and module detection
net = blockwiseModules(datExpr, power = 6, maxBlockSize = 6000,
                       TOMType = 'unsigned', minModuleSize = 30,
                       reassignThreshold = 0, mergeCutHeight = 0.25,
                       numericLabels = TRUE, pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = 'AS-green-FPKM-TOM',
                       verbose = 3)
table(net$colors)
#4. 绘画结果展示
# open a graphics window
#sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                    'Module colors',
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
#5.结果保存
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
table(moduleColors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
     file = 'AS-green-FPKM-02-networkConstruction-auto.RData')
#6. 导出网络到Cytoscape
# Recalculate topological overlap if needed
TOM = TOMsimilarityFromExpr(datExpr, power = 6);
# Read in the annotation file
# annot = read.csv(file = 'GeneAnnotation.csv');
# Select modules需要修改,选择需要导出的模块颜色
modules = c('turquoise', 'blue');
# Select module probes选择模块探测
probes = names(datExpr)
inModule = is.finite(match(moduleColors, modules));
modProbes = probes[inModule];
#modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
cyt = exportNetworkToCytoscape(modTOM,
                               edgeFile = paste('AS-green-FPKM-One-step-CytoscapeInput-edges-', paste(modules, collapse='-'), '.txt', sep=''),
                               nodeFile = paste('AS-green-FPKM-One-step-CytoscapeInput-nodes-', paste(modules, collapse='-'), '.txt', sep=''),
                               weighted = TRUE,
                               threshold = 0.02,
                               nodeNames = modProbes,
                               #altNodeNames = modGenes,
                               nodeAttr = moduleColors[inModule]);
#=====================================================================================
#  分析网络可视化,用heatmap可视化权重网络,heatmap每一行或列对应一个基因,颜色越深表示有较高的邻近
#=====================================================================================
options(stringsAsFactors = FALSE);
lnames = load(file = 'AS-green-FPKM-01-dataInput.RData');
lnames
lnames = load(file = 'AS-green-FPKM-02-networkConstruction-auto.RData');
lnames
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
#1. 可视化全部基因网络
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 6);
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7;
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA;
# Call the plot function
#sizeGrWindow(9,9)
TOMplot(plotTOM, geneTree, moduleColors, main = 'Network heatmap plot, all genes')
#随便选取1000个基因来可视化
nSelect = 1000
# For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = 'average')
selectColors = moduleColors[select];
# Open a graphical window
#sizeGrWindow(9,9)
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7;
diag(plotDiss) = NA;
TOMplot(plotDiss, selectTree, selectColors, main = 'Network heatmap plot, selected genes')
#=====================================================================================
#  第二种:一步步的进行网络构建
#=====================================================================================
###################Step-by-step network construction and module detection
#2.选择合适的阀值,同上
#3. 网络构建:(1) Co-expression similarity and adjacency
softPower = 6;
adjacency = adjacency(datExpr, power = softPower);
#(2) 邻近矩阵到拓扑矩阵的转换,Turn adjacency into topological overlap
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM
# (3) 聚类拓扑矩阵
#Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = 'average');
# Plot the resulting clustering tree (dendrogram)
#sizeGrWindow(12,9)
plot(geneTree, xlab='', sub='', main = 'Gene clustering on TOM-based dissimilarity',
     labels = FALSE, hang = 0.04);
#(4) 聚类分支的休整dynamicTreeCut
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 30;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
                            deepSplit = 2, pamRespectsDendro = FALSE,
                            minClusterSize = minModuleSize);
table(dynamicMods)
#4. 绘画结果展示
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
# Plot the dendrogram and colors underneath
#sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, 'Dynamic Tree Cut',
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = 'Gene dendrogram and module colors')
#5. 聚类结果相似模块的融合,Merging of modules whose expression profiles are very similar
#在聚类树中每一leaf是一个短线,代表一个基因,
#不同分之间靠的越近表示有高的共表达基因,将共表达极其相似的modules进行融合
# Calculate eigengenes
MEList = moduleEigengenes(datExpr, colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = 'average');
# Plot the result
#sizeGrWindow(7, 6)
plot(METree, main = 'Clustering of module eigengenes',
     xlab = '', sub = '')
#选择有75%相关性的进行融合
MEDissThres = 0.25
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = 'red')
# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;
#绘制融合前(Dynamic Tree Cut)和融合后(Merged dynamic)的聚类图
#sizeGrWindow(12, 9)
#pdf(file = 'Plots/geneDendro-3.pdf', wi = 9, he = 6)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
                    c('Dynamic Tree Cut', 'Merged dynamic'),
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
#dev.off()
# 只是绘制融合后聚类图
plotDendroAndColors(geneTree,mergedColors,'Merged dynamic',
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
#5.结果保存
# Rename to moduleColors
moduleColors = mergedColors
# Construct numerical labels corresponding to the colors
colorOrder = c('grey', standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs;
# Save module colors and labels for use in subsequent parts
save(MEs, moduleLabels, moduleColors, geneTree, file = 'AS-green-FPKM-02-networkConstruction-stepByStep.RData')
#6. 导出网络到Cytoscape
# Recalculate topological overlap if needed
TOM = TOMsimilarityFromExpr(datExpr, power = 6);
# Read in the annotation file
# annot = read.csv(file = 'GeneAnnotation.csv');
# Select modules需要修改
modules = c('brown', 'red');
# Select module probes
probes = names(datExpr)
inModule = is.finite(match(moduleColors, modules));
modProbes = probes[inModule];
#modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
cyt = exportNetworkToCytoscape(modTOM,
                               edgeFile = paste('AS-green-FPKM-Step-by-step-CytoscapeInput-edges-', paste(modules, collapse='-'), '.txt', sep=''),
                               nodeFile = paste('AS-green-FPKM-Step-by-step-CytoscapeInput-nodes-', paste(modules, collapse='-'), '.txt', sep=''),
                               weighted = TRUE,
                               threshold = 0.02,
                               nodeNames = modProbes,
                               #altNodeNames = modGenes,
                               nodeAttr = moduleColors[inModule]);
#=====================================================================================
#  分析网络可视化,用heatmap可视化权重网络,heatmap每一行或列对应一个基因,颜色越深表示有较高的邻近
#=====================================================================================
options(stringsAsFactors = FALSE);
lnames = load(file = 'AS-green-FPKM-01-dataInput.RData');
lnames
lnames = load(file = 'AS-green-FPKM-02-networkConstruction-stepByStep.RData');
lnames
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
#1. 可视化全部基因网络
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 6);
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7;
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA;
# Call the plot function
#sizeGrWindow(9,9)
TOMplot(plotTOM, geneTree, moduleColors, main = 'Network heatmap plot, all genes')
#随便选取1000个基因来可视化
nSelect = 1000
# For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = 'average')
selectColors = moduleColors[select];
# Open a graphical window
#sizeGrWindow(9,9)
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7;
diag(plotDiss) = NA;
TOMplot(plotDiss, selectTree, selectColors, main = 'Network heatmap plot, selected genes')
#此处画的是根据基因间表达量进行聚类所得到的各模块间的相关性图
MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes
MET = orderMEs(MEs)
sizeGrWindow(7, 6)
plotEigengeneNetworks(MET, 'Eigengene adjacency heatmap', marHeatmap = c(3,4,2,2), plotDendrograms = FALSE, xLabelsAngle = 90)

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多