学习WGCNA总结 | Public Library of Bioinformatics

yjt2004us 2018-06-15

展开全文

setwd('F:/WGCNA')

library(WGCNA)

options(stringsAsFactors = FALSE)

enableWGCNAThreads()

#1. 数据读入，处理和保存

fpkm<- read.csv('trans_counts.counts.matrix.TMM_normalized.FPKM.nozero.csv')

head(fpkm)

dim(fpkm)

names(fpkm)

datExpr0=as.data.frame(t(fpkm[,-c(1)]));

names(datExpr0)=fpkm$trans;

rownames(datExpr0)=names(fpkm)[-c(1)];

#data<-log10(date[,-1]+0.01)

gsg = goodSamplesGenes(datExpr0, verbose = 3);

gsg$allOK

sampleTree = hclust(dist(datExpr0), method = 'average')

#sizeGrWindow(12,9)

par(cex = 0.6)

par(mar = c(0,4,2,0))

plot(sampleTree, main = 'Sample clustering to detect outliers', sub='', xlab='', cex.lab = 1.5,

cex.axis = 1.5, cex.main = 2)

abline(h = 80000, col = 'red');

clust = cutreeStatic(sampleTree, cutHeight = 80000, minSize = 10)

table(clust)

keepSamples = (clust==1)

datExpr = datExpr0[keepSamples, ]

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)

save(datExpr, file = 'AS-green-FPKM-01-dataInput.RData')

#2. 选择合适的阀值

powers = c(c(1:10), seq(from = 12, to=20, by=2))

# Call the network topology analysis function

sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)

# Plot the results:

##sizeGrWindow(9, 5)

par(mfrow = c(1,2));

cex1 = 0.9;

# Scale-free topology fit index as a function of the soft-thresholding power

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

xlab='Soft Threshold (power)',ylab='Scale Free Topology Model Fit,signed R^2',type='n',

main = paste('Scale independence'));

text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

labels=powers,cex=cex1,col='red');

# this line corresponds to using an R^2 cut-off of h

abline(h=0.90,col='red')

# Mean connectivity as a function of the soft-thresholding power

plot(sft$fitIndices[,1], sft$fitIndices[,5],

xlab='Soft Threshold (power)',ylab='Mean Connectivity', type='n',

main = paste('Mean connectivity'))

text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col='red')

#=====================================================================================

# 网络构建有两种方法，One-step和Step-by-step；

# 第一种：一步法进行网络构建

#=====================================================================================

#3. 一步法网络构建：One-step network construction and module detection

net = blockwiseModules(datExpr, power = 6, maxBlockSize = 6000,

TOMType = 'unsigned', minModuleSize = 30,

reassignThreshold = 0, mergeCutHeight = 0.25,

numericLabels = TRUE, pamRespectsDendro = FALSE,

saveTOMs = TRUE,

saveTOMFileBase = 'AS-green-FPKM-TOM',

verbose = 3)

table(net$colors)

#4. 绘画结果展示

# open a graphics window

#sizeGrWindow(12, 9)

# Convert labels to colors for plotting

mergedColors = labels2colors(net$colors)

# Plot the dendrogram and the module colors underneath

plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],

'Module colors',

dendroLabels = FALSE, hang = 0.03,

addGuide = TRUE, guideHang = 0.05)

#5.结果保存

moduleLabels = net$colors

moduleColors = labels2colors(net$colors)

table(moduleColors)

MEs = net$MEs;

geneTree = net$dendrograms[[1]];

save(MEs, moduleLabels, moduleColors, geneTree,

file = 'AS-green-FPKM-02-networkConstruction-auto.RData')

#6. 导出网络到Cytoscape

# Recalculate topological overlap if needed

TOM = TOMsimilarityFromExpr(datExpr, power = 6);

# Read in the annotation file

# annot = read.csv(file = 'GeneAnnotation.csv');

# Select modules需要修改，选择需要导出的模块颜色

modules = c('turquoise', 'blue');

# Select module probes选择模块探测

probes = names(datExpr)

inModule = is.finite(match(moduleColors, modules));

modProbes = probes[inModule];

#modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];

# Select the corresponding Topological Overlap

modTOM = TOM[inModule, inModule];

dimnames(modTOM) = list(modProbes, modProbes)

# Export the network into edge and node list files Cytoscape can read

cyt = exportNetworkToCytoscape(modTOM,

edgeFile = paste('AS-green-FPKM-One-step-CytoscapeInput-edges-', paste(modules, collapse='-'), '.txt', sep=''),

nodeFile = paste('AS-green-FPKM-One-step-CytoscapeInput-nodes-', paste(modules, collapse='-'), '.txt', sep=''),

weighted = TRUE,

threshold = 0.02,

nodeNames = modProbes,

#altNodeNames = modGenes,

nodeAttr = moduleColors[inModule]);

#=====================================================================================

# 分析网络可视化，用heatmap可视化权重网络，heatmap每一行或列对应一个基因，颜色越深表示有较高的邻近

#=====================================================================================

options(stringsAsFactors = FALSE);

lnames = load(file = 'AS-green-FPKM-01-dataInput.RData');

lnames

lnames = load(file = 'AS-green-FPKM-02-networkConstruction-auto.RData');

lnames

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)

#1. 可视化全部基因网络

# Calculate topological overlap anew: this could be done more efficiently by saving the TOM

# calculated during module detection, but let us do it again here.

dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 6);

# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap

plotTOM = dissTOM^7;

# Set diagonal to NA for a nicer plot

diag(plotTOM) = NA;

# Call the plot function

#sizeGrWindow(9,9)

TOMplot(plotTOM, geneTree, moduleColors, main = 'Network heatmap plot, all genes')

#随便选取1000个基因来可视化

nSelect = 1000

# For reproducibility, we set the random seed

set.seed(10);

select = sample(nGenes, size = nSelect);

selectTOM = dissTOM[select, select];

# There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.

selectTree = hclust(as.dist(selectTOM), method = 'average')

selectColors = moduleColors[select];

# Open a graphical window

#sizeGrWindow(9,9)

# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing

# the color palette; setting the diagonal to NA also improves the clarity of the plot

plotDiss = selectTOM^7;

diag(plotDiss) = NA;

TOMplot(plotDiss, selectTree, selectColors, main = 'Network heatmap plot, selected genes')

#=====================================================================================

# 第二种：一步步的进行网络构建

#=====================================================================================

###################Step-by-step network construction and module detection

#2.选择合适的阀值，同上

#3. 网络构建：(1) Co-expression similarity and adjacency

softPower = 6;

adjacency = adjacency(datExpr, power = softPower);

#(2) 邻近矩阵到拓扑矩阵的转换，Turn adjacency into topological overlap

TOM = TOMsimilarity(adjacency);

dissTOM = 1-TOM

# (3) 聚类拓扑矩阵

#Call the hierarchical clustering function

geneTree = hclust(as.dist(dissTOM), method = 'average');

# Plot the resulting clustering tree (dendrogram)

#sizeGrWindow(12,9)

plot(geneTree, xlab='', sub='', main = 'Gene clustering on TOM-based dissimilarity',

labels = FALSE, hang = 0.04);

#(4) 聚类分支的休整dynamicTreeCut

# We like large modules, so we set the minimum module size relatively high:

minModuleSize = 30;

# Module identification using dynamic tree cut:

dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,

deepSplit = 2, pamRespectsDendro = FALSE,

minClusterSize = minModuleSize);

table(dynamicMods)

#4. 绘画结果展示

# Convert numeric lables into colors

dynamicColors = labels2colors(dynamicMods)

table(dynamicColors)

# Plot the dendrogram and colors underneath

#sizeGrWindow(8,6)

plotDendroAndColors(geneTree, dynamicColors, 'Dynamic Tree Cut',

dendroLabels = FALSE, hang = 0.03,

addGuide = TRUE, guideHang = 0.05,

main = 'Gene dendrogram and module colors')

#5. 聚类结果相似模块的融合，Merging of modules whose expression profiles are very similar

#在聚类树中每一leaf是一个短线，代表一个基因，

#不同分之间靠的越近表示有高的共表达基因，将共表达极其相似的modules进行融合

# Calculate eigengenes

MEList = moduleEigengenes(datExpr, colors = dynamicColors)

MEs = MEList$eigengenes

# Calculate dissimilarity of module eigengenes

MEDiss = 1-cor(MEs);

# Cluster module eigengenes

METree = hclust(as.dist(MEDiss), method = 'average');

# Plot the result

#sizeGrWindow(7, 6)

plot(METree, main = 'Clustering of module eigengenes',

xlab = '', sub = '')

#选择有75%相关性的进行融合

MEDissThres = 0.25

# Plot the cut line into the dendrogram

abline(h=MEDissThres, col = 'red')

# Call an automatic merging function

merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)

# The merged module colors

mergedColors = merge$colors;

# Eigengenes of the new merged modules:

mergedMEs = merge$newMEs;

#绘制融合前(Dynamic Tree Cut)和融合后(Merged dynamic)的聚类图

#sizeGrWindow(12, 9)

#pdf(file = 'Plots/geneDendro-3.pdf', wi = 9, he = 6)

plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),

c('Dynamic Tree Cut', 'Merged dynamic'),

dendroLabels = FALSE, hang = 0.03,

addGuide = TRUE, guideHang = 0.05)

#dev.off()

# 只是绘制融合后聚类图

plotDendroAndColors(geneTree,mergedColors,'Merged dynamic',

dendroLabels = FALSE, hang = 0.03,

addGuide = TRUE, guideHang = 0.05)

#5.结果保存

# Rename to moduleColors

moduleColors = mergedColors

# Construct numerical labels corresponding to the colors

colorOrder = c('grey', standardColors(50));

moduleLabels = match(moduleColors, colorOrder)-1;

MEs = mergedMEs;

# Save module colors and labels for use in subsequent parts

save(MEs, moduleLabels, moduleColors, geneTree, file = 'AS-green-FPKM-02-networkConstruction-stepByStep.RData')

#6. 导出网络到Cytoscape

# Recalculate topological overlap if needed

TOM = TOMsimilarityFromExpr(datExpr, power = 6);

# Read in the annotation file

# annot = read.csv(file = 'GeneAnnotation.csv');

# Select modules需要修改

modules = c('brown', 'red');

# Select module probes

probes = names(datExpr)

inModule = is.finite(match(moduleColors, modules));

modProbes = probes[inModule];

#modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];

# Select the corresponding Topological Overlap

modTOM = TOM[inModule, inModule];

dimnames(modTOM) = list(modProbes, modProbes)

# Export the network into edge and node list files Cytoscape can read

cyt = exportNetworkToCytoscape(modTOM,

edgeFile = paste('AS-green-FPKM-Step-by-step-CytoscapeInput-edges-', paste(modules, collapse='-'), '.txt', sep=''),

nodeFile = paste('AS-green-FPKM-Step-by-step-CytoscapeInput-nodes-', paste(modules, collapse='-'), '.txt', sep=''),

weighted = TRUE,

threshold = 0.02,

nodeNames = modProbes,

#altNodeNames = modGenes,

nodeAttr = moduleColors[inModule]);

#=====================================================================================

# 分析网络可视化，用heatmap可视化权重网络，heatmap每一行或列对应一个基因，颜色越深表示有较高的邻近

#=====================================================================================

options(stringsAsFactors = FALSE);

lnames = load(file = 'AS-green-FPKM-01-dataInput.RData');

lnames

lnames = load(file = 'AS-green-FPKM-02-networkConstruction-stepByStep.RData');

lnames

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)

#1. 可视化全部基因网络

# Calculate topological overlap anew: this could be done more efficiently by saving the TOM

# calculated during module detection, but let us do it again here.

dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 6);

# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap

plotTOM = dissTOM^7;

# Set diagonal to NA for a nicer plot

diag(plotTOM) = NA;

# Call the plot function

#sizeGrWindow(9,9)

TOMplot(plotTOM, geneTree, moduleColors, main = 'Network heatmap plot, all genes')

#随便选取1000个基因来可视化

nSelect = 1000

# For reproducibility, we set the random seed

set.seed(10);

select = sample(nGenes, size = nSelect);

selectTOM = dissTOM[select, select];

# There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.

selectTree = hclust(as.dist(selectTOM), method = 'average')

selectColors = moduleColors[select];

# Open a graphical window

#sizeGrWindow(9,9)

# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing

# the color palette; setting the diagonal to NA also improves the clarity of the plot

plotDiss = selectTOM^7;

diag(plotDiss) = NA;

TOMplot(plotDiss, selectTree, selectColors, main = 'Network heatmap plot, selected genes')

#此处画的是根据基因间表达量进行聚类所得到的各模块间的相关性图

MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes

MET = orderMEs(MEs)

sizeGrWindow(7, 6)

plotEigengeneNetworks(MET, 'Eigengene adjacency heatmap', marHeatmap = c(3,4,2,2), plotDendrograms = FALSE, xLabelsAngle = 90)

本站是提供个人知识管理的网络存储空间，所有内容均由用户发布，不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息，谨防诈骗。如发现有害或侵权内容，请点击一键举报。

转藏分享

QQ空间 QQ好友新浪微博微信

献花（0） +1

来自： yjt2004us > 《待分类》

举报/认领

0条评论

发表

请遵守用户评论公约

类似文章 更多

yjt2004us

关注对话

TA的最新馆藏

生信分析过程中这些常见文件的格式以及查看方式你都知道吗？
献给初学者，手把手教你如何快速查找基因序列信息！
用EndNote编辑最适合投稿杂志的参考文献格式（适用所有杂志）
使用Python实现文件压缩和解压
出于无聊，我给你们分析了一下去年灌的生信文章
功能这么强的基因，作者怎么找到的？

喜欢该文的人也喜欢更多

热门阅读换一换