NMDS_ordisurf.R# ============================================================ # Tutorial on drawing an NMDS plot with environmental contours using ggplot2 # by Umer Zeeshan Ijaz (http://userweb.eng./umer.ijaz) # ============================================================= abund_table<-read.csv('SPE_pitlatrine.csv',row.names=1,check.names=FALSE) #Transpose the data to have sample names on rows abund_table<-t(abund_table) meta_table<-read.csv('ENV_pitlatrine.csv',row.names=1,check.names=FALSE) #Just a check to ensure that the samples in meta_table are in the same order as in abund_table meta_table<-meta_table[rownames(abund_table),] #Get grouping information grouping_info<-data.frame(row.names=rownames(abund_table),t(as.data.frame(strsplit(rownames(abund_table),'_')))) # > head(grouping_info) # X1 X2 X3 # T_2_1 T 2 1 # T_2_10 T 2 10 # T_2_12 T 2 12 # T_2_2 T 2 2 # T_2_3 T 2 3 # T_2_6 T 2 6 #Let us group on countries groups<-grouping_info[,1] library(vegan) #Get MDS stats sol<-metaMDS(abund_table,distance = 'bray', k = 2, trymax = 50) #We use meta_table$Temp to plot temperature values on the plot. You can select #any other variable #> names(meta_table) #[1] 'pH' 'Temp' 'TS' 'VS' 'VFA' 'CODt' #[7] 'CODs' 'perCODsbyt' 'NH4' 'Prot' 'Carbo' #Reference:http://oliviarata./2014/07/17/ordinations-in-ggplot2-v2-ordisurf/ ordi<-ordisurf(sol,meta_table$Temp,plot = FALSE, bs='ds') ordi.grid <- ordi$grid #extracts the ordisurf object str(ordi.grid) #it's a list though - cannot be plotted as is ordi.mite <- expand.grid(x = ordi.grid$x, y = ordi.grid$y) #get x and ys ordi.mite$z <- as.vector(ordi.grid$z) #unravel the matrix for the z scores ordi.mite.na <- data.frame(na.omit(ordi.mite)) #gets rid of the nas NMDS=data.frame(x=sol$point[,1],y=sol$point[,2],Type=groups) p<-ggplot()+ stat_contour(data = ordi.mite.na, aes(x = x, y = y, z = z, colour = ..level..),positon='identity')+ #can change the binwidth depending on how many contours you want geom_point(data=NMDS,aes(x,y,fill=Type),pch=21,size=3,colour=NA) p<-p+scale_colour_continuous(high = 'darkgreen', low = 'darkolivegreen1') #here we set the high and low of the colour scale. Can delete to go back to the standard blue, or specify others p<-p+labs(colour = 'Temperature') #another way to set the labels, in this case, for the colour legend p<-p+theme_bw() #p<-p+theme(legend.key = element_blank(), #removes the box around each legend item # legend.position = 'bottom', #legend at the bottom # legend.direction = 'horizontal', # legend.box = 'horizontal', # legend.box.just = 'centre') pdf('NMDS_ordisurf.pdf',width=10,height=10) print(p) dev.off() 以上代码产生下图: KW.R# ============================================================ # Tutorial on finding significant taxa and then plotting them using ggplot2 # by Umer Zeeshan Ijaz (http://userweb.eng./umer.ijaz) # ============================================================= library(reshape) library(ggplot2) #Load the abundance table abund_table<-read.csv('SPE_pitlatrine.csv',row.names=1,check.names=FALSE) #Transpose the data to have sample names on rows abund_table<-t(abund_table) #Get grouping information grouping_info<-data.frame(row.names=rownames(abund_table),t(as.data.frame(strsplit(rownames(abund_table),'_')))) # > head(grouping_info) # X1 X2 X3 # T_2_1 T 2 1 # T_2_10 T 2 10 # T_2_12 T 2 12 # T_2_2 T 2 2 # T_2_3 T 2 3 # T_2_6 T 2 6 #Use countries as grouping information groups<-as.factor(grouping_info[,1]) #Apply normalisation (either use relative or log-relative transformation) #data<-abund_table/rowSums(abund_table) data<-log((abund_table+1)/(rowSums(abund_table)+dim(abund_table)[2])) data<-as.data.frame(data) #Reference: http://www.bigre./courses/statistics_bioinformatics/practicals/microarrays_berry_2010/berry_feature_selection.html kruskal.wallis.alpha=0.01 kruskal.wallis.table <- data.frame() for (i in 1:dim(data)[2]) { ks.test <- kruskal.test(data[,i], g=groups) # Store the result in the data frame kruskal.wallis.table <- rbind(kruskal.wallis.table, data.frame(id=names(data)[i], p.value=ks.test$p.value )) # Report number of values tested cat(paste('Kruskal-Wallis test for ',names(data)[i],' ', i, '/', dim(data)[2], '; p-value=', ks.test$p.value,'\n', sep='')) } kruskal.wallis.table$E.value <- kruskal.wallis.table$p.value * dim(kruskal.wallis.table)[1] kruskal.wallis.table$FWER <- pbinom(q=0, p=kruskal.wallis.table$p.value, size=dim(kruskal.wallis.table)[1], lower.tail=FALSE) kruskal.wallis.table <- kruskal.wallis.table[order(kruskal.wallis.table$p.value, decreasing=FALSE), ] kruskal.wallis.table$q.value.factor <- dim(kruskal.wallis.table)[1] / 1:dim(kruskal.wallis.table)[1] kruskal.wallis.table$q.value <- kruskal.wallis.table$p.value * kruskal.wallis.table$q.value.factor pdf('KW_correction.pdf') plot(kruskal.wallis.table$p.value, kruskal.wallis.table$E.value, main='Multitesting corrections', xlab='Nominal p-value', ylab='Multitesting-corrected statistics', log='xy', col='blue', panel.first=grid(col='#BBBBBB',lty='solid')) lines(kruskal.wallis.table$p.value, kruskal.wallis.table$FWER, pch=20,col='darkgreen', type='p' ) lines(kruskal.wallis.table$p.value, kruskal.wallis.table$q.value, pch='+',col='darkred', type='p' ) abline(h=kruskal.wallis.alpha, col='red', lwd=2) legend('topleft', legend=c('E-value', 'p-value', 'q-value'), col=c('blue', 'darkgreen','darkred'), lwd=2,bg='white',bty='o') dev.off() last.significant.element <- max(which(kruskal.wallis.table$q.value <= kruskal.wallis.alpha)) selected <- 1:last.significant.element diff.cat.factor <- kruskal.wallis.table$id[selected] diff.cat <- as.vector(diff.cat.factor) print(kruskal.wallis.table[selected,]) #Now we plot taxa significantly different between the categories df<-NULL for(i in diff.cat){ tmp<-data.frame(data[,i],groups,rep(paste(i,' q = ',round(kruskal.wallis.table[kruskal.wallis.table$id==i,'q.value'],5),sep=''),dim(data)[1])) if(is.null(df)){df<-tmp} else { df<-rbind(df,tmp)} } colnames(df)<-c('Value','Type','Taxa') p<-ggplot(df,aes(Type,Value,colour=Type))+ylab('Log-relative normalised') p<-p+geom_boxplot()+geom_jitter()+theme_bw()+ facet_wrap( ~ Taxa , scales='free', ncol=3) p<-p+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) pdf('KW_significant.pdf',width=10,height=14) print(p) dev.off() 以上代码产生以下两个图: NB.RThe DESeq {DESeq2} package allows negative binomial GLM fitting and Wald statistics for abundance data. You can use this script as an alternative to KW.R to find taxa that are significantly different between different conditions. # ============================================================
# Tutorial on finding significant taxa and then plotting them using ggplot2 # by Umer Zeeshan Ijaz (http://userweb.eng./umer.ijaz) # ============================================================= library(ggplot2) library(DESeq2) #Load the abundance table abund_table<-read.csv('SPE_pitlatrine.csv',row.names=1,check.names=FALSE) #Transpose the data to have sample names on rows abund_table<-t(abund_table) #Get grouping information grouping_info<-data.frame(row.names=rownames(abund_table),t(as.data.frame(strsplit(rownames(abund_table),'_')))) # > head(grouping_info) # X1 X2 X3 # T_2_1 T 2 1 # T_2_10 T 2 10 # T_2_12 T 2 12 # T_2_2 T 2 2 # T_2_3 T 2 3 # T_2_6 T 2 6 #We will convert our table to DESeqDataSet object countData = round(as(abund_table, 'matrix'), digits = 0) # We will add 1 to the countData otherwise DESeq will fail with the error: # estimating size factors # Error in estimateSizeFactorsForMatrix(counts(object), locfunc = locfunc, : # every gene contains at least one zero, cannot compute log geometric means countData<-(t(countData+1)) dds <- DESeqDataSetFromMatrix(countData, grouping_info, as.formula(~ X1)) #Reference:https://github.com/MadsAlbertsen/ampvis/blob/master/R/amp_test_species.R #Differential expression analysis based on the Negative Binomial (a.k.a. Gamma-Poisson) distribution #Some reason this doesn't work: data_deseq_test = DESeq(dds, test='wald', fitType='parametric') data_deseq_test = DESeq(dds) ## Extract the results res = results(data_deseq_test, cooksCutoff = FALSE) res_tax = cbind(as.data.frame(res), as.matrix(countData[rownames(res), ]), OTU = rownames(res)) sig = 0.001 fold = 0 plot.point.size = 2 label=T tax.display = NULL tax.aggregate = 'OTU' res_tax_sig = subset(res_tax, padj < sig & fold < abs(log2FoldChange)) res_tax_sig <- res_tax_sig[order(res_tax_sig$padj),] ## Plot the data ### MA plot res_tax$Significant <- ifelse(rownames(res_tax) %in% rownames(res_tax_sig) , 'Yes', 'No') res_tax$Significant[is.na(res_tax$Significant)] <- 'No' p1 <- ggplot(data = res_tax, aes(x = baseMean, y = log2FoldChange, color = Significant)) + geom_point(size = plot.point.size) + scale_x_log10() + scale_color_manual(values=c('black', 'red')) + labs(x = 'Mean abundance', y = 'Log2 fold change')+theme_bw() if(label == T){ if (!is.null(tax.display)){ rlab <- data.frame(res_tax, Display = apply(res_tax[,c(tax.display, tax.aggregate)], 1, paste, collapse='; ')) } else { rlab <- data.frame(res_tax, Display = res_tax[,tax.aggregate]) } p1 <- p1 + geom_text(data = subset(rlab, Significant == 'Yes'), aes(label = Display), size = 4, vjust = 1) } pdf('NB_MA.pdf') print(p1) dev.off() res_tax_sig_abund = cbind(as.data.frame(countData[rownames(res_tax_sig), ]), OTU = rownames(res_tax_sig), padj = res_tax[rownames(res_tax_sig),'padj']) #Apply normalisation (either use relative or log-relative transformation) #data<-abund_table/rowSums(abund_table) data<-log((abund_table+1)/(rowSums(abund_table)+dim(abund_table)[2])) data<-as.data.frame(data) #Now we plot taxa significantly different between the categories df<-NULL for(i in res_tax[rownames(res_tax_sig),'OTU']){ tmp<-data.frame(data[,i],groups,rep(paste(i,' padj = ',round(res_tax[i,'padj'],5),sep=''),dim(data)[1])) if(is.null(df)){df<-tmp} else { df<-rbind(df,tmp)} } colnames(df)<-c('Value','Type','Taxa') p<-ggplot(df,aes(Type,Value,colour=Type))+ylab('Log-relative normalised') p<-p+geom_boxplot()+geom_jitter()+theme_bw()+ facet_wrap( ~ Taxa , scales='free', ncol=3) p<-p+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) pdf('NB_significant.pdf',width=10,height=14) print(p) dev.off() 使用以上代码,我们将得到以下图: 欢迎推广我们的公众号: |
|