heatmap.R# ============================================================ # Tutorial on drawing a heatmap using ggplot2 # by Umer Zeeshan Ijaz (http://userweb.eng./umer.ijaz) # ============================================================= abund_table<-read.csv('SPE_pitlatrine.csv',row.names=1,check.names=FALSE) # Transpose the data to have sample names on rows abund_table<-t(abund_table) # Convert to relative frequencies abund_table <- abund_table/rowSums(abund_table) library(reshape2) df<-melt(abund_table) colnames(df)<-c('Samples','Species','Value') library(plyr) library(scales) # We are going to apply transformation to our data to make it # easier on eyes #df<-ddply(df,.(Samples),transform,rescale=scale(Value)) df<-ddply(df,.(Samples),transform,rescale=sqrt(Value)) # Plot heatmap p <- ggplot(df, aes(Species, Samples)) + geom_tile(aes(fill = rescale),colour = 'white') + scale_fill_gradient(low = 'white',high = 'steelblue')+ scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) + theme(legend.position = 'none',axis.ticks = element_blank(),axis.text.x = element_text(angle = 90, hjust = 1,size=5),axis.text.y = element_text(size=5)) pdf('Heatmap.pdf') print(p) dev.off() 以上代码将产生以下热图: TAXAplot.R (群落结构堆积图)# ============================================================ 以上代码将产生以下堆积图: Correlation.R (相关性热图)# ============================================================ # Tutorial on drawing a correlation map using ggplot2 # by Umer Zeeshan Ijaz (http://userweb.eng./umer.ijaz) # ============================================================= abund_table<-read.csv('SPE_pitlatrine.csv',row.names=1,check.names=FALSE) #Transpose the data to have sample names on rows abund_table<-t(abund_table) meta_table<-read.csv('ENV_pitlatrine.csv',row.names=1,check.names=FALSE) #Filter out samples with fewer counts abund_table<-abund_table[rowSums(abund_table)>200,] #Extract the corresponding meta_table for the samples in abund_table meta_table<-meta_table[rownames(abund_table),] #You can use sel_env to specify the variables you want to use and sel_env_label to specify the labes for the pannel sel_env<-c('pH','Temp','TS','VS','VFA','CODt','CODs','perCODsbyt','NH4','Prot','Carbo') sel_env_label <- list( 'pH'='PH', 'Temp'='Temperature', 'TS'='TS', 'VS'='VS', 'VFA'='VFA', 'CODt'='CODt', 'CODs'='CODs', 'perCODsbyt'='%CODs/t', 'NH4'='NH4', 'Prot'='Protein', 'Carbo'='Carbon' ) sel_env_label<-t(as.data.frame(sel_env_label)) sel_env_label<-as.data.frame(sel_env_label) colnames(sel_env_label)<-c('Trans') sel_env_label$Trans<-as.character(sel_env_label$Trans) #Now get a filtered table based on sel_env meta_table_filtered<-meta_table[,sel_env] abund_table_filtered<-abund_table[rownames(meta_table_filtered),] #Apply normalisation (either use relative or log-relative transformation) #x<-abund_table_filtered/rowSums(abund_table_filtered) x<-log((abund_table_filtered+1)/(rowSums(abund_table_filtered)+dim(abund_table_filtered)[2])) x<-x[,order(colSums(x),decreasing=TRUE)] #Extract list of top N Taxa N<-51 taxa_list<-colnames(x)[1:N] #remove '__Unknown__' and add it to others taxa_list<-taxa_list[!grepl('Unknown',taxa_list)] N<-length(taxa_list) x<-data.frame(x[,colnames(x) %in% taxa_list]) y<-meta_table_filtered #Get grouping information grouping_info<-data.frame(row.names=rownames(abund_table),t(as.data.frame(strsplit(rownames(abund_table),'_')))) # > head(grouping_info) # X1 X2 X3 # T_2_1 T 2 1 # T_2_10 T 2 10 # T_2_12 T 2 12 # T_2_2 T 2 2 # T_2_3 T 2 3 # T_2_6 T 2 6 #Let us group on countries groups<-grouping_info[,1] #You can use kendall, spearman, or pearson below: method<-'kendall' #Now calculate the correlation between individual Taxa and the environmental data df<-NULL for(i in colnames(x)){ for(j in colnames(y)){ for(k in unique(groups)){ a<-x[groups==k,i,drop=F] b<-y[groups==k,j,drop=F] tmp<-c(i,j,cor(a[complete.cases(b),],b[complete.cases(b),],use='everything',method=method),cor.test(a[complete.cases(b),],b[complete.cases(b),],method=method)$p.value,k) if(is.null(df)){ df<-tmp } else{ df<-rbind(df,tmp) } } } } df<-data.frame(row.names=NULL,df) colnames(df)<-c('Taxa','Env','Correlation','Pvalue','Type') df$Pvalue<-as.numeric(as.character(df$Pvalue)) df$AdjPvalue<-rep(0,dim(df)[1]) df$Correlation<-as.numeric(as.character(df$Correlation)) #You can adjust the p-values for multiple comparison using Benjamini & Hochberg (1995): # 1 -> donot adjust # 2 -> adjust Env + Type (column on the correlation plot) # 3 -> adjust Taxa + Type (row on the correlation plot for each type) # 4 -> adjust Taxa (row on the correlation plot) # 5 -> adjust Env (panel on the correlation plot) adjustment_label<-c('NoAdj','AdjEnvAndType','AdjTaxaAndType','AdjTaxa','AdjEnv') adjustment<-5 if(adjustment==1){ df$AdjPvalue<-df$Pvalue } else if (adjustment==2){ for(i in unique(df$Env)){ for(j in unique(df$Type)){ sel<-df$Env==i & df$Type==j df$AdjPvalue[sel]<-p.adjust(df$Pvalue[sel],method='BH') } } } else if (adjustment==3){ for(i in unique(df$Taxa)){ for(j in unique(df$Type)){ sel<-df$Taxa==i & df$Type==j df$AdjPvalue[sel]<-p.adjust(df$Pvalue[sel],method='BH') } } } else if (adjustment==4){ for(i in unique(df$Taxa)){ sel<-df$Taxa==i df$AdjPvalue[sel]<-p.adjust(df$Pvalue[sel],method='BH') } } else if (adjustment==5){ for(i in unique(df$Env)){ sel<-df$Env==i df$AdjPvalue[sel]<-p.adjust(df$Pvalue[sel],method='BH') } } #Now we generate the labels for signifant values df$Significance<-cut(df$AdjPvalue, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c('***', '**', '*', '')) #We ignore NAs df<-df[complete.cases(df),] #We want to reorganize the Env data based on they appear df$Env<-factor(df$Env,as.character(df$Env)) #We use the function to change the labels for facet_grid in ggplot2 Env_labeller <- function(variable,value){ return(sel_env_label[as.character(value),'Trans']) } p <- ggplot(aes(x=Type, y=Taxa, fill=Correlation), data=df) p <- p + geom_tile() + scale_fill_gradient2(low='#2C7BB6', mid='white', high='#D7191C') p<-p+theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5)) p<-p+geom_text(aes(label=Significance), color='black', size=3)+labs(y=NULL, x=NULL, fill=method) p<-p+facet_grid(. ~ Env, drop=TRUE,scale='free',space='free_x',labeller=Env_labeller) pdf(paste('Correlation_',adjustment_label[adjustment],'.pdf',sep=''),height=8,width=22) print(p) dev.off() 以上代码将产生以下相关性指数所做的热图: |
|