R语言代码：用于微生物生态数据R code for ecological data analysis...

生物_医药_科研 2021-12-22

展开全文

heatmap.R

# ============================================================
# Tutorial on drawing a heatmap using ggplot2
# by Umer Zeeshan Ijaz (http://userweb.eng./umer.ijaz)
# =============================================================
abund_table<-read.csv('SPE_pitlatrine.csv',row.names=1,check.names=FALSE)
# Transpose the data to have sample names on rows
abund_table<-t(abund_table)
# Convert to relative frequencies
abund_table <- abund_table/rowSums(abund_table)
library(reshape2)
df<-melt(abund_table)
colnames(df)<-c('Samples','Species','Value')
library(plyr)
library(scales)

# We are going to apply transformation to our data to make it
# easier on eyes

#df<-ddply(df,.(Samples),transform,rescale=scale(Value))
df<-ddply(df,.(Samples),transform,rescale=sqrt(Value))

# Plot heatmap
p <- ggplot(df, aes(Species, Samples)) +
geom_tile(aes(fill = rescale),colour = 'white') +
scale_fill_gradient(low = 'white',high = 'steelblue')+
scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) + theme(legend.position = 'none',axis.ticks = element_blank(),axis.text.x = element_text(angle = 90, hjust = 1,size=5),axis.text.y = element_text(size=5))

pdf('Heatmap.pdf')
print(p)
dev.off()

以上代码将产生以下热图：

TAXAplot.R （群落结构堆积图）

# ============================================================
# Tutorial on drawing a taxa plot using ggplot2
# by Umer Zeeshan Ijaz (http://userweb.eng./umer.ijaz)
# =============================================================
 
abund_table<-read.csv('SPE_pitlatrine.csv',row.names=1,check.names=FALSE)
 
#Transpose the data to have sample names on rows
abund_table<-t(abund_table)
meta_table<-read.csv('ENV_pitlatrine.csv',row.names=1,check.names=FALSE)
 
#Just a check to ensure that the samples in meta_table are in the same order as in abund_table
meta_table<-meta_table[rownames(abund_table),]
 
#Get grouping information
grouping_info<-data.frame(row.names=rownames(abund_table),t(as.data.frame(strsplit(rownames(abund_table),'_'))))
# > head(grouping_info)
# X1 X2 X3
# T_2_1   T  2  1
# T_2_10  T  2 10
# T_2_12  T  2 12
# T_2_2   T  2  2
# T_2_3   T  2  3
# T_2_6   T  2  6
 
#Apply proportion normalisation
x<-abund_table/rowSums(abund_table)
x<-x[,order(colSums(x),decreasing=TRUE)]
 
#Extract list of top N Taxa
N<-21
taxa_list<-colnames(x)[1:N]
#remove '__Unknown__' and add it to others
taxa_list<-taxa_list[!grepl('Unknown',taxa_list)]
N<-length(taxa_list)
 
#Generate a new table with everything added to Others
new_x<-data.frame(x[,colnames(x) %in% taxa_list],Others=rowSums(x[,!colnames(x) %in% taxa_list]))
 
 
#You can change the Type=grouping_info[,1] should you desire any other grouping of panels
df<-NULL
for (i in 1:dim(new_x)[2]){
  tmp<-data.frame(row.names=NULL,Sample=rownames(new_x),Taxa=rep(colnames(new_x)[i],dim(new_x)[1]),Value=new_x[,i],Type=grouping_info[,1])
  if(i==1){df<-tmp} else {df<-rbind(df,tmp)}
}
colours <- c('#F0A3FF', '#0075DC', '#993F00','#4C005C','#2BCE48','#FFCC99','#808080','#94FFB5','#8F7C00','#9DCC00','#C20088','#003380','#FFA405','#FFA8BB','#426600','#FF0010','#5EF1F2','#00998F','#740AFF','#990000','#FFFF00');
 
 
library(ggplot2)
p<-ggplot(df,aes(Sample,Value,fill=Taxa))+geom_bar(stat='identity')+facet_grid(. ~ Type, drop=TRUE,scale='free',space='free_x')
p<-p+scale_fill_manual(values=colours[1:(N+1)])
p<-p+theme_bw()+ylab('Proportions')
p<-p+ scale_y_continuous(expand = c(0,0))+theme(strip.background = element_rect(fill='gray85'))+theme(panel.margin = unit(0.3, 'lines'))
p<-p+theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
pdf('TAXAplot.pdf',height=6,width=21)
print(p)
dev.off()

以上代码将产生以下堆积图：

Correlation.R （相关性热图）

# ============================================================
# Tutorial on drawing a correlation map using ggplot2
# by Umer Zeeshan Ijaz (http://userweb.eng./umer.ijaz)
# =============================================================

abund_table<-read.csv('SPE_pitlatrine.csv',row.names=1,check.names=FALSE)
#Transpose the data to have sample names on rows
abund_table<-t(abund_table)
meta_table<-read.csv('ENV_pitlatrine.csv',row.names=1,check.names=FALSE)

#Filter out samples with fewer counts
abund_table<-abund_table[rowSums(abund_table)>200,]

#Extract the corresponding meta_table for the samples in abund_table
meta_table<-meta_table[rownames(abund_table),]

#You can use sel_env to specify the variables you want to use and sel_env_label to specify the labes for the pannel
sel_env<-c('pH','Temp','TS','VS','VFA','CODt','CODs','perCODsbyt','NH4','Prot','Carbo')
sel_env_label <- list(
'pH'='PH',
'Temp'='Temperature',
'TS'='TS',
'VS'='VS',
'VFA'='VFA',
'CODt'='CODt',
'CODs'='CODs',
'perCODsbyt'='%CODs/t',
'NH4'='NH4',
'Prot'='Protein',
'Carbo'='Carbon'
)

sel_env_label<-t(as.data.frame(sel_env_label))
sel_env_label<-as.data.frame(sel_env_label)
colnames(sel_env_label)<-c('Trans')
sel_env_label$Trans<-as.character(sel_env_label$Trans)

#Now get a filtered table based on sel_env
meta_table_filtered<-meta_table[,sel_env]
abund_table_filtered<-abund_table[rownames(meta_table_filtered),]

#Apply normalisation (either use relative or log-relative transformation)
#x<-abund_table_filtered/rowSums(abund_table_filtered)
x<-log((abund_table_filtered+1)/(rowSums(abund_table_filtered)+dim(abund_table_filtered)[2]))

x<-x[,order(colSums(x),decreasing=TRUE)]
#Extract list of top N Taxa
N<-51
taxa_list<-colnames(x)[1:N]
#remove '__Unknown__' and add it to others
taxa_list<-taxa_list[!grepl('Unknown',taxa_list)]
N<-length(taxa_list)
x<-data.frame(x[,colnames(x) %in% taxa_list])
y<-meta_table_filtered

#Get grouping information
grouping_info<-data.frame(row.names=rownames(abund_table),t(as.data.frame(strsplit(rownames(abund_table),'_'))))
# > head(grouping_info)
# X1 X2 X3
# T_2_1 T 2 1
# T_2_10 T 2 10
# T_2_12 T 2 12
# T_2_2 T 2 2
# T_2_3 T 2 3
# T_2_6 T 2 6

#Let us group on countries
groups<-grouping_info[,1]

#You can use kendall, spearman, or pearson below:
method<-'kendall'

#Now calculate the correlation between individual Taxa and the environmental data
df<-NULL
for(i in colnames(x)){
for(j in colnames(y)){
for(k in unique(groups)){
a<-x[groups==k,i,drop=F]
b<-y[groups==k,j,drop=F]
tmp<-c(i,j,cor(a[complete.cases(b),],b[complete.cases(b),],use='everything',method=method),cor.test(a[complete.cases(b),],b[complete.cases(b),],method=method)$p.value,k)
if(is.null(df)){
df<-tmp
}
else{
df<-rbind(df,tmp)
}
}
}
}

df<-data.frame(row.names=NULL,df)
colnames(df)<-c('Taxa','Env','Correlation','Pvalue','Type')
df$Pvalue<-as.numeric(as.character(df$Pvalue))
df$AdjPvalue<-rep(0,dim(df)[1])
df$Correlation<-as.numeric(as.character(df$Correlation))

#You can adjust the p-values for multiple comparison using Benjamini & Hochberg (1995):
# 1 -> donot adjust
# 2 -> adjust Env + Type (column on the correlation plot)
# 3 -> adjust Taxa + Type (row on the correlation plot for each type)
# 4 -> adjust Taxa (row on the correlation plot)
# 5 -> adjust Env (panel on the correlation plot)
adjustment_label<-c('NoAdj','AdjEnvAndType','AdjTaxaAndType','AdjTaxa','AdjEnv')
adjustment<-5

if(adjustment==1){
df$AdjPvalue<-df$Pvalue
} else if (adjustment==2){
for(i in unique(df$Env)){
for(j in unique(df$Type)){
sel<-df$Env==i & df$Type==j
df$AdjPvalue[sel]<-p.adjust(df$Pvalue[sel],method='BH')
}
}
} else if (adjustment==3){
for(i in unique(df$Taxa)){
for(j in unique(df$Type)){
sel<-df$Taxa==i & df$Type==j
df$AdjPvalue[sel]<-p.adjust(df$Pvalue[sel],method='BH')
}
}
} else if (adjustment==4){
for(i in unique(df$Taxa)){
sel<-df$Taxa==i
df$AdjPvalue[sel]<-p.adjust(df$Pvalue[sel],method='BH')
}
} else if (adjustment==5){
for(i in unique(df$Env)){
sel<-df$Env==i
df$AdjPvalue[sel]<-p.adjust(df$Pvalue[sel],method='BH')
}
}

#Now we generate the labels for signifant values
df$Significance<-cut(df$AdjPvalue, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c('***', '**', '*', ''))

#We ignore NAs
df<-df[complete.cases(df),]

#We want to reorganize the Env data based on they appear
df$Env<-factor(df$Env,as.character(df$Env))

#We use the function to change the labels for facet_grid in ggplot2
Env_labeller <- function(variable,value){
return(sel_env_label[as.character(value),'Trans'])
}

p <- ggplot(aes(x=Type, y=Taxa, fill=Correlation), data=df)
p <- p + geom_tile() + scale_fill_gradient2(low='#2C7BB6', mid='white', high='#D7191C')
p<-p+theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5))
p<-p+geom_text(aes(label=Significance), color='black', size=3)+labs(y=NULL, x=NULL, fill=method)
p<-p+facet_grid(. ~ Env, drop=TRUE,scale='free',space='free_x',labeller=Env_labeller)
pdf(paste('Correlation_',adjustment_label[adjustment],'.pdf',sep=''),height=8,width=22)
print(p)
dev.off()

以上代码将产生以下相关性指数所做的热图：