【原】使用R实现批量方差分析(aov)和多重比较(LSD)

微生信生物 2021-01-16

展开全文

Tao Wen

2019年1月5日

新

年

快

乐

引子

由我的工作来看，coding存在的意义就是无序重复机械的分析过程，让自己的时间集中在科学问题上，然而实现这个过程却花费了更多的？时间。我们可以轻松的使用各种软件实现方差检验和多重比较。但是在同时对几百甚至几千组数据做方差分析和多重比较的时候，coding的优势就提体现出来了。

全部代码

##清空内存rm(list=ls()) 
#载入包
library("vegan")
library(dplyr)
library(tidyverse)
library(agricolae)
library(car)

#修改工作目录
setwd("F:/")
##############全部峰值数据来做统计#########去除磷酸之后进行分析
RET_all = read.delim("./Data_processing/a0_RET_缺失值处理clean_root_exudates.txt", header=T, row.names= 1,check.names=F) 
head(RET_all)

##            CK2       CK5       CK3       CK1      CK4       RS1       RS2
## RE1  26632.985 25224.034  30596.48  29812.76 25972.92 225065.33  75659.57
## RE2 118030.479 60185.801 212033.07 105679.97 94223.07      0.00      0.00
## RE3  92672.041 65451.785  96917.73 100051.53 77912.07 689247.38 232806.81
## RE4  22620.908  4666.435  18808.13   9232.97 22456.49  11528.64  11517.85
## RE5  22688.856 14983.546  17679.77  16747.14 16299.52  18650.75  19104.47
## RE6   9271.438  8784.055  12884.40  10313.30 10313.30      0.00      0.00
##            RS3        RS5      RS4        B1        B2        B3       B4
## RE1  31002.659  31484.553 26169.62  30109.59 31586.178  28028.22 30081.14
## RE2      0.000      0.000     0.00      0.00     0.000      0.00     0.00
## RE3 107402.115 101363.214 83245.85 151057.74 99776.930 101816.86 90471.41
## RE4   4962.333   3686.371 14990.28   5080.92  8451.872  27181.51 23715.11
## RE5  21119.811  14020.793 20534.05  18634.03 20593.487  23398.10 13077.19
## RE6      0.000      0.000     0.00      0.00     0.000      0.00     0.00
##            B5                                      compound
## RE1 28979.445                            (-)-Dihydrocarveol
## RE2     0.000 (2R,3S)-2-hydroxy-3-isopropylbutanedioic acid
## RE3 74380.312                        1,4-Cyclohexanedione 2
## RE4  6579.112             11-beta-prostaglandin-F-2-alpha 1
## RE5 17367.560                                 1-Hexadecanol
## RE6     0.000                     2,3-Dimethylsuccinic acid

RET = RET_all[1:dim(RET_all)[2]-1]
head(RET)

#导入需要的作图文件design = read.table("./Data_processing/mapping_RET_all.txt", header=T, row.names= 1, sep="\t") 
head(design)

##     group
## CK5    CK
## CK4    CK
## CK3    CK
## CK2    CK
## CK1    CK
## B5      B

design$ID = row.names(design)# 过滤数据并排序idx = rownames(design) %in% colnames(RET) 
sub_design = design[idx,]
count = RET[ ,rownames(sub_design)]
head(count)

comp_all = dim(count)
head(count)

# 转换原始数据为百分比，norm = t(t(count)/colSums(count,na=T)) * 100 # normalization to total 100head(norm)

##             CK5         CK4         CK3         CK2         CK1
## RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921
## RE2 0.013240346 0.050372725 0.078705924 0.041979565 0.040194100
## RE3 0.014398817 0.041652675 0.035975518 0.032960401 0.038053388
## RE4 0.001026575 0.012005496 0.006981510 0.008045514 0.003511648
## RE5 0.003296248 0.008713910 0.006562666 0.008069681 0.006369571
## RE6 0.001932415 0.005513607 0.004782645 0.003297546 0.003922539
##              B5          B4          B3          B2          B1
## RE1 0.006645724 0.011490223 0.008876583 0.007449352 0.011995785
## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## RE3 0.017057299 0.034557751 0.032245560 0.023531604 0.060182030
## RE4 0.001508758 0.009058563 0.008608428 0.001993307 0.002024260
## RE5 0.003982824 0.004995150 0.007410215 0.004856812 0.007423875
## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##             RS5         RS4         RS3         RS2         RS1
## RE1 0.009075086 0.005696559 0.012207578 0.022859290 0.066628198
## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## RE3 0.029216864 0.018120817 0.042290558 0.070338732 0.204044354
## RE4 0.001062557 0.003263060 0.001953964 0.003479927 0.003412932
## RE5 0.004041344 0.004469817 0.008316117 0.005772101 0.005521356
## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000

norm=as.data.frame(norm)

a=norm
head(a)

Pvalue<-c(rep(0,nrow(a)))
pB<-c(rep(0,nrow(a)))
pCK<-c(rep(0,nrow(a)))
pRS<-c(rep(0,nrow(a)))
str(pCK)

## num [1:604] 0 0 0 0 0 0 0 0 0 0 ...

# 2~4列是处理组1,5~7列是处理组2；#将使用循环对每一行进行方差检验#两组表达量都是0的基因，不检验；#每一行计算得到p value和log2FC将被加入原文件的后两列；#计算log2FC时，每组均值加0.001，是为了防止分母为0导致bug；# i= 2# ab = as.numeric(a[i,1:15])# b = sub_design$group# aa = data.frame(ab,b)# y=aov(ab~b,data = aa)# Pvalue[i]<-summary(y)[[1]][,5][1]# out <- LSD.test(y,"b", p.adj="none")# bb = as.data.frame(out$group)# bb$ID = row.names(bb)# bb<- arrange(bb, row.names(bb))# pB = bb[1,2]# pCK = bb[2,2]# pRS = bb[3,2]for(i in 1:nrow(a)){  if(sum(a[i,1:5])==0&&sum(a[i,6:10])==0&&sum(a[i,11:15])==0){
    Pvalue[i] <- "NA"
  }else{
    ab = as.numeric(a[i,1:15])
    b = sub_design$group
    aa = data.frame(ab,b)
    y=aov(ab~b,data = aa)
    Pvalue[i]<-summary(y)[[1]][,5][1]
    out <- LSD.test(y,"b", p.adj="none")
    bb = as.data.frame(out$group)
    bb$ID = row.names(bb)
    bb<- arrange(bb, row.names(bb))
    pCK[i] = bb$groups[2]
    pB[i] = as.character(bb[1,2])
    pCK[i] = as.character(bb[2,2])
    pRS[i] = as.character(bb[3,2])
    pRS[i];pCK[i];pB[i]    # Dunnett<-glht(y, linfct = mcp(type = "Dunnett"),alternative = "two.side")
    # out <- LSD.test(y,sub_design$group, p.adj="none")#进行多重比较，不矫正P值
    # tuk=TukeyHSD(y)
    # out <-scheffe.test (y,"group")
    
  }
}# Pvalueout<-cbind(a,Pvalue, pB,pCK,pRS)
head(out)

##             CK5         CK4         CK3         CK2         CK1
## RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921
## RE2 0.013240346 0.050372725 0.078705924 0.041979565 0.040194100
## RE3 0.014398817 0.041652675 0.035975518 0.032960401 0.038053388
## RE4 0.001026575 0.012005496 0.006981510 0.008045514 0.003511648
## RE5 0.003296248 0.008713910 0.006562666 0.008069681 0.006369571
## RE6 0.001932415 0.005513607 0.004782645 0.003297546 0.003922539
##              B5          B4          B3          B2          B1
## RE1 0.006645724 0.011490223 0.008876583 0.007449352 0.011995785
## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## RE3 0.017057299 0.034557751 0.032245560 0.023531604 0.060182030
## RE4 0.001508758 0.009058563 0.008608428 0.001993307 0.002024260
## RE5 0.003982824 0.004995150 0.007410215 0.004856812 0.007423875
## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##             RS5         RS4         RS3         RS2         RS1
## RE1 0.009075086 0.005696559 0.012207578 0.022859290 0.066628198
## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## RE3 0.029216864 0.018120817 0.042290558 0.070338732 0.204044354
## RE4 0.001062557 0.003263060 0.001953964 0.003479927 0.003412932
## RE5 0.004041344 0.004469817 0.008316117 0.005772101 0.005521356
## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##           Pvalue pB pCK pRS
## RE1 2.796373e-01  a   a   a
## RE2 2.278410e-04  b   a   b
## RE3 3.121200e-01  a   a   a
## RE4 2.608948e-01  a   a   a
## RE5 6.511946e-01  a   a   a
## RE6 5.103709e-06  b   a   b

RET_name = RET_all[dim(RET_all)[2]]
head(RET_name)

##                                          compound
## RE1                            (-)-Dihydrocarveol
## RE2 (2R,3S)-2-hydroxy-3-isopropylbutanedioic acid
## RE3                        1,4-Cyclohexanedione 2
## RE4             11-beta-prostaglandin-F-2-alpha 1
## RE5                                 1-Hexadecanol
## RE6                     2,3-Dimethylsuccinic acid

rusult_out = merge(out ,RET_name, by="row.names",all=F)
head(rusult_out)

##   Row.names         CK5         CK4         CK3         CK2         CK1
## 1       RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921
## 2      RE10 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## 3     RE100 0.001650192 0.003067643 0.002522073 0.001038059 0.002182409
## 4     RE101 0.006185346 0.017028862 0.029767109 0.012691608 0.072403533
## 5     RE102 0.002229133 0.008797501 0.021920508 0.004811970 0.026383348
## 6     RE103 0.001405335 0.002621616 0.005901619 0.002184498 0.011798804
##            B5          B4          B3          B2           B1         RS5
## 1 0.006645724 0.011490223 0.008876583 0.007449352 0.0119957851 0.009075086
## 2 0.001245465 0.001468414 0.001217488 0.001201888 0.0004006671 0.001499867
## 3 0.000000000 0.000000000 0.000000000 0.000000000 0.0000000000 0.002260963
## 4 0.026783700 0.070971462 0.011183080 0.019668387 0.0648061792 0.015967222
## 5 0.021370834 0.030246691 0.009708425 0.010392458 0.0244686738 0.008926288
## 6 0.009331215 0.009423304 0.002036022 0.002954869 0.0090469287 0.004238941
##           RS4         RS3         RS2          RS1       Pvalue pB pCK pRS
## 1 0.005696559 0.012207578 0.022859290 0.0666281983 2.796373e-01  a   a   a
## 2 0.001142296 0.001528522 0.001253334 0.0006690667 6.950709e-05  a   b   a
## 3 0.001242421 0.003830482 0.002939149 0.0046277758 5.597853e-04  b   a   a
## 4 0.007431094 0.023015901 0.019496753 0.0210254041 3.504130e-01  a   a   a
## 5 0.005726721 0.007780721 0.012159299 0.0048272448 1.341319e-01  a  ab   b
## 6 0.001370010 0.003022282 0.002371406 0.0034698880 2.604930e-01  a   a   a
##                                compound
## 1                    (-)-Dihydrocarveol
## 2 2-Amino-2-norbornanecarboxylic acid 3
## 3                           Analyte 172
## 4                           Analyte 177
## 5                           Analyte 178
## 6                           Analyte 179

dim(rusult_out)## [1] 604 21

write.table(rusult_out,"./sub_result_and_script_allgroup//a3_RET_compounds_t检验all结果.txt",quote = FALSE,row.names = T,
            col.names = NA,sep = "\t")