载入数据library(dplyr) ## ## Attaching package: 'dplyr' ## The following objects are masked from 'package:stats': ## ## filter, lag ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union ## if(T){ load('expma.Rdata') load('probe.Rdata') } expma[1:5,1:5] ## GSM188013 GSM188014 GSM188016 GSM188018 GSM188020 ## 1007_s_at 15630.200 17048.800 13667.500 15138.800 10766.600 ## 1053_at 3614.400 3563.220 2604.650 1945.710 3371.290 ## 117_at 1032.670 1164.150 510.692 5061.200 452.166 ## 121_at 5917.800 6826.670 4562.440 5870.130 3869.480 ## 1255_g_at 224.525 395.025 207.087 164.835 111.609 boxplot(expma)##看下表达情况
metdata[1:5,1:5]
head(probe) ## ID Gene Symbol ENTREZ_GENE_ID ## 2 1053_at RFC2 5982 ## 3 117_at HSPA6 3310 ## 4 121_at PAX8 7849 ## 5 1255_g_at GUCA1A 2978 ## 7 1316_at THRA 7067 ## 8 1320_at PTPN21 11099 查看Gene Symbol是否有重复 table(duplicated(probe$`Gene Symbol`))##12549 FALSE 整合注释信息到表达矩阵 ID<-rownames(expma) expma<-apply(expma,1,function(x){log2(x+1)}) expma<-t(expma) eset<-expma[ID %in% probe$ID,] %>% cbind(probe) eset[1:5,1:5] ## GSM188013 GSM188014 GSM188016 GSM188018 GSM188020 ## 1053_at 11.819940 11.799371 11.347428 10.926822 11.719513 ## 117_at 10.013560 10.186300 8.999132 12.305549 8.823896 ## 121_at 12.531089 12.737178 12.155906 12.519422 11.918297 ## 1255_g_at 7.817144 8.629448 7.701043 7.373605 6.815178 ## 1316_at 9.497459 9.868281 8.831323 9.211346 8.453592 colnames(eset) ## [1] 'GSM188013' 'GSM188014' 'GSM188016' 'GSM188018' ## [5] 'GSM188020' 'GSM188022' 'ID' 'Gene Symbol' ## [9] 'ENTREZ_GENE_ID' ##方法一:aggregate函数 test1<-aggregate(x=eset[,1:6],by=list(eset$`Gene Symbol`),FUN=mean) ##方法二:dplyr
eset[1:5,1:5] ## GSM188013 GSM188014 GSM188016 GSM188018 GSM188020 ## 1053_at 11.819940 11.799371 11.347428 10.926822 11.719513 ## 117_at 10.013560 10.186300 8.999132 12.305549 8.823896 ## 121_at 12.531089 12.737178 12.155906 12.519422 11.918297 ## 1255_g_at 7.817144 8.629448 7.701043 7.373605 6.815178 ## 1316_at 9.497459 9.868281 8.831323 9.211346 8.453592 dim(eset) ## [1] 20878 9 colnames(eset)[8]<-'Gene' colnames(eset) ## [1] 'GSM188013' 'GSM188014' 'GSM188016' 'GSM188018' ## [5] 'GSM188020' 'GSM188022' 'ID' 'Gene' ## [9] 'ENTREZ_GENE_ID' test2<-eset[,c(1:6,8)] %>% arrange(Gene) %>% group_by(Gene) %>% summarise_all(mean) dim(test2)##同样得出了12549,与方法1结果相同 ## [1] 12549 7 ##方法三:tapply函数
#Sys.setenv(LANGUAGE= 'en') ##方法四:by函数
## for循环完成迭代 output<-c() for (i in 1:6){ value=by(eset[,i], INDICES = list(eset$Gene),FUN = mean) output<-rbind(output,value) } output[1:5,1:5] ## A1CF A2M A4GALT A4GNT ## value 7.165374 9.713797 5.580703 8.583414 9.163199 ## value 7.244615 9.743305 5.550033 5.929258 9.431585 ## value 7.813573 10.413311 5.652593 7.048563 8.779887 ## value 7.322442 9.940773 8.142194 8.679885 9.391991 ## value 8.368513 10.616926 6.422112 7.818966 10.182382 output<-t(output) colnames(output)<-colnames(eset)[1:6] test4<-output dim(test4) ## [1] 12549 6 test4[1:5,1:5]##得到的结果与前面的方法相同 ## GSM188013 GSM188014 GSM188016 GSM188018 GSM188020 ## 7.165374 7.244615 7.813573 7.322442 8.368513 ## A1CF 9.713797 9.743305 10.413311 9.940773 10.616926 ## A2M 5.580703 5.550033 5.652593 8.142194 6.422112 ## A4GALT 8.583414 5.929258 7.048563 8.679885 7.818966 ## A4GNT 9.163199 9.431585 8.779887 9.391991 10.18238 本期内容就到这里,我是老朋友白介素2,下期再见。
|
|