欢迎来到医科研,这里是白介素2的读书笔记,跟我一起聊临床与科研的故事, 生物医学数据挖掘,R语言,TCGA、GEO数据挖掘。 
R语言生存分析 
生存分析是医学数据挖掘中的重要内容。R语言中用于生存分析的包主要有survival与survminer。
library(survival) library(survminer) library(RTCGA.clinical)
提取生存信息 应用RTCGA.clinical包 Sys.setlocale('LC_ALL','C') ## [1] "C" survivalTCGA(BRCA.clinical, OV.clinical, extract.cols = "admin.disease_code") -> BRCAOV.survInfo head(BRCAOV.survInfo) ## times bcr_patient_barcode patient.vital_status admin.disease_code ## 1 3767 TCGA-3C-AAAU 0 brca ## 2 3801 TCGA-3C-AALI 0 brca ## 3 1228 TCGA-3C-AALJ 0 brca ## 4 1217 TCGA-3C-AALK 0 brca ## 5 158 TCGA-4H-AAAK 0 brca ## 6 1477 TCGA-5L-AAT0 0 brca
Surv(OS_time,status)~factor(分组因素):构建生存对象需要生存时间,生存状态,data
fit <- survfit(Surv(times, patient.vital_status) ~ admin.disease_code, data = BRCAOV.survInfo)
fit
## Call: survfit(formula = Surv(times, patient.vital_status) ~ admin.disease_code,
##     data = BRCAOV.survInfo)
##
##                            n events median 0.95LCL 0.95UCL
## admin.disease_code=brca 1098    104   3472    3126    4456
## admin.disease_code=ov    576    297   1354    1229    1470
survminer包可视化:生存对象,data,risk.table是风险表 survminer::ggsurvplot(fit, data = BRCAOV.survInfo, risk.table = TRUE)
Fig1
对生存曲线进一步细节控制美化,调整参数:
ggsurvplot(
  fit,                         # 生存对象
  data = BRCAOV.survInfo,      # data.
  risk.table = TRUE,           # 风险表.
  pval = TRUE,                 # p-value of log-rank test.
  conf.int = TRUE,             # 95%CI
                               # 生存曲线的点估计.
  xlim = c(0,2000),            # present narrower X axis, but not affect
                               # survival estimates.
  break.time.by = 500,         # break X axis in time intervals by 500.
  ggtheme = theme_minimal(),   # 主题定制.
  risk.table.y.text.col = T,   # colour risk table text annotations.
  risk.table.y.text = FALSE    # show bars instead of names in text annotations
                               # in legend of risk table
)
image.png基础版本的生存曲线 library("survival") head(lung) ## inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss ## 1 3 306 2 74 1 1 90 100 1175 NA ## 2 3 455 2 68 1 0 90 90 1225 15 ## 3 3 1010 1 56 1 0 90 90 NA 15 ## 4 5 210 2 57 1 1 90 60 1150 11 ## 5 1 883 2 60 1 0 100 90 NA 0 ## 6 12 1022 1 74 1 1 50 80 513 0 fit<- survfit(Surv(time, status) ~ sex, data = lung)
绘图
ggsurvplot(fit, data = lung)
image.png定制版的生存曲线 ggsurvplot(fit, data = lung, title = "Survival curves", subtitle = "Based on Kaplan-Meier estimates",#标题 caption = "created with survminer",#说明-右下角 font.title = c(16, "bold", "darkblue"),#标题字体 font.subtitle = c(15, "bold.italic", "purple"),#副标题字体 font.caption = c(14, "plain", "orange"),#说明字体 font.x = c(14, "bold.italic", "red"),#x轴字体 font.y = c(14, "bold.italic", "darkred"),#y轴字体 font.tickslab = c(12, "plain", "darkgreen"))#
image.png风险表risk.table ggsurvplot(fit, data = lung, risk.table = TRUE)
image.png
ncens plot-展示删失数据的情况
这个功能在文章里面见得比较少,但是也能画
ggsurvplot(fit, data = lung, risk.table = TRUE, ncensor.plot = TRUE)
image.png
|