安装GISTIC2.0
mamba create -n gistic
mamba activate gistic
mamba install -c hcc gistic2=2.0.23
sudo apt install libncurses5
下载参考基因组
The reference genome file contains information about the location of genes and cytobands on a given build of the genome.
有 hg19 和 hg38 两个版本,可以在这个网站下载到
https://github.com/bzhanglab/GISTIC2_example
准备输入文件
只需要准备Segmentation File
CNV数据下载可以参考之前的推文 数据挖掘:是时候更新一下TCGA的数据了;
从之前下载好的数据TCGA_LUAD_CNV_Masked.arrow
读入并整理;
第三行代码选择 01A 肿瘤样本;
Seg文件共6列,详细参考官方文档https://broadinstitute./gistic2/
using(tidyverse, magrittr, TCGAbiolinks, data.table, maftools)
arrow::read_ipc_file("TCGA_LUAD_CNV_Masked.arrow") %>%
dplyr::filter(stringr::str_detect(Sample,".{13}01A")) %>%
dplyr::select(Sample,everything(),-GDC_Aliquot) %>%
data.table::fwrite("LUAD_masked.seg",sep="\t")
gistic脚本文件,需要输入 3 个位置参数,1 输出目录,2 参考基因组,3seg 文件
#!/bin/bash
source activate gistic
gistic2 \
-b $1 \
-refgene $2 \
-seg $3 \
-genegistic 1 \
-smallmem 0 \
-rx 0 \
-broad 1 \
-brlen 0.7 \
-conf 0.99 \
-armpeel 1 \
-savegene 1 \
-gcm extreme \
-v 30 \
-maxseg 46000 \
-ta 0.3 \
-td 0.3 \
-cap 1.5 \
-js 4
gistic脚本文件的使用
chmod u+x gistic.sh
nohup ./gistic.sh ./ hg38.UCSC.add_miR.160920.refgene.mat LUAD_masked.seg &> LUAD.log &
输出文件
├── all_data_by_genes.txt
├── all_lesions.conf_99.txt
├── all_thresholded.by_genes.txt
├── amp_genes.conf_99.txt
├── amp_qplot.pdf
├── amp_qplot.png
├── broad_data_by_genes.txt
├── broad_significance_results.txt
├── broad_values_by_arm.txt
├── D.cap1.5.mat
├── del_genes.conf_99.txt
├── del_qplot.pdf
├── del_qplot.png
├── focal_dat.0.7.mat
├── focal_data_by_genes.txt
├── freqarms_vs_ngenes.pdf
├── gistic_inputs.mat
├── peak_regs.mat
├── perm_ads.mat
├── raw_copy_number.pdf
├── raw_copy_number.png
├── regions_track.conf_99.bed
├── sample_cutoffs.txt
├── sample_seg_counts.txt
├── scores.0.7.mat
├── scores.gistic
└── wide_peak_regs.mat
maftools读入gistic输出的 4 个文件
g <- maftools::readGistic(
gisticAllLesionsFile = "all_lesions.conf_99.txt",
gisticAmpGenesFile = "amp_genes.conf_99.txt",
gisticDelGenesFile = "del_genes.conf_99.txt",
gisticScoresFile = "scores.gistic",
isTCGA = TRUE,
verbose = FALSE
)
染色体图 genome plot
pdf("ChromPlot.pdf", width = 12, height = 6, onefile = TRUE)
maftools::gisticChromPlot(
gistic = g,
fdrCutOff = 0.1,
txtSize = 0.8,
cytobandTxtSize = 0.5,
color = c("#D95F02", "#1B9E77"),
markBands = "all",
ref.build = "hg38",
y_lims = c(-0.5, 0.5)
)
dev.off()
BubblePlot
pdf("BubblePlot.pdf", width = 9, height = 6, onefile = TRUE)
maftools::gisticBubblePlot(
gistic = g,
color = c("#D95F02", "#1B9E77"),
markBands = NULL,
fdrCutOff = 0.1,
log_y = TRUE,
txtSize = 3
)
dev.off()
gisticOncoPlot
clinicalData分组信息
Tumor_Sample_Barcodes Group
1 TCGA-05-4249 Low
2 TCGA-05-4250 High
3 TCGA-05-4382 Low
clinicalData <- data.table::fread("clinicalData.txt") %>%
dplyr::rename(Tumor_Sample_Barcodes = V1) %>%
dplyr::mutate(Tumor_Sample_Barcodes = str_sub(Tumor_Sample_Barcodes, 1, 12))
pdf("gisticOncoPlot.pdf", width = 9, height = 6, onefile = TRUE)
maftools::gisticOncoPlot(
gistic = g,
clinicalData = clinicalData,
clinicalFeatures = "Group",
sortByAnnotation = TRUE,
top = 10,
gene_mar = 10,
barcode_mar = 10,
sepwd_genes = 0.5,
sepwd_samples = 0.25,
bandsToIgnore = NULL,
removeNonAltered = TRUE,
colors = c(Amp = "#D95F02", Del = "#1B9E77"),
SampleNamefontSize = 0.6,
fontSize = 0.8,
legendFontSize = 1.2,
annotationFontSize = 1.2,
borderCol = "white",
bgCol = "#CCCCCC"
)
dev.off()
Reference
https://www./packages/release/bioc/vignettes/maftools/inst/doc/maftools.html
http:///packages/devel/bioc/vignettes/maftools/inst/doc/oncoplots.html