CellPhoneDB 目前仅针对人类基因,因此在分析小鼠数据时需要先进行人鼠基因的转换,本教程正是为了解决这一问题。

1 少量人鼠基因转换的函数

1-1 小鼠基因转人类基因

library("biomaRt")
human = useEnsembl(biomart="ensembl", dataset = "hsapiens_gene_ensembl")
mouse = useEnsembl(biomart="ensembl", dataset = "mmusculus_gene_ensembl")
#' Convert mouse gene symbols to human gene symbols
#'
#' Sends a linked query through `getLDS()`, filtering the mouse mart on
#' `mgi_symbol` and retrieving `hgnc_symbol` from the human mart. The `mouse`
#' and `human` mart objects are not arguments; they are looked up in the
#' calling environment and must exist before this function is called.
#'
#' @param x Character vector of mouse gene symbols.
#' @return Character vector with the unique values of the second column of
#'   the `getLDS()` result (the human symbols); `character(0)` when `x` is
#'   empty.
convertMouseGeneList <- function(x) {
  # Nothing to convert: skip the remote query altogether.
  if (length(x) == 0) {
    return(character(0))
  }

  genesV2 <- getLDS(
    attributes = c("mgi_symbol"),
    filters = "mgi_symbol",
    values = x,
    mart = mouse,
    attributesL = c("hgnc_symbol"),
    martL = human,
    uniqueRows = TRUE
  )

  # Column 1 is the queried attribute (mgi_symbol), column 2 the linked one
  # (hgnc_symbol); several mouse genes may share a human symbol, hence unique().
  unique(genesV2[, 2])
}
# Example: convert three mouse gene symbols to human symbols.
musGenes <- c("Hmmr", "Tlx3", "Cpeb4")
convertMouseGeneList(musGenes)
## [1] "HMMR" "CPEB4" "TLX3"
# Section 1-2: convert human genes to mouse genes ----

#' Convert human gene symbols to mouse gene symbols
#'
#' Sends a linked query through `getLDS()`, filtering the human mart on
#' `hgnc_symbol` and retrieving `mgi_symbol` from the mouse mart. The `human`
#' and `mouse` mart objects are not arguments; they are looked up in the
#' calling environment and must exist before this function is called.
#'
#' @param x Character vector of human gene symbols.
#' @return Character vector with the unique values of the second column of
#'   the `getLDS()` result (the mouse symbols); `character(0)` when `x` is
#'   empty.
convertHumanGeneList <- function(x) {
  # Nothing to convert: skip the remote query altogether.
  if (length(x) == 0) {
    return(character(0))
  }

  genesV2 <- getLDS(
    attributes = c("hgnc_symbol"),
    filters = "hgnc_symbol",
    values = x,
    mart = human,
    attributesL = c("mgi_symbol"),
    martL = mouse,
    uniqueRows = TRUE
  )

  # Column 1 is the queried attribute (hgnc_symbol), column 2 the linked one
  # (mgi_symbol), so the result holds mouse symbols.
  mouse_symbols <- unique(genesV2[, 2])
  mouse_symbols
}
# Example: convert three human gene symbols to mouse symbols.
humGenes <- c("HMMR","TLX3","CPEB4")
convertHumanGeneList(humGenes)
## [1] "Cpeb4" "Hmmr" "Tlx3"
2 从源头转小鼠基因到人类基因

从10X Genomics官方网站[1]下载小鼠参考基因组文件,运行cellranger后,得到features.tsv文件,然后,将小鼠基因从源头转为人类基因。

Download: Mouse reference dataset required for Cell Ranger

wget https://cf.10xgenomics.com/supp/cell-exp/refdata-gex-mm10-2020-A.tar.gz
# Helper that renders a table with download buttons ----
library(tidyverse)
library(DT)

#' Show a data frame as an interactive table with export buttons
#'
#' Wraps `DT::datatable()` with the "Buttons" extension enabled, offering
#' csv / excel / pdf buttons and a page-length menu of 10, 25, 50 or -1
#' (labelled "All") rows.
#'
#' @param x The table-like object handed to `DT::datatable()`.
#' @return The value returned by `DT::datatable()`.
create_dt <- function(x) {
  # First element: page lengths; second element: the labels shown for them.
  page_sizes <- list(c(10, 25, 50, -1), c(10, 25, 50, "All"))
  table_options <- list(
    dom = "Blfrtip",
    buttons = c("csv", "excel", "pdf"),
    lengthMenu = page_sizes
  )
  DT::datatable(x, extensions = "Buttons", options = table_options)
}
# Read the features.tsv file (tab-separated, no header row).
gene_list <- read.table("features.tsv", header = FALSE, sep = "\t")
create_dt(gene_list)

# Convert mouse gene names to human gene names; the second column of
# features.tsv (V2) is passed as the mouse symbols to look up.
genesV2 <- getLDS(
  attributes = c("mgi_symbol"),
  filters = "mgi_symbol",
  values = gene_list$V2,
  mart = mouse,
  attributesL = c("hgnc_symbol"),
  martL = human,
  uniqueRows = TRUE
)
# Keep only the first row for each value of column 2 (the human symbol), so
# every human symbol appears once in the mapping table.
genesV2 <- genesV2[!duplicated(genesV2[, 2]), ]
create_dt(genesV2)

# Save the file (left commented out; uncomment to write the mapping table
# under the file name "mouse_to_human_genes.txt").
# write.table(genesV2, "mouse_to_human_genes.txt", sep = "\t", row.names = FALSE, quote = FALSE)
# Section 3: apply the converted mouse-to-human gene table ----

# Read the mapping file (tab-separated, with a header row).
genesV2 <- read.table("mouse_to_human_genes.txt", sep = "\t", header = TRUE)

# Prepare the mouse dataset
library(Seurat)
library(SeuratData)
# To see a manifest of all available datasets
AvailableData()

# Choose small mouse dataset
InstallData("stxKidney")
data("stxKidney")
stxKidney
## An object of class Seurat
## 31053 features across 1438 samples within 1 assay
## Active assay: Spatial (31053 features, 0 variable features)

## To accelerate, sample 100 cells
# sample() is called without a preceding set.seed() in this section, so the
# selected cells differ between runs; call set.seed() first if a reproducible
# subset is needed.
seurat_object <- stxKidney
sub_cells <- subset(seurat_object, cells = sample(Cells(seurat_object), 100))
sub_cells@assays$Spatial[1:4, 1:4]
## 4 x 4 sparse Matrix of class "dgCMatrix"
##         GTGGACGCATTTGTCC-1 CTTAGTGTAGTAGCAT-1 ACGCAAACTAATAGAT-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
##         GGGCTATGATCGATGG-1
## Xkr4                     .
## Gm1992                   .
## Gm37381                  .
## Rp1                      .
## Extract Expression Data
sp1 <- sub_cells
# Densify the sparse matrix stored in the `data` slot of the Spatial assay.
sp1_counts <- as.matrix(sp1@assays$Spatial@data)
# Notice: spatial data is used here; for regular transcriptome data extract
# sp1@assays$RNA@data instead.
# NOTE(review): direct `@data` slot access presumably depends on the assay
# class provided by the installed Seurat version; confirm before reusing.
# Add the row names as an explicit `gene` column; check.names = FALSE keeps
# the cell barcodes (which contain "-") unchanged as column names.
sp1_counts <- data.frame(gene = rownames(sp1_counts), sp1_counts, check.names = FALSE)
dim(sp1_counts)
## [1] 31053 101
sp1_counts[1:4, 1:4]
##            gene GTGGACGCATTTGTCC-1 CTTAGTGTAGTAGCAT-1 ACGCAAACTAATAGAT-1
## Xkr4       Xkr4                  0                  0                  0
## Gm1992   Gm1992                  0                  0                  0
## Gm37381 Gm37381                  0                  0                  0
## Rp1         Rp1                  0                  0                  0
# Convert mouse gene names to human gene names.
# match() yields NA for mouse genes that are absent from column 1 of the
# mapping table, so those rows receive Gene = NA.
sp1_counts$Gene <- genesV2[match(sp1_counts$gene, genesV2[, 1]), 2]
# Drop genes without a human symbol. Missing values are tested with is.na();
# a comparison against the string 'NA' only filters them because subset()
# discards rows whose condition evaluates to NA.
sp1_counts <- subset(sp1_counts, !is.na(Gene))
# NOTE(review): blank ("") human symbols, if the mapping table contains any,
# are kept here; confirm whether they should be removed as well.
# Move Gene to the first column, then remove the original mouse `gene` column.
sp1_counts <- dplyr::select(sp1_counts, Gene, everything())
sp1_counts <- sp1_counts[, !(colnames(sp1_counts) %in% "gene")]
dim(sp1_counts)
## [1] 16404 101
sp1_counts[1:4, 1:4]
##          Gene GTGGACGCATTTGTCC-1 CTTAGTGTAGTAGCAT-1 ACGCAAACTAATAGAT-1
## Xkr4     XKR4                  0                  0                  0
## Rp1       RP1                  0                  0                  0
## Sox17   SOX17                  0                  0                  1
## Mrpl15 MRPL15                  3                  2                  3

# Save the file (left commented out; uncomment to write the converted table).
# write.table(sp1_counts, "sp1_counts_human.txt", row.names = FALSE, sep = "\t", quote = FALSE)
文中链接

[1] 10X Genomics官方网站: https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest
|