Closed ixxmu closed 3 years ago
首先看看需要什么样的命令才能有loom文件:
Usage: velocyto run10x [OPTIONS] SAMPLEFOLDER GTFFILE
Runs the velocity analysis for a Chromium 10X Sample
10XSAMPLEFOLDER specifies the cellranger sample folder
GTFFILE genome annotation file
Options:
-s, --metadatatable FILE Table containing metadata of the various samples (csv fortmated rows are samples and cols are entries)
-m, --mask FILE .gtf file containing intervals to mask
-l, --logic TEXT The logic to use for the filtering (default: Default)
-M, --multimap Consider not unique mappings (not reccomended)
-@, --samtools-threads INTEGER The number of threads to use to sort the bam by cellID file using samtools
--samtools-memory INTEGER The number of MB used for every thread by samtools to sort the bam file
-t, --dtype TEXT The dtype of the loom file layers - if more than 6000 molecules/reads per gene per cell are expected set uint32 to avoid truncation (default run_10x: uint16)
-d, --dump TEXT For debugging purposes only: it will dump a molecular mapping report to hdf5. --dump N, saves a cell every N cells. If p is prepended a more complete (but huge) pickle report is printed (default: 0)
-v, --verbose Set the vebosity level: -v (only warinings) -vv (warinings and info) -vvv (warinings, info and debug)
--help Show this message and exit.
CLI Usage Guide — velocyto 0.17.16 documentation
http://velocyto.org/velocyto.py/tutorial/cli.html
我一开始还以为自己成功跑上了,但是很快就出错了:
通过搜索,发现可能是自己下载文件出了问题:
Need preparation data to run velocyto: samtools version, bam file, repeat_msk.gtf · Issue #112 · velocyto-team/velocyto.py
https://github.com/velocyto-team/velocyto.py/issues/112
Table Browser
https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=611454127_NtvlaW6xBSIRYJEBI0iRDEWisITa&clade=mammal&org=Mouse&db=mm10&hgta_group=allTracks&hgta_track=xenoRefGene&hgta_table=0&hgta_regionType=genome&position=chr12%3A56%2C694%2C976-56%2C714%2C605&hgta_outputType=gff&hgta_outFileName=hg38_repeats_repeatMasker_allTracks.gtf
下载之后解压到服务器,重新跑,就成功了哟!
几个小时之后还是没有报错!
这是个好兆头~。
velocyto run10x -m /data/yudonglin/velocity/mm10_rmsk.gtf /data/yudonglin/singlecell/ABFC20190816-04-分析结果/NoPro /data/yudonglin/velocity/refdata-gex-mm10-2020-A/genes/genes.gtf
最终得到loom文件!
然后又遇到问题:
export OPENBLAS_NUM_THREADS=1
「生信Debug」OpenBLAS blas_thread_init: pthread_create: Resource temporarily unavailable_xuzhougeng blog-CSDN博客
https://blog.csdn.net/u012110870/article/details/106115577
最终成功!
> library(velocyto.R)
Loading required package: Matrix
> library(pagoda2)
Loading required package: igraph
Attaching package: ‘igraph’
The following objects are masked from ‘package:stats’:
decompose, spectrum
The following object is masked from ‘package:base’:
union
Attaching package: ‘pagoda2’
The following object is masked from ‘package:velocyto.R’:
armaCor
> ldat <- read.loom.matrices("/data/yudonglin/singlecell/ABFC20190816-04-分析结果/NoPro/velocyto/NoPro.loom")
reading loom file via hdf5r...
> emat <- ldat$spliced
> hist(log10(colSums(emat)),col='wheat',xlab='cell size')
>
> # 数据清洗
> emat <- emat[,colSums(emat)>=1e3]
> emat <- emat[!duplicated(rownames(emat)),]
>
> r <- Pagoda2$new(emat, modelType = "plain", trim = 10, log.scale = TRUE)
8158cells,32245genes; normalizing ...
using plain model
winsorizing ...
log scale ...
done.
> r$adjustVariance(plot = TRUE, do.par = TRUE, gam.k = 10)
calculating variance fit ...
using gam
2271overdispersed genes ...2271
persisting ...
done.
> r$calculatePcaReduction(nPcs = 100, n.odgenes = 3e3, maxit = 300)
running PCA using 3000 OD genes .
.
.
.
done
> r$makeKnnGraph(k = 30, type = "PCA", center = TRUE,distance = "cosine")
creating space of type angular done
adding data ... done
building index ... done
querying ... done
> r$getKnnClusters(method = multilevel.community, type = "PCA",name = "multilevel")
w.legend = FALSE, mark.clusters = TRUE,
min.group.size = 10, shuffle.colors = FALSE,
mark.cluster.cex = 1, alpha = 0.3, main = "cell clusters")
r$plotEmbedding(type = "PCA", embeddingType = "tSNE", colors = r$depth, main = "depth")> r$getEmbedding(type = "PCA", embeddingType = "tSNE",
+ perplexity = 50,verbose = FALSE)
calculating distance ...
pearson ...
running tSNE using56cores:
> r$plotEmbedding(type = "PCA", embeddingType = "tSNE",
+ show.legend = FALSE, mark.clusters = TRUE,
+ min.group.size = 10, shuffle.colors = FALSE,
+ mark.cluster.cex = 1, alpha = 0.3, main = "cell clusters")
Warning message:
Ignoring unknown parameters: mark.clusters, min.group.size, mark.cluster.cex, main
> r$plotEmbedding(type = "PCA", embeddingType = "tSNE", colors = r$depth, main = "depth")
> emat <- ldat$spliced
> nmat <- ldat$unspliced
> emat <- emat[,rownames(r$counts)]
nes.by.cluster.expression(emat, cluster.label,
min.max.cluster.average = 0.2)
nmat <- filter.genes.by.cluster.expression(nmat, cluster.label,
min.max.cluster.average = 0.05)
length(intersect(rownames(emat), rownames(nmat)))> nmat <- nmat[,rownames(r$counts)]
>
> cluster.label <- r$clusters$PCA$multilevel
> emb <- r$embeddings$PCA$tSNE
> cell.dist <- as.dist(1-armaCor(t(r$reductions$PCA)))
> emat <- filter.genes.by.cluster.expression(emat, cluster.label,
+ min.max.cluster.average = 0.2)
> nmat <- filter.genes.by.cluster.expression(nmat, cluster.label,
+ min.max.cluster.average = 0.05)
> length(intersect(rownames(emat), rownames(nmat)))
[1] 7526
> fit.quantile <- 0.02
> rvel.cd <- gene.relative.velocity.estimates(emat, nmat,deltaT = 1,
+ kCells = 25, cell.dist = cell.dist,
+ fit.quantile = fit.quantile)
calculating cell knn ... done
calculating convolved matrices ... done
fitting gamma coefficients ... done. succesfful fit for 7526 genes
filtered out 15 out of 7526 genes due to low nmat-emat correlation
filtered out 562 out of 7511 genes due to low nmat-emat slope
calculating RNA velocity shift ... done
calculating extrapolated cell state ... done
> show.velocity.on.embedding.cor(emb,rvel.cd,n=300,scale='sqrt',cell.colors=ac(cell.colors,alpha=0.5),cex=0.8,arrow.scale=5,show.grid.flow=TRUE,min.grid.cell.mass=0.5,grid.n=40,arrow.lwd=1,do.par=F,cell.border.alpha = 0.1)
Error in as.character(col) %in% "0" : object 'cell.colors' not found
> cell.colors <- sccore::fac2col(cluster.label)
> show.velocity.on.embedding.cor(emb, rvel.cd, n = 200, scale = 'sqrt',
+ cell.colors = ac(cell.colors, alpha = 0.5),
+ cex = 0.8, arrow.scale = 3, show.grid.flow = TRUE,
+ min.grid.cell.mass = 0.5, grid.n = 40,
+ arrow.lwd = 1,do.par = TRUE, cell.border.alpha = 0.1)
delta projections ... sqrt
knn ... transition probs ... done
calculating arrows ... done
grid estimates ... grid.sd= 1.327798 min.arrow.size= 0.02655596 max.grid.arrow.length= 0.0536128 done
>
> gene.relative.velocity.estimates(emat, nmat, deltaT = 1, kCells = 25,
+ kGenes = 1, fit.quantile = fit.quantile,
+ cell.emb = emb, cell.colors = cell.colors,
+ cell.dist = cell.dist, show.gene = "GAPDH",
+ old.fit = rvel.cd, do.par = TRUE)
Error in gene.relative.velocity.estimates(emat, nmat, deltaT = 1, kCells = 25, :
gene GAPDH is not present in the filtered expression matrices
> dev.off()
null device
1
> gene.relative.velocity.estimates(emat, nmat, deltaT = 1, kCells = 25,
+ kGenes = 1, fit.quantile = fit.quantile,
+ cell.emb = emb, cell.colors = cell.colors,
+ cell.dist = cell.dist, show.gene = "Gapdh",
+ old.fit = rvel.cd, do.par = TRUE)
calculating convolved matrices ... done
[1] 1
> dev.off()
null device
1
>
成功!欧耶!
重复一下结果还不一样耶,这个有点坑爹哇!
接下来就是如何把这个结果展示在我们自己的tsne图或者umap图上。
在原有的UMAP上标注RNA velocity - 云+社区 - 腾讯云
https://cloud.tencent.com/developer/article/1620345
当然,还是不出所料有问题,然后就去解决。
(重点是让行名即barcode一致)
#导入包
library(velocyto.R)
library(pagoda2)
YDL<- readRDS("YDL_NONPRO.rds")
ldat <- read.loom.matrices(file = "/data/yudonglin/singlecell/ABFC20190816-04-分析结果/NoPro/velocyto/NoPro.loom")
emat <- ldat$spliced
nmat <- ldat$unspliced
seurat.object<-YDL
emb <- seurat.object@reductions$umap@cell.embeddings
# Estimate the cell-cell distances
cell.dist <- as.dist(1-armaCor(t(seurat.object@reductions$umap@cell.embeddings)))
head(colnames(emat))
head(colnames(nmat))
colnames(emat) <- paste(substring(colnames(emat),7,22),"-1",sep="")
colnames(nmat) <- paste(substring(colnames(nmat),7,22),"-1",sep="")
head(colnames(emat))
head(colnames(nmat))
# I'm not sure what this parameter does to be honest. 0.02 default
# perform gamma fit on a top/bottom quantiles of expression magnitudes
fit.quantile <- 0.02
# Main velocity estimation
rvel.cd <- gene.relative.velocity.estimates(emat,nmat,deltaT=2,
kCells=10,
cell.dist=cell.dist,
fit.quantile=fit.quantile,
n.cores=24)
# This section gets the colors out of the seurat tSNE object so that my seurat and velocyto plots use the same color scheme.
library("Seurat")
library("ggplot2")
gg <- UMAPPlot(seurat.object)
ggplot_build(gg)$data
colors <- as.list(ggplot_build(gg)$data[[1]]$colour)
names(colors) <- rownames(emb)
p1 <- show.velocity.on.embedding.cor(emb,rvel.cd,n=30,scale='sqrt',
cell.colors=ac(colors,alpha=0.5),
cex=0.8,arrow.scale=2,show.grid.flow=T,
min.grid.cell.mass=1.0,grid.n=50,arrow.lwd=1,
do.par=F,cell.border.alpha = 0.1,
n.cores=24,main="RNA Velocity")
原来的umap图如下:
https://mp.weixin.qq.com/s/u7zsn7d1Ls2OxeGZfYsjCg