ixxmu / mp_duty

抓取网络文章到github issues保存
https://archives.duty-machine.now.sh/
103 stars 30 forks source link

RNA velocity分析代码 #3489

Closed ixxmu closed 1 year ago

ixxmu commented 1 year ago

https://mp.weixin.qq.com/s/YFyo39-cfgXrYR2I2-Ty9Q

ixxmu commented 1 year ago

RNA velocity分析代码 by 东林的扯淡小屋

分别是R语言版本和Python版本:

conda activate velocityexport OPENBLAS_NUM_THREADS=1set.seed(123)#导入包library(Seurat)library(tidyverse)library(patchwork)library(SCENIC)library(plyr)library(permute)library(data.table)library(SCopeLoomR)library(velocyto.R)library(pagoda2)setwd("/data3/yudonglin/ren")#YDL<- readRDS("/data3/yudonglin/ren/10X/cell_matastasis.RDS")
#liver metastasized tumor from rectal cancerx1 <-velocyto.R::read.loom.matrices("/data3/yudonglin/ren/HRR016237/velocyto/HRR016237.loom")splice<-x1$splicedunsplice<-x1$unsplicedhead(colnames(splice))head(colnames(unsplice))colnames(splice) <- paste("liver metastasized tumor from rectal cancer_",substring(colnames(splice),11,26),"-1",sep="")colnames(unsplice) <- paste("liver metastasized tumor from rectal cancer_",substring(colnames(unsplice),11,26),"-1",sep="")head(colnames(splice))head(colnames(unsplice))x1$spliced<-splicex1$unspliced<-unsplicetable(YDL@meta.data$orig.ident)#primary rectal cancerx2 <-velocyto.R::read.loom.matrices(file = "/data3/yudonglin/ren/HRR016238/velocyto/HRR016238.loom", engine = "hdf5r")splice<-x2$splicedunsplice<-x2$unsplicedhead(colnames(splice))head(colnames(unsplice))colnames(splice) <- paste("primary rectal cancer_",substring(colnames(splice),11,26),"-1",sep="")colnames(unsplice) <- paste("primary rectal cancer_",substring(colnames(unsplice),11,26),"-1",sep="")head(colnames(splice))head(colnames(unsplice))x2$spliced<-splicex2$unspliced<-unsplicetable(YDL@meta.data$orig.ident)#adjacent non-tumor tissue of hepatocellular carcinomax3 <-velocyto.R::read.loom.matrices(file = "/data3/yudonglin/ren/HRR016239/velocyto/HRR016239.loom", engine = "hdf5r")splice<-x3$splicedunsplice<-x3$unsplicedhead(colnames(splice))head(colnames(unsplice))colnames(splice) <- paste("adjacent non-tumor tissue of hepatocellular carcinoma_",substring(colnames(splice),11,26),"-1",sep="")colnames(unsplice) <- paste("adjacent non-tumor tissue of hepatocellular carcinoma_",substring(colnames(unsplice),11,26),"-1",sep="")head(colnames(splice))head(colnames(unsplice))x3$spliced<-splicex3$unspliced<-unsplicetable(YDL@meta.data$orig.ident)#liver tumorx4 <-velocyto.R::read.loom.matrices(file = "/data3/yudonglin/ren/HRR016240/velocyto/HRR016240.loom", engine = "hdf5r")splice<-x4$splicedunsplice<-x4$unsplicedhead(colnames(splice))head(colnames(unsplice))colnames(splice) <- paste("liver tumor_",substring(colnames(splice),11,26),"-1",sep="")colnames(unsplice) <- paste("liver tumor_",substring(colnames(unsplice),11,26),"-1",sep="")head(colnames(splice))head(colnames(unsplice))x4$spliced<-splicex4$unspliced<-unsplicespliced <- cbind(x1[["spliced"]], x2[["spliced"]],x3[["spliced"]], x4[["spliced"]])unspliced <- cbind(x1[["unspliced"]], x2[["unspliced"]],x3[["unspliced"]], x4[["unspliced"]])emat <- splicednmat <- unspliced# saveRDS(spliced,"spliced.rds")#saveRDS(unspliced,"unspliced.rds")# write.csv(unspliced,"unspliced.csv")seurat.object<-YDLemb <- seurat.object@reductions$tsne@cell.embeddings# Estimate the cell-cell distancescell.dist <- as.dist(1-armaCor(t(seurat.object@reductions$tsne@cell.embeddings)))fit.quantile <- 0.02# Main velocity estimationrvel.cd <- gene.relative.velocity.estimates(emat,nmat,deltaT=2, kCells=10, cell.dist=cell.dist, fit.quantile=fit.quantile, n.cores=48)# This section gets the colors out of the seurat tSNE object so that my seurat and velocyto plots use the same color scheme.library("Seurat")library("ggplot2")gg <- TSNEPlot(seurat.object)ggplot_build(gg)$datacolors <- as.list(ggplot_build(gg)$data[[1]]$colour)names(colors) <- rownames(emb)p1 <- show.velocity.on.embedding.cor(emb,rvel.cd,n=30,scale='sqrt', cell.colors=ac(colors,alpha=0.5), cex=0.8,arrow.scale=60,show.grid.flow=T, min.grid.cell.mass=1.0,grid.n=50,arrow.lwd=1, do.par=F,cell.border.alpha = 0.1, n.cores=48,main="RNA Velocity")p2<-show.velocity.on.embedding.cor(emb, rvel.cd, n = 200, scale = 'sqrt', cell.colors = ac(colors, alpha = 0.5), cex = 0.8, arrow.scale = 25, show.grid.flow = T, min.grid.cell.mass = 0.5, grid.n = 40, arrow.lwd = 1,do.par = T, cell.border.alpha = 0.1,n.cores=48,main="RNA Velocity")DimPlot(YDL, reduction = "umap", label = F, pt.size = 1.5)p3<-show.velocity.on.embedding.cor(emb, rvel.cd, n = 200, scale = 'sqrt', cell.colors = ac(colors, alpha = 0.5), cex = 0.8, arrow.scale = 50, show.grid.flow = T, min.grid.cell.mass = 0.5, grid.n = 40, arrow.lwd = 1,do.par = T, cell.border.alpha = 0.1,n.cores=48,main="RNA Velocity")p4<-show.velocity.on.embedding.cor(emb, rvel.cd, n = 200, scale = 'sqrt', cell.colors = ac(colors, alpha = 0.5), cex = 0.8, arrow.scale = 60, show.grid.flow = T, min.grid.cell.mass = 0.5, grid.n = 40, arrow.lwd = 1,do.par = T, cell.border.alpha = 0.1,n.cores=48,main="RNA Velocity")DimPlot(YDL, reduction = "umap", label = F, pt.size = 1.5)dev.off()savehistory("code.txt")


library(Seurat)library(SeuratDisk)library(SeuratWrappers)seurat_obj<- YDL# save metadata table:seurat_obj$barcode <- colnames(seurat_obj)seurat_obj$UMAP_1 <- seurat_obj@reductions$umap@cell.embeddings[,1]seurat_obj$UMAP_2 <- seurat_obj@reductions$umap@cell.embeddings[,2]seurat_obj$tSNE_1 <- seurat_obj@reductions$tsne@cell.embeddings[,1]seurat_obj$tSNE_2 <- seurat_obj@reductions$tsne@cell.embeddings[,2]write.csv(seurat_obj@meta.data, file='metadata.csv', quote=F, row.names=F)# write expression counts matrixlibrary(Matrix)counts_matrix <- GetAssayData(seurat_obj, assay='RNA', slot='counts')writeMM(counts_matrix, file='counts.mtx')# write dimesnionality reduction matrix, in this example case pca matrixwrite.csv(seurat_obj@reductions$pca@cell.embeddings, file='pca.csv', quote=F, row.names=F)# write gene nameswrite.table( data.frame('gene'=rownames(counts_matrix)),file='gene_names.csv', quote=F,row.names=F,col.names=F)



conda activate velocity
#conda activate velocytopythonimport scanpy as scimport anndatafrom scipy import iofrom scipy.sparse import coo_matrix, csr_matriximport numpy as npimport osimport pandas as pdimport matplotlib as mpl
import scvelo as scvimport scanpy as scimport cellrank as crimport numpy as npimport pandas as pdimport anndata as admpl.use('Agg')

# load sparse matrix:X = io.mmread("counts.mtx")
# create anndata objectadata = anndata.AnnData( X=X.transpose().tocsr())
# load cell metadata:cell_meta = pd.read_csv("metadata.csv")
# load gene names:with open("gene_names.csv", 'r') as f: gene_names = f.read().splitlines()
# set anndata observations and index obs by barcodes, var by gene namesadata.obs = cell_metaadata.obs.index = adata.obs['barcode']adata.var.index = gene_names
# load dimensional reduction:pca = pd.read_csv("pca.csv")pca.index = adata.obs.index
# set pca and umapadata.obsm['X_pca'] = pca.to_numpy()adata.obsm['X_umap'] = np.vstack((adata.obs['UMAP_1'].to_numpy(), adata.obs['UMAP_2'].to_numpy())).Tadata.obsm['X_tsne'] = np.vstack((adata.obs['tSNE_1'].to_numpy(), adata.obs['tSNE_2'].to_numpy())).T
# plot a UMAP colored by sampleID to test:sc.pl.umap(adata, color=['seurat_clusters'], frameon=False, save="True.pdf")sc.pl.tsne(adata, color=['seurat_clusters'], frameon=False, save="True.pdf")sc.pl.umap(adata, color=['celltype'], frameon=False, save="Truecelltype.pdf")sc.pl.tsne(adata, color=['celltype'], frameon=False, save="Truecelltype.pdf")



# save dataset as anndata formatadata.write('my_data.h5ad')
# reload datasetadata = sc.read_h5ad('my_data.h5ad')

ldata1=scv.read_loom("/data3/yudonglin/ren/HRR016237/velocyto/HRR016237.loom",validate=False)ldata2=scv.read_loom("/data3/yudonglin/ren/HRR016238/velocyto/HRR016238.loom",validate=False)ldata3=scv.read_loom("/data3/yudonglin/ren/HRR016239/velocyto/HRR016239.loom",validate=False)ldata4=scv.read_loom("/data3/yudonglin/ren/HRR016240/velocyto/HRR016240.loom",validate=False)

# rename barcodes in order to merge:barcodes = [bc.split(':')[1] for bc in ldata1.obs.index.tolist()]barcodes = ["liver metastasized tumor from rectal cancer_"+bc[0:len(bc)-1] for bc in barcodes]ldata1.obs.index = barcodes
barcodes = [bc.split(':')[1] for bc in ldata2.obs.index.tolist()]barcodes = ["primary rectal cancer_"+bc[0:len(bc)-1] for bc in barcodes]ldata2.obs.index = barcodes
barcodes = [bc.split(':')[1] for bc in ldata3.obs.index.tolist()]barcodes = ["adjacent non-tumor tissue of hepatocellular carcinoma_"+bc[0:len(bc)-1] for bc in barcodes]ldata3.obs.index = barcodes
barcodes = [bc.split(':')[1] for bc in ldata4.obs.index.tolist()]barcodes = ["liver tumor_"+bc[0:len(bc)-1] for bc in barcodes]ldata4.obs.index = barcodes



# make variable names uniqueldata1.var_names_make_unique()ldata2.var_names_make_unique()ldata3.var_names_make_unique()ldata4.var_names_make_unique()
#ldata = ldata1.concatenate([ldata1, ldata2, ldata3, ldata4],join = 'outer')
ldata = ldata1.concatenate([ldata2, ldata3, ldata4]) scv.utils.clean_obs_names(adata)scv.utils.clean_obs_names(ldata)
# concatenate the three loom#ldata = ldata1.concatenate([ldata1, ldata2, ldata3, ldata4]) # concatenate the three loom#ldata = ldata1.concatenate([ldata1, ldata2, ldata3, ldata4, ldata5])# merge matrices into the original adata objectadata = scv.utils.merge(adata, ldata)

# plot umap to checksc.pl.umap(adata, color='seurat_clusters', frameon=False, legend_loc='on data', title='', save='_celltypes.pdf')
sc.pl.tsne(adata, color='seurat_clusters', frameon=False, legend_loc='on data', title='', save='tsne.pdf')
sc.pl.umap(adata, color='celltype', frameon=False, legend_loc='on data', title='', save='_celltypes.pdf')
sc.pl.tsne(adata, color='celltype', frameon=False, legend_loc='on data', title='', save='tsne_celltype.pdf')
#scv.pl.proportions(adata, groupby='seurat_clusters',save="proportion.pdf")scv.pl.proportions(adata,save="proportion.pdf")
# pre-processscv.pp.filter_and_normalize(adata)scv.pp.moments(adata)# compute velocityscv.tl.velocity(adata, mode='stochastic')scv.tl.velocity_graph(adata)

scv.pl.velocity_embedding(adata, basis='umap', frameon=False, save='embedding_umap.pdf')scv.pl.velocity_embedding(adata, basis='tsne', frameon=False, save='embedding_tsne.pdf')

scv.pl.velocity_embedding_grid(adata, basis='umap', color='seurat_clusters', save='embedding_grid_umap.pdf', title='', scale=0.25)
scv.pl.velocity_embedding_grid(adata, basis='tsne', color='seurat_clusters', save='embedding_grid_tsne.pdf', title='', scale=0.25)

scv.pl.velocity_embedding_stream(adata, basis='umap', color='seurat_clusters', save='embedding_stream_umap.pdf', title='')scv.pl.velocity_embedding_stream(adata, basis='tsne', color='seurat_clusters', save='embedding_stream_tsne.pdf', title='')
#可以调节长宽比,不过最后只能输出图片,而不能是PDFscv.pl.velocity_embedding_stream(adata, basis='umap', color='seurat_clusters', figsize =(10, 10),save='embedding_stream_umap1.pdf', title='')scv.pl.velocity_embedding_stream(adata, basis='tsne', color='seurat_clusters', figsize =(10, 10),save='embedding_stream_tsne1.pdf', title='')



结果具有一致性。