Open RafaellaFerraz opened 6 months ago
Hello this should work. Cheers
import anndata as ad
import scanpy as sc
import pandas as pd
import numpy as np
def starsolo_velocity_anndata(input_dir):
# Load Genes and Cells identifiers
"""
input directory should contain barcodes.tsv, features.tsv with 3 mtx from spliced, ambigious, unspliced
"""
obs = pd.read_csv(os.path.join(input_dir,'barcodes.tsv'), header = None, index_col = 0)
# Remove index column name to make it compliant with the anndata format
obs.index.name = None
var = pd.read_csv(os.path.join(input_dir,"features.tsv"), sep='\t',names = ('gene_ids', 'feature_types'), index_col = 1)
var.index.name = None
from scipy import io,sparse
spliced=sci.sparse.csr_matrix(sci.io.mmread(os.path.join(input_dir,"spliced.mtx")).T)
ambiguous=sci.sparse.csr_matrix(sci.io.mmread(os.path.join(input_dir,"ambiguous.mtx")).T)
unspliced=sci.sparse.csr_matrix(sci.io.mmread(os.path.join(input_dir,"unspliced.mtx")).T)
adata=ad.AnnData(X=spliced,obs=obs,var=var,layers={'spliced':spliced,"ambiguous":ambiguous,"unspliced":unspliced})
adata.var_names_make_unique()
return adata
Hi, Could anyone explain the Velocyto output from starsolo? It generates 5 files (ambiguous.mtx, barcodes.tsv, features.tsv, spliced.mtx, and unspliced.mtx), but I don't know how I can convert them into an anndata to use in scvelo. I found an issue with a code option (https://github.com/alexdobin/STAR/issues/774#issuecomment-850477636), but this does not match all Velocyto output files.
Thanks!