Open zzzseeu opened 1 year ago
Have you tried using the most recent version of pyscenic? I would also check whether the line `dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]` ran correctly. Inspect the `dbs` object.
Hello, I have encountered the same problem. Have you solved it, and if so, how did you solve it?
Describe the bug: When the program reaches the `prune2df` step, it reports an error: `ArrowInvalid: Not a feather file`. I read your previous discussion and thought it was a problem with the integrity of the database file, but I checked my database and found no problem.
Steps to reproduce the behavior
# Reproduction script for the prune2df failure.
# The statements below had been collapsed onto a few long lines; they are laid
# out one per line again so the script parses as Python.

# os, glob, pickle and pd are used further down but were not imported in the
# script as posted.
import glob
import os
import pickle

import pandas as pd
import seaborn as sns

from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2

from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell

# Input and output locations.
DATA_FOLDER = "/home/xinzhou/scenic_test/"
DATABASE_FOLDER = "/home/xinzhou/scenic_test/cisTarget_databases/"
DATABASES_GLOB = os.path.join(DATABASE_FOLDER, "mm9-*.mc9nr.genes_vs_motifs.rankings.feather")
MOTIF_ANNOTATIONS_FNAME = os.path.join(DATABASE_FOLDER, "motifs-v9-nr.mgi-m0.001-o0.0.tbl")
MM_TFS_FNAME = os.path.join(DATABASE_FOLDER, 'allTFs_mm.txt')
SC_EXP_FNAME = os.path.join(DATA_FOLDER, "RNA-seq-counts_202207.csv")
ADJACENCIES_FNAME = os.path.join(DATA_FOLDER, "adjacencies.tsv")
MODULES_FNAME = os.path.join(DATA_FOLDER, "modules.p")
MOTIFS_FNAME = os.path.join(DATA_FOLDER, "motifs.csv")
REGULONS_FNAME = os.path.join(DATA_FOLDER, "regulons.p")
N_SAMPLES = 12

# Expression matrix: the CSV is read with its first column as index and then
# transposed.
ex_matrix = pd.read_csv(SC_EXP_FNAME, sep=',', header=0, index_col=0).T
tf_names = load_tf_names(MM_TFS_FNAME)

# One ranking database per file matched by DATABASES_GLOB.
db_fnames = glob.glob(DATABASES_GLOB)


def name(fname):
    """Return the base name of *fname* with its last extension removed."""
    return os.path.splitext(os.path.basename(fname))[0]


dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]

# Network inference; the result is written to disk and read back.
adjancencies = grnboost2(expression_data=ex_matrix, tf_names=tf_names, verbose=True)
adjancencies.to_csv(ADJACENCIES_FNAME, index=False, sep='\t')
adjancencies = pd.read_csv('/home/xinzhou/scenic_test/adjacencies.tsv', sep='\t')

# Modules are pickled and immediately reloaded before pruning.
modules = list(modules_from_adjacencies(adjancencies, ex_matrix))
with open(MODULES_FNAME, 'wb') as f:
    pickle.dump(modules, f)
with open(MODULES_FNAME, 'rb') as f:
    modules = pickle.load(f)

# This is the step reported to fail with "ArrowInvalid: Not a feather file".
df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME)
Please complete the following information: