Closed Fdm8610 closed 11 months ago
Hi
This post is pretty difficult to read. Please take time to format your issues correctly.
It looks like the code was written for v0.14.8. Please see the README for differences between the most recent version of pomegranate and that one. You can either install the older version or update this code.
I am trying to run this script, guide_calling.py:
import pandas as pd import numpy as np import csv import os import matplotlib.pyplot as plt from matplotlib import gridspec import seaborn as sns; sns.set() from scipy.signal import argrelextrema from scipy.stats import gaussian_kde from pomegranate.gmm import *
def find(name, path):
    """Return the path of the first file called ``name`` found under ``path``.

    The directory tree rooted at ``path`` is walked with ``os.walk``; the
    first directory whose file list contains ``name`` determines the result.

    Args:
        name: Exact file name to look for (no pattern matching).
        path: Root directory of the search.

    Returns:
        ``os.path.join(root, name)`` for the first match, or ``None`` when no
        file named ``name`` exists anywhere under ``path``.
    """
    for root, _dirs, files in os.walk(path):
        if name in files:
            return os.path.join(root, name)
    # Make the "not found" outcome explicit instead of falling off the end.
    return None
def load_gbc_reads(experiment): output_dir = os.path.join(experiment, 'outs')
def capture_reads(gbc_reads, cell_barcodes): print ("Filtering by cell barcode...")
# we will be conservative and merge reads not just based on cell BC and UMI, but also based on mapping
def find_threshold(coverage_data, gemgroup):
# find location of upper mode by fitting KDE and finding extrema
def identify_cells(captured_gbc_table, coverage_thresholds, read_threshold=50, umi_threshold=3):
# for each cell we will take the top identity by read count (UMI count is finicky because then a bunch of *'s will infiltrate at low coverage)
def write_identities(experiment, cell_identities, cell_barcodes): output_dir = os.path.join(experiment, 'outs')
def plot_umi_distribution(table, gemgroup):
    """Plot the UMI-count distribution of the rows selected by ``good_coverage``.

    The ``UMI_count`` column of the rows matching ``table.query('good_coverage')``
    is drawn with ``sns.distplot``. A vertical line is added at the mean of
    those counts, the figure is titled with ``gemgroup``, and the mean is
    printed to stdout.

    Args:
        table: Object supporting ``.query('good_coverage')`` whose result has a
            ``UMI_count`` column (presumably a pandas DataFrame; confirm
            against the caller).
        gemgroup: Value interpolated into the plot title and the printed line.
    """
    good_umis = table.query('good_coverage')['UMI_count']

    axes = sns.distplot(good_umis)
    _, y_top = axes.get_ylim()

    # Vertical marker at the mean, spanning from 0 to the top of the y-axis.
    mean_umi = good_umis.mean()
    plt.plot(np.array([1, 1]) * mean_umi, [0, y_top])

    plt.title('Gemgroup {0}'.format(gemgroup))
    print('Gemgroup {0}: UMI mean is {1:0.2f}'.format(gemgroup, mean_umi))
    sns.despine()
def plot_umi_read_distribution(table, gemgroup): num_identities = table.groupby(level=0).count()['read_count']
def MixedModelCall(guide,gbc_table,library,directory): data=np.array(np.log2(gbc_table.reset_index()[gbc_table.reset_index()['guide_identity']==guide]['UMI_count'])) data=data.reshape(-1,1)
def MixedModelPlot(guide,gbc_table,library,directory,model,data,gmm_x):
# Plot histograms and gaussian curves
with this notebook code:
from guide_calling import * import os
%matplotlib inline
%load_ext autoreload %autoreload 2
sns.set_style('white') pd.set_option('display.float_format', lambda x: '%.2f' % x)
from IPython.core.interactiveshell import InteractiveShell InteractiveShell.ast_node_interactivity = "all"
sample='test' guide_calls='guide_calls/' output_dir='output/'
# load guide barcode reads from outs/guide_barcode_reads.txt.gz file in experiment folder
gbc_reads = pd.read_table('guide_barcode_reads.txt',sep=',', header=None, names=('guide_identity', 'cell_barcode', 'UMI')) cell_barcodes = pd.read_csv('barcodes.tsv.gz', sep='\t', header=None, names=['cell_barcode'])
# remove unmapped reads
# this is a conservative step
# in some cases, these unmapped reads represent real UMIs/CBCs but fail to map
gbc_reads=gbc_reads[gbc_reads['guide_identity']!='*']
# collect reads that come from valid cell barcodes as determined by scRNAseq
captured_gbc_table = capture_reads(gbc_reads, cell_barcodes)
# determine thresholds
# Create the output directory portably. The original shelled out to a
# bash-only `[[ -d ... ]] || mkdir` test through os.system, which fails when
# /bin/sh is not bash and builds a shell command from a string.
os.makedirs(guide_calls, exist_ok=True)

# Run the mixture-model call once per distinct guide and concatenate the
# results in a single step: DataFrame.append was removed in pandas 2.0 and
# re-copies the accumulated frame on every iteration.
# NOTE(review): assumes MixedModelCall returns a DataFrame -- confirm.
guide_ids = captured_gbc_table.reset_index()['guide_identity'].unique()
per_guide_calls = [
    MixedModelCall(guide, captured_gbc_table, sample, guide_calls)
    for guide in guide_ids
]
# pd.concat raises on an empty list; keep the original "empty frame" result.
pop = pd.concat(per_guide_calls) if per_guide_calls else pd.DataFrame()
However, I am not able to call GeneralMixtureModel.
I also tried importing it with `from pomegranate import gmm`, but then I get the following error: --> 176 model = GeneralMixtureModel.from_samples([PoissonDistribution,NormalDistribution],2,data) 177 if numpy.isnan(model.probability(gmm_x)).any(): 178 i=0
AttributeError: type object 'GeneralMixtureModel' has no attribute 'from_samples'
How can I fix this?
Thank you