jbloomlab / dms_variants

Analyze deep mutational scanning of barcoded variants.
Other
6 stars 9 forks source link

plotCumulVariantCounts and plotNumCodonMutsByType plotting errors #68

Closed Caleb-Carr closed 3 years ago

Caleb-Carr commented 3 years ago

The code below is taken from the "Analyze simulated library of codon variants" example on the dms_variants webpage and is used to generate a dataset. It generates the dataset and then attempts to draw three plots. The first plot, made with variants.plotCumulVariantCounts, fails to filter out the non-selected samples: they still appear in the graph even though no data is generated for them. The second plot, made with variants.plotNumCodonMutsByType with the orientation parameter removed, works as expected. The third plot, also made with variants.plotNumCodonMutsByType, fails with an index error when the orientation parameter is added.

# NBVAL_IGNORE_OUTPUT

import collections
import itertools
import random
import tempfile
import time
import warnings

import numpy

import pandas as pd

from plotnine import *

import scipy

import dmslogo  # used for preference logo plots

import dms_variants.binarymap
import dms_variants.codonvarianttable
import dms_variants.globalepistasis
import dms_variants.plotnine_themes
import dms_variants.simulate
from dms_variants.constants import CBPALETTE, CODONS_NOSTOP

# Parameters controlling the simulated deep mutational scanning data set.

# random number seed
seed = 1

# gene length in codons
genelength = 30

# distinct libraries of gene
libs = [f'lib_{number}' for number in (1, 2)]

# variants per library
variants_per_lib = genelength * 500

# average codon mutations per variant
avgmuts = 2.0

# length of nucleotide barcode for each variant
bclen = 16

# rate at which variant sequence mis-called
variant_error_rate = 0.005

# average per-variant sequencing depth
avgdepth_per_variant = 200

# uniformity of library pre-selection
lib_uniformity = 5

# random noise in selections
noise = 0.02

# bottlenecks from pre- to post-selection, keyed by post-selection sample name
bottlenecks = dict(
    tight_bottle=5 * variants_per_lib,
    loose_bottle=100 * variants_per_lib,
)

# Suppress warnings and make the dms_variants `theme_graygrid` theme the
# default for all plotnine plots drawn below.
warnings.simplefilter('ignore')
theme_set(dms_variants.plotnine_themes.theme_graygrid())

# Seed the standard-library RNG, then build the wildtype gene by drawing
# `genelength` codons (with replacement) from CODONS_NOSTOP.
random.seed(seed)
geneseq = str.join('', random.choices(CODONS_NOSTOP, k=genelength))
print(f"Wildtype gene of {genelength} codons:\n{geneseq}")

# Simulate a codon-variant table holding one entry per library in `libs`;
# every library gets the same average mutation rate and number of variants.
variants = dms_variants.simulate.simulate_CodonVariantTable(
    geneseq=geneseq,
    bclen=bclen,
    library_specs={
        lib: dict(avgmuts=avgmuts, nvariants=variants_per_lib)
        for lib in libs
    },
    seed=seed,
)

# Phenotype simulator for the wildtype gene; its `observedEnrichment`
# method is later used as the phenotype function when simulating counts.
phenosimulator = dms_variants.simulate.SigmoidPhenotypeSimulator(
    geneseq, seed=seed
)

# Simulate barcode counts for the pre-selection sample and for one
# post-selection sample per entry in `bottlenecks`, then attach them to
# the variant table.
#
# Sequencing depths are drawn with `numpy.random.poisson` instead of
# `scipy.random.poisson`: `scipy.random` was only a deprecated alias of
# `numpy.random` and is not available in current SciPy releases, where the
# original call raises AttributeError before any plot is attempted.
#
# NOTE(review): these draws use NumPy's global generator; `random.seed`
# only seeds the standard-library `random` module. Confirm that an earlier
# call seeds NumPy if reproducible depths are required.
counts = dms_variants.simulate.simulateSampleCounts(
        variants=variants,
        phenotype_func=phenosimulator.observedEnrichment,
        variant_error_rate=variant_error_rate,
        pre_sample={'total_count': variants_per_lib *
                        numpy.random.poisson(avgdepth_per_variant),
                    'uniformity': lib_uniformity},
        pre_sample_name='pre-selection',
        post_samples={name: {'noise': noise,
                             'total_count': variants_per_lib *
                                 numpy.random.poisson(avgdepth_per_variant),
                             'bottleneck': bottle}
                          for name, bottle in bottlenecks.items()},
        seed=seed,
        )

# Register the simulated counts so the plotting methods below can use them.
variants.add_sample_counts_df(counts)

# Example 1: only the 'pre-selection' sample is requested, yet the plot
# does not filter out the graphs for the other samples.
p = variants.plotCumulVariantCounts(samples=['pre-selection'])
_ = p.draw()

# Example 2: works when the `orientation` parameter is not passed
# (compare with the failing call that adds orientation='v').
p = (
    variants.plotNumCodonMutsByType(
        variant_type='all',
        libraries=variants.libraries,
        samples=['pre-selection', 'tight_bottle'],
    )
    + theme(panel_grid_major_x=element_blank())  # no vertical grid lines
)
_ = p.draw()

# Example 3: the same call with orientation='v' added; this is the broken
# case that ends in an index error.
p = (
    variants.plotNumCodonMutsByType(
        variant_type='all',
        orientation='v',
        libraries=variants.libraries,
        samples=['pre-selection', 'tight_bottle'],
    )
    + theme(panel_grid_major_x=element_blank())  # no vertical grid lines
)
_ = p.draw()