The code below is taken from the "Analyze simulated library of codon variants" example on the dms_variants documentation webpage, which is used here to generate a dataset. It generates the dataset and then attempts to draw three plots. The first plot, made with `variants.plotCumulVariantCounts`, fails to filter the non-selected samples out of the graph, even though no data is generated for them. The second plot, made with `variants.plotNumCodonMutsByType`, is a successful example in which the `orientation` parameter has been removed. The third plot, also made with `variants.plotNumCodonMutsByType`, fails with an index error when the `orientation` parameter is added.
# NBVAL_IGNORE_OUTPUT
import collections
import itertools
import random
import tempfile
import time
import warnings

import numpy
import pandas as pd
from plotnine import *
import scipy
import dmslogo # used for preference logo plots
import dms_variants.binarymap
import dms_variants.codonvarianttable
import dms_variants.globalepistasis
import dms_variants.plotnine_themes
import dms_variants.simulate
from dms_variants.constants import CBPALETTE, CODONS_NOSTOP
# Generate the data: a random wildtype gene, barcoded codon-variant
# libraries of it, and simulated pre-/post-selection sequencing counts.

# --- simulation parameters ---
seed = 1  # random number seed
genelength = 30  # gene length in codons
libs = ['lib_1', 'lib_2']  # distinct libraries of gene
variants_per_lib = 500 * genelength  # variants per library
avgmuts = 2.0  # average codon mutations per variant
bclen = 16  # length of nucleotide barcode for each variant
variant_error_rate = 0.005  # rate at which variant sequence mis-called
avgdepth_per_variant = 200  # average per-variant sequencing depth
lib_uniformity = 5  # uniformity of library pre-selection
noise = 0.02  # random noise in selections
bottlenecks = {  # bottlenecks from pre- to post-selection
    'tight_bottle': variants_per_lib * 5,
    'loose_bottle': variants_per_lib * 100,
}

# --- global state ---
random.seed(seed)
# The Poisson draws for the sequencing depths below come from NumPy's global
# generator, which `random.seed` does not touch; seed it as well so that the
# total counts are the same on every run.
numpy.random.seed(seed)
warnings.simplefilter('ignore')
theme_set(dms_variants.plotnine_themes.theme_graygrid())

# --- wildtype gene: `genelength` codons drawn at random, no stop codons ---
geneseq = ''.join(random.choices(CODONS_NOSTOP, k=genelength))
print(f"Wildtype gene of {genelength} codons:\n{geneseq}")

# --- variant table with one entry in `library_specs` per library ---
variants = dms_variants.simulate.simulate_CodonVariantTable(
    geneseq=geneseq,
    bclen=bclen,
    library_specs={lib: {'avgmuts': avgmuts,
                         'nvariants': variants_per_lib}
                   for lib in libs},
    seed=seed,
)

# --- phenotype model whose observed enrichment drives the selections ---
phenosimulator = dms_variants.simulate.SigmoidPhenotypeSimulator(
    geneseq,
    seed=seed)

# --- simulated counts: one pre-selection sample and one post-selection
# sample per bottleneck.  `numpy.random.poisson` replaces the former
# `scipy.random.poisson`: `scipy.random` was only a deprecated alias of
# `numpy.random` and is not available in current SciPy releases. ---
counts = dms_variants.simulate.simulateSampleCounts(
    variants=variants,
    phenotype_func=phenosimulator.observedEnrichment,
    variant_error_rate=variant_error_rate,
    pre_sample={'total_count': variants_per_lib *
                numpy.random.poisson(avgdepth_per_variant),
                'uniformity': lib_uniformity},
    pre_sample_name='pre-selection',
    post_samples={name: {'noise': noise,
                         'total_count': variants_per_lib *
                         numpy.random.poisson(avgdepth_per_variant),
                         'bottleneck': bottle}
                  for name, bottle in bottlenecks.items()},
    seed=seed,
)

# Attach the simulated counts to the variant table used by the plots.
variants.add_sample_counts_df(counts)
# Plot 1: request only the 'pre-selection' sample.  Reported problem: the
# samples that were not requested are not filtered out of the figure.
p = variants.plotCumulVariantCounts(samples=['pre-selection'])
_ = p.draw()

# Plots 2 and 3: the same plotNumCodonMutsByType call, first without the
# `orientation` argument (reported to work) and then with orientation='v'
# (reported to fail with an index error).
for orientation_kwargs in ({}, {'orientation': 'v'}):
    p = variants.plotNumCodonMutsByType(
        variant_type='all',
        libraries=variants.libraries,
        samples=['pre-selection', 'tight_bottle'],
        **orientation_kwargs,
    )
    p = p + theme(panel_grid_major_x=element_blank())  # no vertical grid lines
    _ = p.draw()
The code above is taken from the "Analyze simulated library of codon variants" example on the dms_variants documentation webpage, which is used here to generate a dataset. It generates the dataset and then attempts to draw three plots. The first plot, made with `variants.plotCumulVariantCounts`, fails to filter the non-selected samples out of the graph, even though no data is generated for them. The second plot, made with `variants.plotNumCodonMutsByType`, is a successful example in which the `orientation` parameter has been removed. The third plot, also made with `variants.plotNumCodonMutsByType`, fails with an index error when the `orientation` parameter is added.