In this function if the meth_positions_fwd/rev is [], then the whole function will fail even though there might be methylated occurences on one of the strand.
def motif_model_contig(pileup, contig: str, motif, save_motif_positions=False):
"""
Get the posterior for a single motif. Uses number of methylated motifs as methylation count.
Parameters:
- pileup (Pileup): The pileup to be processed.
- contig (str): The contig to be processed.
- motif (str): The motif to be processed.
- motif_meth_index (int): The index of the methylation site in the motif.
Returns:
- BetaBernoulliModel: The model for the candidate.
"""
# Strip motif of periods # Strip motif of periods
motif_stripped = motif.new_stripped_motif()
meth_positions_fwd = pileup.filter(pl.col("strand") == "+")["position"].to_numpy()
meth_positions_rev = pileup.filter(pl.col("strand") == "-")["position"].to_numpy()
index_meth_fwd, index_nonmeth_fwd = methylated_motif_occourances(motif_stripped, contig, meth_positions_fwd)
index_meth_rev, index_nonmeth_rev = methylated_motif_occourances(motif_stripped.reverse_compliment(), contig, meth_positions_rev)
model = BetaBernoulliModel()
model.update(len(index_meth_fwd) + len(index_meth_rev), len(index_nonmeth_fwd) + len(index_nonmeth_rev))
return model
Comes from here:
def methylated_motif_occourances(motif, sequence, methylated_positions) -> tuple:
"""
Get occourances of a motif in a contig
Parameters:
- motif (str): The motif to search for.
- seq (str): The contig to search in.
- contig_meth_positions (list): The positions of the methylation sites in the contig.
Returns:
- tuple: A tuple of two numpy arrays, the first containing the positions of the methylated motifs, the second containing the positions of the non-methylated motifs.
"""
assert len(motif) > 0, "Motif is empty"
assert len(sequence) > 0, "Sequence is empty" <----------------------------------
assert len(methylated_positions) > 0, "Methylated positions is empty"
assert type(motif) == Motif, "Motif is not a Motif type"
# Get the index of the methylation in the motif in the contig
motif_index = subseq_indices(motif.string, sequence) + motif.mod_position
# Methylated motif positions
meth_occurences = np.intersect1d(methylated_positions, motif_index)
# Non-methylated motif positions
nonmeth_occurences = np.setdiff1d(motif_index, methylated_positions)
return meth_occurences, nonmeth_occurences
In this function if the meth_positions_fwd/rev is [], then the whole function will fail even though there might be methylated occurences on one of the strand.
Comes from here: