thethoughtemporium / Whose-gene-is-it-anyway

768 stars 82 forks source link

Codon Shuffle code #17

Open dobrosketchkun opened 3 years ago

dobrosketchkun commented 3 years ago

Why don't you use BioPython? With it, you don't need to deal with external codons tables and checks and it has proper instruments for dealing with sequences. You can also use specific codon tables.

A toy code example:

import random
import numpy as np
from Bio.Data import CodonTable
from Bio.Seq import Seq
random.seed(2128506)

def new_random_DNA(codons, stop_codons, protein_length): 
    '''
    Generates random DNA sequence with length 3*protein_length from codons list and without
    items from stop_codons list
    '''

    random_DNA = []
    checkLengthFlag = False
    while not  checkLengthFlag:
        random_codon = ''.join(random.choices(codons, k=3))
        if random_codon not in stop_codons:
        random_DNA.append(random_codon)
        if len(random_DNA) == 3 * protein_length:
        checkLengthFlag = True
    random_DNA =''.join(random_DNA)
    return random_DNA

def random_DNA_shuffle(seq_DNA, aa_coding, times=1):
    '''
    Shuffles DNA sequence while keeping protein sequence intact
    '''
    seq_DNA = Seq(''.join(seq_DNA))  
    protein = seq_DNA.translate()
    aa_list = list(protein)
    new_DNA_list = []
    for _ in range(times):
        new_DNA = ''
        for aa in aa_list:
            new_codon = random.choice(aa_coding[aa])
            new_DNA += new_codon
        new_DNA_list.append(new_DNA)
    return new_DNA_list

aa_coding = dict()
for key in _temp:
    aa_coding.setdefault(_temp[key], []).append(key)

table = CodonTable.unambiguous_dna_by_name["Standard"] 
stop_codons = table.stop_codons
codons = table.nucleotide_alphabet
protein_length = 10

aa_coding = dict()
for key in _temp:
    aa_coding.setdefault(_temp[key], []).append(key)

seq_DNA = new_random_DNA(codons, stop_codons, protein_length)
new = random_DNA_shuffle(seq_DNA, aa_coding, times=1)

print(random_DNA) # preview 
for seq in new:
    print(seq)
print(Seq(random_DNA).translate())
for seq in new:
    print(Seq(seq).translate()