Why don't you use BioPython? With it, you don't need to deal with external codons tables and checks and it has proper instruments for dealing with sequences. You can also use specific codon tables.
A toy code example:
import random
import numpy as np
from Bio.Data import CodonTable
from Bio.Seq import Seq
random.seed(2128506)
def new_random_DNA(codons, stop_codons, protein_length):
'''
Generates random DNA sequence with length 3*protein_length from codons list and without
items from stop_codons list
'''
random_DNA = []
checkLengthFlag = False
while not checkLengthFlag:
random_codon = ''.join(random.choices(codons, k=3))
if random_codon not in stop_codons:
random_DNA.append(random_codon)
if len(random_DNA) == 3 * protein_length:
checkLengthFlag = True
random_DNA =''.join(random_DNA)
return random_DNA
def random_DNA_shuffle(seq_DNA, aa_coding, times=1):
'''
Shuffles DNA sequence while keeping protein sequence intact
'''
seq_DNA = Seq(''.join(seq_DNA))
protein = seq_DNA.translate()
aa_list = list(protein)
new_DNA_list = []
for _ in range(times):
new_DNA = ''
for aa in aa_list:
new_codon = random.choice(aa_coding[aa])
new_DNA += new_codon
new_DNA_list.append(new_DNA)
return new_DNA_list
aa_coding = dict()
for key in _temp:
aa_coding.setdefault(_temp[key], []).append(key)
table = CodonTable.unambiguous_dna_by_name["Standard"]
stop_codons = table.stop_codons
codons = table.nucleotide_alphabet
protein_length = 10
aa_coding = dict()
for key in _temp:
aa_coding.setdefault(_temp[key], []).append(key)
seq_DNA = new_random_DNA(codons, stop_codons, protein_length)
new = random_DNA_shuffle(seq_DNA, aa_coding, times=1)
print(random_DNA) # preview
for seq in new:
print(seq)
print(Seq(random_DNA).translate())
for seq in new:
print(Seq(seq).translate()
Why don't you use BioPython? With it, you don't need to deal with external codons tables and checks and it has proper instruments for dealing with sequences. You can also use specific codon tables.
A toy code example: