For any custom genomes - Githubissues

Hi, you can prepare the cDNA file like this,

from Bio import SeqIO

records = SeqIO.parse("yourGenome.fa", "fasta")
chr2seq = {}
for record in records:
    id = str(record.id)
    chr2seq[record.id] = str(record.seq)

REVERSE_COMPLEMENT = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C', 'N': 'N'}
def extract_seq(chr, start, end):
    seq = chr2seq[chr][start:end]
    return seq

with open("Organism.cdna.fa", 'w') as w:
    with open("your.gff") as f:
        for line in f:
            if line.startswith("#"):
                continue
            fields = line.strip().split("\t")
            if fields[2] == "transcript":
                chr = fields[0]
                tstart = int(fields[3]) - 1
                tend = int(fields[4])
                strand = fields[6]
                transcript_id = "up to your format";gene_id = "up to your format"
                line = f.readline()
                fields = line.strip().split("\t")
                exon_seqs = ""
                while line.startswith("chr") and fields[2] == "exon":
                    estart = int(fields[3]) - 1;eend = int(fields[4])
                    exon_seqs += extract_seq(chr, estart, eend)
                    line = f.readline()
                    fields = line.strip().split("\t")

                exon_seqs = exon_seqs.upper()
                exon_seqs = exon_seqs if strand == "+" else "".join([REVERSE_COMPLEMENT[base] for base in exon_seqs[::-1]])
                if exon_seqs:
                    w.write(f">{transcript_id}|{gene_id}|{str(tstart)}|{str(tend)}|{chr}|{strand}\n{exon_seqs}\n")
Bulabula45 / Readon

For any custom genomes #2