NEW_ANIMAL database

fastadir_NEW_ANIMAL = r'C:\multi_padlock_design\NEW_ANIMAL'
fasta_filenum_NEW_ANIMAL = #_OF_FILES
fasta_pre_suffix_NEW_ANIMAL = ('QUERY_TO_FIND_RNA_REFERENCE', '.rna.fna')

########2. Add NEW_ANIMAL in "parblast.py", line 39

os.path.join( fastadir[species], ("mouse", "human", "NEW_ANIMAL")[species] + ".transcriptome" + '"', ),

Line 82:

elif designpars[0] == "NEW_ANIMAL":
    species = 2

##########3. Edit "retrieveseq.py" from line 23

fastadir = (config.fastadir_mouse, config.fastadir_human, config.fastadir_NEW_ANIMAL)
fasta_filenum = (config.fasta_filenum_mouse, config.fasta_filenum_human, config.fasta_filenum_NEW_ANIMAL)
fasta_pre_suffix = (config.fasta_pre_suffix_mouse, config.fasta_pre_suffix_human, config.fasta_pre_suffix_NEW_ANIMAL)

# translate the species name into the numeric index s
# (presumably the position of that species in the fastadir / fasta_filenum /
# fasta_pre_suffix tuples -- confirm against the code that uses s)
if species == "mouse":
    s = 0
elif species == "human":
    s = 1
elif species == "NEW_ANIMAL":
    s = 2

###########4. Edit "checkinput.py"

From line 20

def checkspecies(species):
    """Check that the requested species is one of the supported ones.

    Only human, mouse and NEW_ANIMAL are currently supported.

    Args:
        species: Species name as entered by the user.

    Returns:
        True if the species is supported. Otherwise a message is printed
        and False is returned.
    """
    success = False
    if species not in ["mouse", "human", "NEW_ANIMAL"]:
        print("Could not identify species. Try again.")
    else:
        success = True
    return success

From line 132

# loop until all the keyboard inputs are correct
while not success_s:
    # NOTE: the answer is lower-cased before it is validated, so the name that
    # replaces NEW_ANIMAL in checkspecies must be written in lowercase,
    # otherwise it can never match and this loop will not terminate
    species = input("Specify the species (human, mouse, or NEW_ANIMAL): ").lower()
    success_s = checkspecies(species)

##########5. Edit "formatrefseq.py"

######## Modify the following code from line 104

def blastdb(species):
    """Format fasta sequences to BLAST database"""
    fastadir = (config.fastadir_mouse, config.fastadir_human, config.fastadir_NEW_ANIMAL)
    nfiles = (config.fasta_filenum_mouse, config.fasta_filenum_human, config.fasta_filenum_NEW_ANIMAL)
    filename = (config.fasta_pre_suffix_mouse, config.fasta_pre_suffix_human, config.fasta_pre_suffix_NEW_ANIMAL)

######## Add the following at line 114

elif species == "NEW_ANIMAL":
    s = 2

########### If you need to include the predicted sequences in the database, replace the original code shown below.

Original code from line58

# retain only NM and NR entries
for c in range(len(Headers) - 1, -1, -1):
    if "|" in Headers[c]:
        header = Headers[c].split("|")
        if len(header) <= 3:
            if not (
                header[1][:2] == "NM" or header[1][:2] == "NR"
            ):  # new NCBI fna format
                del Headers[c]
                del Seq[c]
        else:
            if not (
                header[3][:2] == "NM" or header[3][:2] == "NR"
            ):  # old NCBI fna format
                del Headers[c]
                del Seq[c]
    else:
        if not (
            Headers[c][1:3] == "NM" or Headers[c][1:3] == "NR"
        ):  # new NCBI single fasta file format
            del Headers[c]
            del Seq[c]

########Replace with the following

# include XM and XR sequences as well as NM and NR entries

# walk the lists from the last entry to the first so that deleting an entry
# does not shift the indices that are still to be visited
for c in range(len(Headers) - 1, -1, -1):
    if "|" in Headers[c]:
        # pipe-delimited header: the accession is field 1 when there are at
        # most 3 fields, otherwise field 3
        header = Headers[c].split("|")
        if len(header) <= 3:
            if not (
                header[1][:2] in ["NM", "NR", "XM", "XR"]
            ):  # new NCBI fna format
                del Headers[c]
                del Seq[c]
        else:
            if not (
                header[3][:2] in ["NM", "NR", "XM", "XR"]
            ):  # old NCBI fna format
                del Headers[c]
                del Seq[c]
    else:
        # no pipe: the accession prefix is read from characters 1-2,
        # skipping the first character of the header
        if not (
            Headers[c][1:3] in ["NM", "NR", "XM", "XR"]
        ):  # new NCBI single fasta file format
            del Headers[c]
            del Seq[c]

######### 6. If you need to include predicted sequences, comment out line 34 in "readblast.py"

if not ("XR" in line or "XM" in line): # skip all predicted transcripts

Moldia / multi_padlock_design

Inquiry Regarding Program Compatibility for Non-Human/Mouse Species and macOS System #14