Closed 51mystic closed 1 year ago
""" File: explode_get_fastas_file.py Author: Brant Faircloth
Created by Brant Faircloth on 12 June 2013 23:06 PDT (-0700) Copyright (c) 2013 Brant C. Faircloth. All rights reserved.
Description: Given the fasta file produced by get_fastas_from_match_counts.py, explode that concatenated fasta file into many separate files, 1 per locus.
"""
import argparse
import os
import pdb
import sys
from collections import defaultdict

from Bio import SeqIO

from phyluce.helpers import FullPaths
def get_args():
    """Get arguments from CLI.

    Returns:
        The ``argparse.Namespace`` produced by parsing the command line,
        with ``input``, ``output``, ``by_taxon`` and ``split_char``
        attributes.
    """
    parser = argparse.ArgumentParser(
        description="""Explode the fasta file produced by get_fastas_from_match_counts into single files"""
    )
    # NOTE(review): FullPaths comes from phyluce.helpers; presumably it
    # expands the supplied path to an absolute path - confirm there.
    parser.add_argument(
        "--input",
        required=True,
        action=FullPaths,
        help="""The input fasta file to explode""",
    )
    parser.add_argument(
        "--output",
        required=True,
        action=FullPaths,
        help="""The output directory to create and in which to store the fastas""",
    )
    parser.add_argument(
        "--by-taxon",
        action="store_true",
        default=False,
        help="""Split file by taxon and not by locus""",
    )
    # The method is add_argument (not addargument), and the default must be
    # a non-empty separator: str.split("") raises "ValueError: empty
    # separator". Both underscores were lost when the script was pasted.
    parser.add_argument(
        "--split-char",
        type=str,
        default="_",
        help="""The character to split on""",
    )
    return parser.parse_args()
def main():
    """Explode a concatenated fasta file into one fasta file per group.

    Records are grouped on their id, split on ``--split-char``: by the
    first field (the locus) by default, or by the remaining fields joined
    with "-" (the taxon) when ``--by-taxon`` is given. Each group is
    written to ``<group>.unaligned.fasta`` inside the output directory.

    Exits via ``sys.exit()`` if the output directory already exists and
    the user does not answer exactly "Y" at the prompt.
    """
    args = get_args()
    seqdict = defaultdict(list)
    if os.path.isdir(args.output):
        response = input(
            "{} exists. Add results to directory [Y/n]? ".format(args.output)
        )
        # Only an exact "Y" continues; any other answer aborts.
        if response != "Y":
            sys.exit()
    else:
        os.makedirs(args.output)
    print("Reading fasta...")
    # The file handle must NOT be called `input`: assigning to that name
    # anywhere in this function makes it a local variable, so the call to
    # the builtin input() above would fail with UnboundLocalError.
    # Plain "r" is used because the "U" flag was removed in Python 3.11
    # (text mode already applies universal newlines).
    with open(args.input, "r") as infile:
        for seq in SeqIO.parse(infile, "fasta"):
            fields = seq.id.split(args.split_char)
            if args.by_taxon:
                key = "-".join(fields[1:])
            else:
                key = fields[0]
            seqdict[key].append(seq)
    print("Writing fasta...")
    for group, sequences in seqdict.items():
        out_path = os.path.join(
            args.output, "{}.unaligned.fasta".format(group)
        )
        with open(out_path, "w") as outf:
            for sequence in sequences:
                outf.write(sequence.format("fasta"))
# Run main() only when executed as a script. The dunder names are required:
# a bare `name == "main"` raises NameError.
if __name__ == "__main__":
    main()
Something is wrong with how you are running the program - it is unclear why there are ### symbols before the program call.
Teacher, if I have to use this script, can you give me some modification suggestions?
I can't really. All I can say is that the program needs to be run without the ###
in front. This is not a bug in the program, it's a problem in the way the program is being run.
Thank you for your advice. I will look for where the problem lies.
Dear teacher, I have made some modifications to the script you made, and it will run normally. The script is as follows, and the modifications have been marked for you.
""" File: explode_get_fastas_file.py Author: Brant Faircloth
Created by Brant Faircloth on 12 June 2013 23:06 PDT (-0700) Copyright (c) 2013 Brant C. Faircloth. All rights reserved.
Description: Given the fasta file produced by get_fastas_from_match_counts.py, explode that concatenated fasta file into many separate files, 1 per locus.
"""
import os import sys import argparse from collections import defaultdict from Bio import SeqIO from phyluce.helpers import FullPaths
import pdb
def get_args():
    """Get arguments from CLI.

    Returns:
        The ``argparse.Namespace`` produced by parsing the command line,
        with ``input``, ``output``, ``by_taxon`` and ``split_char``
        attributes.
    """
    parser = argparse.ArgumentParser(
        description="""Explode the fasta file produced by get_fastas_from_match_counts into single files"""
    )
    # NOTE(review): FullPaths comes from phyluce.helpers; presumably it
    # expands the supplied path to an absolute path - confirm there.
    parser.add_argument(
        "--input",
        required=True,
        action=FullPaths,
        help="""The input fasta file to explode""",
    )
    parser.add_argument(
        "--output",
        required=True,
        action=FullPaths,
        help="""The output directory to create and in which to store the fastas""",
    )
    parser.add_argument(
        "--by-taxon",
        action="store_true",
        default=False,
        help="""Split file by taxon and not by locus""",
    )
    # The method is add_argument (not addargument), and the default must be
    # a non-empty separator: str.split("") raises "ValueError: empty
    # separator". Both underscores were lost when the script was pasted.
    parser.add_argument(
        "--split-char",
        type=str,
        default="_",
        help="""The character to split on""",
    )
    return parser.parse_args()
def main():
    """Explode a concatenated fasta file into one fasta file per group.

    Records are grouped on their id, split on ``--split-char``: by the
    first field (the locus) by default, or by the remaining fields joined
    with "-" (the taxon) when ``--by-taxon`` is given. Each group is
    written to ``<group>.unaligned.fasta`` inside the output directory.

    Lines changed relative to the original script carry a trailing
    ``### modified`` comment. The markers are kept at the end of the line
    because a ``#`` in the middle of a statement comments out the rest of
    it and breaks the syntax.
    """
    args = get_args()
    seqdict = defaultdict(list)
    if os.path.isdir(args.output):
        response = input(
            "{} exists. Add results to directory [Y/n]? ".format(args.output)
        )
        if response == "Y":
            pass
        else:
            sys.exit()
    else:
        os.makedirs(args.output)
    print("Reading fasta...")
    # Plain "r" is used because the "U" mode flag was removed in
    # Python 3.11 (text mode already applies universal newlines).
    if not args.by_taxon:
        with open(args.input, "r") as infile:  ### modified: was `input`
            for seq in SeqIO.parse(infile, "fasta"):  ### modified: was `input`
                uce = seq.id.split(args.split_char)[0]
                seqdict[uce].append(seq)
    else:
        with open(args.input, "r") as infile:  ### modified: was `input`
            for seq in SeqIO.parse(infile, "fasta"):  ### modified: was `input`
                taxon = "-".join(seq.id.split(args.split_char)[1:])
                seqdict[taxon].append(seq)
    print("Writing fasta...")
    for uce, sequences in seqdict.items():
        with open(
            os.path.join(args.output, "{}.unaligned.fasta".format(uce)), "w"
        ) as outf:
            for sequence in sequences:
                outf.write(sequence.format("fasta"))
# Run main() only when executed as a script. The dunder names are required:
# a bare `name == "main"` raises NameError.
if __name__ == "__main__":
    main()
Dear teacher: When I executed the following command, I got an error that I didn’t know how to solve. Here is the command I executed:
### phyluce_assembly_explode_get_fastas_file --input /mnt/data/userdata/svip019/00----outcome/uce-o/uce-matchdata-extract/matedata-extract-merge.fasta --output /mnt/data/userdata/svip019/00----outcome/uce-o/uce-matchdata-extract/individual-files --by-taxon
The error is as follows: Traceback (most recent call last): File "/mnt/data/userdata/svip019/anaconda3/envs/phyluce-1.7.2/bin/phyluce_assembly_explode_get_fastas_file", line 88, in <module>
main()
File "/mnt/data/userdata/svip019/anaconda3/envs/phyluce-1.7.2/bin/phyluce_assembly_explode_get_fastas_file", line 58, in main
response = input(
UnboundLocalError: local variable '### input' referenced before assignment
Please tell me how to solve it, thank you very much.