import pyfaidx
def extract_sequences_from_bed(fasta_path, bed_path, output_path):
    """Write the FASTA sequence of every interval in a BED file.

    Each data line of the BED file is split on tabs and its first three
    fields are read as chromosome name, start and end. The sequence is
    fetched with ``fasta[chrom][start:end]`` and written as a two-line
    FASTA record whose header is ``>chrom:start-end``.

    Blank lines, ``#`` comments and ``track`` / ``browser`` header lines
    are skipped instead of being parsed as intervals.

    Args:
        fasta_path: Path handed to ``pyfaidx.Fasta``.
        bed_path: Path of the tab-separated BED file to read.
        output_path: Path of the FASTA file to write. It is opened in
            ``'w'`` mode, so any existing content is replaced.

    Raises:
        IndexError: If a data line has fewer than three tab-separated
            fields.
        ValueError: If the start or end field is not an integer.
    """
    header_keywords = ('track', 'browser')

    # Hold the FASTA handle in a context manager so it is closed even when
    # a malformed BED line raises part-way through.
    with pyfaidx.Fasta(fasta_path) as fasta:
        with open(output_path, 'w') as output_file:
            with open(bed_path, 'r') as bed_file:
                for line in bed_file:
                    record = line.strip()
                    # Skip blank lines and comments.
                    if not record or record.startswith('#'):
                        continue
                    # Skip UCSC-style "track ..." / "browser ..." headers.
                    if record.split(None, 1)[0] in header_keywords:
                        continue

                    fields = record.split('\t')
                    chrom = fields[0]
                    start = int(fields[1])
                    end = int(fields[2])

                    # BED coordinates are 0-based, half-open, which matches
                    # Python-style slicing, so no offset adjustment is made.
                    sequence = fasta[chrom][start:end]

                    output_file.write(f'>{chrom}:{start}-{end}\n')
                    output_file.write(str(sequence) + '\n')
Actually probably want two output files, one in BED format, the other in FASTA format.
Pretty simple with pyfaidx, something like: