Closed Adamtaranto closed 4 years ago
Hi @Adamtaranto,
You're correct, IPA headers aren't compatible with Falcon-phase. In the next release of IPA, we are adding a conversion script under falconc:
Usage:
ipa2-to-falcon-unzip [required&optional-params]
Rename IPA2 fasta header names to match falcon
Options:
-h, --help print this cligen-erated help
--help-syntax advanced: prepend,plurals,..
-i=, --input-p-fn= string REQUIRED input primary contigs
--input-a-fn= string REQUIRED input associate contigs
-o=, --output-prefix= string REQUIRED prefix for output files
In the meantime, here is the source code. The logic is trivial.
from ./util import nil
from strformat import fmt
import hts
import strutils
func renamedSeq*(name: string): string =
  ## Derive the output header for the sequence header `name`.
  ##
  ## `name` is split on '.' and '-'. The first field is always discarded.
  ## * Two fields: the second field is returned on its own.
  ## * Three or more fields: if the third field is "01" the result is
  ##   "<second>_<third>"; otherwise "" is returned, which callers treat as
  ##   "skip this record". Fields after the third are ignored.
  ##
  ## Raises `ValueError` when `name` contains neither separator, since there
  ## is then no second field to build a new name from.
  let parts = name.split({'.', '-'})
  if parts.len < 2:
    # Previously this fell through to parts[1] and died with an index error.
    raise newException(ValueError,
      "Cannot rename sequence '" & name &
      "': expected at least one '.' or '-' separator")
  if parts.len == 2:
    return parts[1]
  if parts[2] != "01":
    return ""
  return parts[1] & "_" & parts[2]
type
  # Tags which contig set is being processed. Each member's string value is
  # what `$member` yields, and it is interpolated into the output file name
  # as "<output_prefix>.<value>.fasta".
  POrA = enum
    pCtg = "p"  # passed for the primary-contig input
    aCtg = "a"  # passed for the associate-contig input
proc renameSeqs(seq_fn, output_prefix: string, extension: POrA) =
  ## Copy the records of the indexed FASTA `seq_fn` into
  ## "<output_prefix>.<extension>.fasta", writing each kept record under the
  ## header returned by `renamedSeq`.
  ##
  ## Records for which `renamedSeq` returns "" are left out of the output.
  ## Each kept record is written as a '>' header line followed by the whole
  ## sequence on a single line.
  ##
  ## If `seq_fn` cannot be opened, `util.raiseEx` is called with a
  ## "[FATAL] Could not open ..." message.
  var refx: hts.Fai
  if not hts.open(refx, seq_fn):
    util.raiseEx(format("[FATAL] Could not open '$#'", seq_fn))
  let f = open("{output_prefix}.{extension}.fasta".fmt, fmWrite)
  # Make sure the output handle is released even if a lookup or the renaming
  # raises part-way through the loop.
  defer: f.close()
  for i in 0 ..< refx.len:
    let old_name = refx[i]
    let new_name = renamedSeq(old_name)
    if new_name == "":
      # Decide on skipping before fetching, so dropped records never have
      # their sequence loaded.
      continue
    f.write('>')
    f.writeLine(new_name)
    f.writeLine(refx.get(old_name))
proc main*(input_p_fn, input_a_fn, output_prefix: string) =
  ##Rename IPA2 fasta header names to match falcon
  # NOTE(review): the doc comment above matches the description line in the
  # CLI usage text, so it is presumably surfaced as help output; keep its
  # wording unchanged.
  #
  # Process the primary input first, then the associate input; both write
  # under the same output prefix and differ only by the kind tag.
  for job in [(input_p_fn, pCtg), (input_a_fn, aCtg)]:
    renameSeqs(job[0], output_prefix, job[1])
Is FALCON-Phase compatible with the output of IPA? The naming convention for the primary contigs looks the same, but the alternate contigs from IPA have a slightly different format from the haplotigs produced by FALCON-Unzip.