bcgsc / ntedit_sealer_protocol

Efficient targeted error resolution and automated finishing of long-read genome assemblies
5 stars 1 fork source link

Make ntEdit+Sealer compatible with newer ntHits/ntEdit versions #9

Closed lcoombe closed 3 months ago

lcoombe commented 3 months ago

Dry-run with ntHits v0.0.1 installed:

(python3.8) [lcoombe@hpce706 re-run]$ ntedit-sealer finish seqs=ecoli_shasta.fa reads="SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz" k="80 65 50" b=200M  -nB
abyss-bloom build -v -v -k80 -j8 -b200M -l2 -q15 - SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz | pigz -p8 -c > k80.bloom.z
abyss-bloom build -v -v -k65 -j8 -b200M -l2 -q15 - SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz | pigz -p8 -c > k65.bloom.z
abyss-bloom build -v -v -k50 -j8 -b200M -l2 -q15 - SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz | pigz -p8 -c > k50.bloom.z
nthits -b36 --outbloom --solid -p sr_solid -k80 -t8 SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
nthits -b36 --outbloom --solid -p sr_solid -k65 -t8 SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
nthits -b36 --outbloom --solid -p sr_solid -k50 -t8 SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
/projects/btl/lcoombe/git/ntedit_sealer_protocol/bin/run_ntedit.sh ecoli_shasta "sr_solid_k80.bf sr_solid_k65.bf sr_solid_k50.bf" "80 65 50" 0.5 0.5 8 ecoli_shasta.ntedit_edited.fa
python3 /projects/btl/lcoombe/git/ntedit_sealer_protocol/bin/mask_short_sequences.py -s -k50 ecoli_shasta.ntedit_edited.fa > ecoli_shasta.ntedit_edited.prepd.fa
abyss-sealer -v -S ecoli_shasta.ntedit_edited.prepd.fa -t ecoli_shasta.ntedit_edited.prepd-sealed-trace.txt \
-o ecoli_shasta.ntedit_edited.prepd.sealer -L100 -j8 -P10 --lower \
-k80 --input-bloom=<(pigz -p8 -d -c k80.bloom.z) -k65 --input-bloom=<(pigz -p8 -d -c k65.bloom.z) -k50 --input-bloom=<(pigz -p8 -d -c k50.bloom.z)
echo "ntEdit and Sealer polishing steps complete! Polished assembly can be found in: ecoli_shasta.ntedit_edited.prepd.sealer_scaffold.fa"

Dry-run with ntHits v1.0.3 installed:

(ntedit_sealer_test) [lcoombe@hpce706 re-run]$ ntedit-sealer finish seqs=ecoli_shasta.fa reads="SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz" k="80 65 50" b=200M  -nB
abyss-bloom build -v -v -k80 -j8 -b200M -l2 -q15 - SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz | pigz -p8 -c > k80.bloom.z
abyss-bloom build -v -v -k65 -j8 -b200M -l2 -q15 - SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz | pigz -p8 -c > k65.bloom.z
abyss-bloom build -v -v -k50 -j8 -b200M -l2 -q15 - SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz | pigz -p8 -c > k50.bloom.z
ntcard -k80 -t8 -p ntcard_hist SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
nthits bf -f ntcard_hist_k80.hist --solid -t 8 -k 80 -o sr_solid_k80.bf SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
ntcard -k65 -t8 -p ntcard_hist SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
nthits bf -f ntcard_hist_k65.hist --solid -t 8 -k 65 -o sr_solid_k65.bf SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
ntcard -k50 -t8 -p ntcard_hist SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
nthits bf -f ntcard_hist_k50.hist --solid -t 8 -k 50 -o sr_solid_k50.bf SRR15859208_1.fastq.gz SRR15859208_2.fastq.gz
/projects/btl/lcoombe/git/ntedit_sealer_protocol/bin/run_ntedit.sh ecoli_shasta "sr_solid_k80.bf sr_solid_k65.bf sr_solid_k50.bf" "80 65 50" 0.5 0.5 8 ecoli_shasta.ntedit_edited.fa
python3 /projects/btl/lcoombe/git/ntedit_sealer_protocol/bin/mask_short_sequences.py -s -k50 ecoli_shasta.ntedit_edited.fa > ecoli_shasta.ntedit_edited.prepd.fa
abyss-sealer -v -S ecoli_shasta.ntedit_edited.prepd.fa -t ecoli_shasta.ntedit_edited.prepd-sealed-trace.txt \
-o ecoli_shasta.ntedit_edited.prepd.sealer -L100 -j8 -P10 --lower \
-k80 --input-bloom=<(pigz -p8 -d -c k80.bloom.z) -k65 --input-bloom=<(pigz -p8 -d -c k65.bloom.z) -k50 --input-bloom=<(pigz -p8 -d -c k50.bloom.z)
echo "ntEdit and Sealer polishing steps complete! Polished assembly can be found in: ecoli_shasta.ntedit_edited.prepd.sealer_scaffold.fa"