Open mhoban opened 1 year ago
Stuff that Evan messed around with:
#!/usr/bin/env bash
#eDNAFlow
#installed on kinilau at /drives/storage/sw/eDNAFlow/
nextflow /drives/storage/sw/eDNAFlow/eDNAFlow.nf -params-file paka_opts.yml
nextflow /drives/storage/sw/eDNAFlow/eDNAFlow.nf --paired --reads ./fastq/ --barcode ./data/18S.tsv --ilumina-demultiplexed --collapse-taxonomy --max-memory 200.GB --max-cpus 30
#input_fa=/drives/storage/users/evan/data_help/eDNAFlow_opt/AS_Jan24_18s_zotus.fasta
#input_fa=/drives/storage/users/evan/data_help/eDNAFlow_opt/AS_short.fasta
input_fa=AS_short.fasta
#out_path=/drives/storage/users/evan/data_help/eDNAFlow_opt/blast_result_full.tsv
out_path=blast_result_full.tsv
blast_jobs=30
gnupar_jobs=10
#singularity run -B $(readlink -m .) -B $(dirname $FLOW_BLAST) --env BLASTDB=$FLOW_BLAST /drives/storage/sw/singularity_cache/ncbi-blast-latest.img blastn <all blastn options here>
###################################
#benchmarking
###################################
start=`date +%s`
singularity run -B $(readlink -m .) -B $(dirname $FLOW_BLAST) -B /drives/blast/ --env BLASTDB=$FLOW_BLAST /drives/storage/sw/singularity_cache/ncbi-blast-latest.img \
blastn -db "$FLOW_BLAST" -outfmt "6 qseqid sseqid staxid ssciname scomname sskingdom pident length qlen slen mismatch gapopen gaps qstart qend sstart send stitle evalue bitscore qcovs qcovhsp" \
-perc_identity 95 -evalue 1e-3 -best_hit_score_edge 0.05 -best_hit_overhang 0.25 -qcov_hsp_perc 100 -max_target_seqs 10 \
-query $input_fa -num_threads $blast_jobs > $out_path
end=`date +%s`
runtime=$((end-start))
echo $runtime
##3 SEQS
#1 core no GNU
#321 (>out)
#325 (-out)
#10 cores no GNU
#330 (-out)
#304 (>out)
#30 cores no GNU
#270 (-out)
#322 (>out)
###################################
#parallel blast vs multithreaded
###################################
#assign desired parallel jobs
gnupar_jobs=25
#optional multithreading for blast (doesn't apprear to benefit above 4)
blast_jobs=2
#input_fa=AS_Jan24_18s_zotus.fasta
input_fa=seq3000_test.fasta
#get seq count
seqs_total=`grep ">Zotu" $input_fa | wc -l`
#make fasta single line and split into temp files - currently just one file per gnu job
perl -pe '$. > 1 and /^>/ ? print "\n" : chomp' $input_fa | split -l $(($seqs_total*2 / ($gnupar_jobs*$blast_jobs))) - --additional-suffix=.fasta split_fastas/query_seqs_
blast_opts="-outfmt \"6 qseqid sseqid staxid ssciname scomname sskingdom pident length qlen slen mismatch gapopen gaps qstart qend sstart send stitle evalue bitscore qcovs qcovhsp\" -perc_identity 95 -evalue 1e-3 -best_hit_score_edge 0.05 -best_hit_overhang 0.25 -qcov_hsp_perc 100 -max_target_seqs 10"
#
parallel --joblog blasttmp/log -k -j $gnupar_jobs \
singularity run -B $(readlink -m .) -B $(readlink -m ./split_fastas) -B $(readlink -m ./blasttmp) -B $(dirname $FLOW_BLAST) -B /drives/blast/ \
--env BLASTDB=$FLOW_BLAST /drives/storage/sw/singularity_cache/ncbi-blast-latest.img \
blastn -db "$FLOW_BLAST" $blast_opts \
-query ./split_fastas/query_seqs_{/.}.fasta -num_threads $blast_jobs ">" ./blasttmp/{/.}_tmp ::: \
` find ./split_fastas -maxdepth 1 -name "query_seqs*.fasta" | cut -d "_" -f4 | sort `
cat blasttmp/*_tmp > blast_out.tsv
#clean up temp files/rm blasttmp dir
#maybe add a QC check here to sort results by Zotu#
###################################
#progress bar
###################################
#I didn't get around to this much but I think a fairly striaghtforward way to do this (since the blast output files don't seem to want to write until completed) would be to just check how many tmp files aren't empty
#I am not sure if there is a way to easily tie in that progress to the nextflow progress bars - like the one for demultiplexing that shows X of total samples completed? Would be possible to report this in #of Zotus blasted since each chunk is
# equal to $(($seqs_total*2 / ($gnupar_jobs*$blast_jobs))) Zotus
total_chunks=`find ./split_fastas/ -type f -name '*' -size +0 -print | wc -l`
completed_chunks=`find ./blasttmp/ -type f -name '*tmp' -size +0 -print | wc -l`
Maybe we can split the fasta file into several different pieces and blast each of those in parallel?