Open EricDeveaud opened 1 year ago
here's how I patched my copy of clinsv.
The ncpus15, ncpus12 and ncpus1 variables (not really useful in the case of 1, but anyway) limit the number of parallel jobs when ClinSV is submitted in a Slurm allocation or run in a taskset context.
NB: I also used `$^X` in place of a plain `perl` call, just to be sure the same interpreter is used.
See: https://perldoc.perl.org/perlvar#$%5EX
@@ -319,20 +316,25 @@
open(OUT,">$$cJ{rShStem}.sh");
print OUT "$r_head\n\n";
print OUT "cd $r_OutDir/\n";
+ # avoid overcommit. be sure to limit to available number of pro
cs
+ print OUT "(( ncpus15 = `nproc` < 15 ? `nproc` : 15 ))\n";
+ print OUT "(( ncpus12 = `nproc` < 12 ? `nproc` : 12 ))\n ";
+ print OUT "(( ncpus1 = `nproc` < 1 ? `nproc` :1))\n";
foreach $cT (("q0","q20","mq")){ # create bw of s1 for MQ>=0 and
MQ>=20
if($cT eq "mq"){
- print OUT "\n\nawk '\$2>=100000 {print \$1\":1-\
"\$2}' $refFasta.chrom.sizes | xargs -P 15 -t -i{} perl $S_SV_scriptDir/bam2wigM
Q.pl ";
+ # use same perl than the one clinsv run with
+ print OUT "\n\nawk '\$2>=100000 {print \$1\":1-\
"\$2}' $refFasta.chrom.sizes | xargs -P \${ncpus15} -t -i{} $^X $S_SV_scriptDir
/bam2wigMQ.pl ";
print OUT "-s 1 -r \"{}\" -o $r_TmpDir/$cSample.
$cT -f $refFasta -b $rAlnDir/$cSample.bam\n";
- print OUT "\n\nawk '\$2<100000 {print \$1\":1-\"
\$2}' $refFasta.chrom.sizes | xargs -P 1 -t -i{} perl $S_SV_scriptDir/bam2wigMQ.
pl ";
+ print OUT "\n\nawk '\$2<100000 {print \$1\":1-\"
\$2}' $refFasta.chrom.sizes | xargs -P \${ncpus1} -t -i{} $^X $S_SV_scriptDir/ba
m2wigMQ.pl ";
print OUT "-s 1 -r \"{}\" -o $r_TmpDir/$cSample.
$cT.small_contigs -f $refFasta -b $rAlnDir/$cSample.bam -a \n";
}else{
- print OUT "\n\nawk '\$2>=100000 {print \$1\":1-\
"\$2}' $refFasta.chrom.sizes | xargs -P 15 -t -i{} perl $S_SV_scriptDir/bam2wig.
pl ";
+ print OUT "\n\nawk '\$2>=100000 {print \$1\":1-\
"\$2}' $refFasta.chrom.sizes | xargs -P \${ncpus15} -t -i{} $^X $S_SV_scriptDir/
bam2wig.pl ";
print OUT "-s 1 -q $cT -r \"{}\" -o $r_TmpDir/$c
Sample.$cT -f $refFasta -b $rAlnDir/$cSample.bam\n";
- print OUT "\n\nawk '\$2<100000 {print \$1\":1-\"
\$2}' $refFasta.chrom.sizes | xargs -P 1 -t -i{} perl $S_SV_scriptDir/bam2wig.pl
";
+ print OUT "\n\nawk '\$2<100000 {print \$1\":1-\"
\$2}' $refFasta.chrom.sizes | xargs -P \${ncpus1} -t -i{} $^X $S_SV_scriptDir/ba
m2wig.pl ";
print OUT "-s 1 -q $cT -r \"{}\" -o $r_TmpDir/$c
Sample.$cT.small_contigs -f $refFasta -b $rAlnDir/$cSample.bam -a\n";
}
print OUT "cat $r_TmpDir/$cSample.$cT.*.wig > $r_TmpDir/$cSample.$cT.wig\n\n";
illustration.
rpm_maker://tmp > cat no_overcommit.sh
#!/bin/sh
(( ncpus15 = `nproc` < 15 ? `nproc` : 15 ))
(( ncpus12 = `nproc` < 12 ? `nproc` : 12 ))
(( ncpus1 = `nproc` < 1 ? `nproc` :1))
echo "ncpus15: ${ncpus15} - ncpus12: ${ncpus12} - ncpus1: ${ncpus1}"
and execution in various conditions
rpm_maker://tmp > nproc
64
rpm_maker://tmp > sh no_overcommit.sh
ncpus15: 15 - ncpus12: 12 - ncpus1: 1
now limit to 12 available procs
rpm_maker://tmp > taskset -c 0-11 sh no_overcommit.sh
ncpus15: 12 - ncpus12: 12 - ncpus1: 1
even less
rpm_maker://tmp > taskset -c 0-3 sh no_overcommit.sh
ncpus15: 4 - ncpus12: 4 - ncpus1: 1
regards
Eric
Excellent feedback, thank you Eric; we'll look to incorporate this. Can you please comment on the runtime performance when using fewer resources than we tested on?
Cheers
On Fri, 10 Feb 2023, 2:00 am Eric Deveaud, @.***> wrote:
here's how I patched my copy of clinsv.
The ncpus15, ncpus12 and ncpus1 variables (not really useful in the case of 1, but anyway) limit the number of parallel jobs when ClinSV is submitted in a Slurm allocation or run in a taskset context. NB: I also used `$^X` in place of a plain `perl` call, just to be sure the same interpreter is used. See: https://perldoc.perl.org/perlvar#$%5EX
@@ -319,20 +316,25 @@ open(OUT,">$$cJ{rShStem}.sh"); print OUT "$r_head\n\n"; print OUT "cd $r_OutDir/\n";
avoid overcommit. be sure to limit to available number of pro
cs
print OUT "(( ncpus15 =
nproc
< 15 ?nproc
: 15 ))\n";print OUT "(( ncpus12 =
nproc
< 12 ?nproc
: 12 ))\n ";print OUT "(( ncpus1 =
nproc
< 1 ?nproc
:1))\n";foreach $cT (("q0","q20","mq")){ # create bw of s1 for MQ>=0 and
MQ>=20 if($cT eq "mq"){
print OUT "\n\nawk '\$2>=100000 {print \$1\":1-\ "\$2}' $refFasta.chrom.sizes | xargs -P 15 -t -i{} perl $S_SV_scriptDir/bam2wigM Q.pl ";
use same perl than the one clinsv run with
print OUT "\n\nawk '\$2>=100000 {print \$1\":1-\ "\$2}' $refFasta.chrom.sizes | xargs -P \${ncpus15} -t -i{} $^X $S_SV_scriptDir /bam2wigMQ.pl "; print OUT "-s 1 -r \"{}\" -o $r_TmpDir/$cSample. $cT -f $refFasta -b $rAlnDir/$cSample.bam\n";
print OUT "\n\nawk '\$2<100000 {print \$1\":1-\" \$2}' $refFasta.chrom.sizes | xargs -P 1 -t -i{} perl $S_SV_scriptDir/bam2wigMQ. pl ";
print OUT "\n\nawk '\$2<100000 {print \$1\":1-\" \$2}' $refFasta.chrom.sizes | xargs -P \${ncpus1} -t -i{} $^X $S_SV_scriptDir/ba m2wigMQ.pl "; print OUT "-s 1 -r \"{}\" -o $r_TmpDir/$cSample. $cT.small_contigs -f $refFasta -b $rAlnDir/$cSample.bam -a \n";
}else{
print OUT "\n\nawk '\$2>=100000 {print \$1\":1-\ "\$2}' $refFasta.chrom.sizes | xargs -P 15 -t -i{} perl $S_SV_scriptDir/bam2wig. pl ";
print OUT "\n\nawk '\$2>=100000 {print \$1\":1-\ "\$2}' $refFasta.chrom.sizes | xargs -P \${ncpus15} -t -i{} $^X $S_SV_scriptDir/bam2wig.pl "; print OUT "-s 1 -q $cT -r \"{}\" -o $r_TmpDir/$c Sample.$cT -f $refFasta -b $rAlnDir/$cSample.bam\n";
print OUT "\n\nawk '\$2<100000 {print \$1\":1-\" \$2}' $refFasta.chrom.sizes | xargs -P 1 -t -i{} perl $S_SV_scriptDir/bam2wig.pl ";
print OUT "\n\nawk '\$2<100000 {print \$1\":1-\" \$2}' $refFasta.chrom.sizes | xargs -P \${ncpus1} -t -i{} $^X $S_SV_scriptDir/bam2wig.pl "; print OUT "-s 1 -q $cT -r \"{}\" -o $r_TmpDir/$c Sample.$cT.small_contigs -f $refFasta -b $rAlnDir/$cSample.bam -a\n"; } print OUT "cat $r_TmpDir/$cSample.$cT.*.wig > $r_TmpDir/$cSample.$cT.wig\n\n";
illustration.
rpm_maker://tmp > cat no_overcommit.sh
!/bin/sh
(( ncpus15 =
nproc
< 15 ?nproc
: 15 )) (( ncpus12 =nproc
< 12 ?nproc
: 12 )) (( ncpus1 =nproc
< 1 ?nproc
:1))echo "ncpus15: ${ncpus15} - ncpus12: ${ncpus12} - ncpus1: ${ncpus1}"
and execution in various conditions
rpm_maker://tmp > nproc 64 rpm_maker://tmp > sh no_overcommit.sh ncpus15: 15 - ncpus12: 12 - ncpus1: 1
now limit to 12 available procs
rpm_maker://tmp > taskset -c 0-11 sh no_overcommit.sh ncpus15: 12 - ncpus12: 12 - ncpus1: 1
even less
rpm_maker://tmp > taskset -c 0-3 sh no_overcommit.sh ncpus15: 4 - ncpus12: 4 - ncpus1: 1
regards
Eric
— Reply to this email directly, view it on GitHub https://github.com/KCCG/ClinSV/issues/42#issuecomment-1424325610, or unsubscribe https://github.com/notifications/unsubscribe-auth/AAEQQM2YRUC5Z2G2IJYSW4DWWUBADANCNFSM6AAAAAAUFMGTTU . You are receiving this because you are subscribed to this thread.Message ID: @.***>
As testing with NA12878_b38.bam takes a long time, I did not test with fewer than the requested resources.
I ran it using all 64 CPUs, i.e. with xargs -P 15 and -P 12 as originally hardcoded,
so sorry, I won't be able to comment on performance.
It would be nice to have a small dataset to test with.
Agreed, I think @j-bradlee has a small test dataset... James?
On Fri, 10 Feb 2023, 8:56 pm Eric Deveaud, @.***> wrote:
As testing with NA12878_b38.bam takes a long time, I did not test with fewer than the requested resources. I ran it using all 64 CPUs, i.e. with xargs -P 15 and -P 12 as originally hardcoded, so sorry, I won't be able to comment on performance.
it would be nice to have a small dataset to test
— Reply to this email directly, view it on GitHub https://github.com/KCCG/ClinSV/issues/42#issuecomment-1425539700, or unsubscribe https://github.com/notifications/unsubscribe-auth/AAEQQM5IC4H3ZRV6WYDPFCLWWYGF7ANCNFSM6AAAAAAUFMGTTU . You are receiving this because you commented.Message ID: @.***>
Here's the subsampled b38 bai file: https://drive.google.com/file/d/1OiFZA4z-k2eTNOYLktHflsI5pGwaijvI/view?usp=share_link Here's the subsampled b38 bam file: https://drive.google.com/file/d/1ohArdn1l2etlQunD8gI1TzHyPZWLRMuT/view?usp=share_link
thanks,
As soon as I can get my hands on my computer, I'll try with these.
Stay tuned.
Maybe you can change the bam file permissions to "Anyone with the link".
Currently I can't get it from the cluster (different ID) using gdown.
Apologies, I thought I had set it to anyone with the link. Try again now.
it's OK now thanks
Hello
Using ClinSV-1.0.0, we noticed a potential problem when ClinSV is run under taskset, a batch system like Slurm, etc.
ClinSV has hardcoded numbers of processes for xargs (`xargs -P 15`, `xargs -P 12`), which may exceed the number of processors available to the process.
E.g. if I run ClinSV through our standard Slurm allocation (4 procs), we will have at least 15 processes competing for 4 cores, leading to oversubscription.
I don't know how to get, in Perl, the number of processors available to a process (not the number of physical processors on the host), like `len(os.sched_getaffinity(0))` in Python or `omp_get_max_threads();` in C++.
It would be nice to have the xargs -P value limited to the number of available procs.
Anyway, a shell solution would be to compute the number of available CPUs (`nproc` is adequate for that) and replace the hardcoded values with the computed number. Something like: compute ncpus, then replace `xargs -P 15` by `xargs -P ${ncpus}`.
regards
Eric