evaluation - Githubissues

martin-raden commented 5 years ago

Alle Kombinationen der folgenden Predictor- und Accessibility-Varianten sollten getestet werden (bzgl. Qualität, Laufzeit, Speicher) im Vergleich zu deinem IntaRNA-master.

accessibility

RNAplfold (default)
RNAplex-like approximation (to be implemented)

predictor

exact (2D speichervariante)
heuristisch (erst links dann rechts)
heuristisch ala RIblast (erst stacks only dann interior loops bis energie nicht verbessert)

martin-raden commented 5 years ago

clone and run https://github.com/BackofenLab/IntaRNA-benchmark (ping @rickgelhausen)

martin-raden commented 5 years ago

send respective calls @ martin

Fulanko commented 5 years ago

Benchmarking

Default Master branch:

master ./IntaRNA

SeedExtension branch: length-dependence quality plots (8 threads)

memory_efficient_xx (xx = 20, 30, 40) ./IntaRNA --model X -m M --qIntLenMax 20 --tIntLenMax 20 --threads 8 ./IntaRNA --model X -m M --qIntLenMax 30 --tIntLenMax 30 --threads 8 ./IntaRNA --model X -m M --qIntLenMax 40 --tIntLenMax 40 --threads 8

heuristic_xx (xx = 20, 30, 40) ./IntaRNA --model X -m H --qIntLenMax 20 --tIntLenMax 20 --threads 8 ./IntaRNA --model X -m H --qIntLenMax 30 --tIntLenMax 30 --threads 8 ./IntaRNA --model X -m H --qIntLenMax 40 --tIntLenMax 40 --threads 8

riblast_xx (xx = 20, 30, 40) ./IntaRNA --model X -m R --qIntLenMax 20 --tIntLenMax 20 --threads 8 ./IntaRNA --model X -m R --qIntLenMax 30 --tIntLenMax 30 --threads 8 ./IntaRNA --model X -m R --qIntLenMax 40 --tIntLenMax 40 --threads 8

ensemble_memory_efficient_xx (xx = 20, 30, 40) ./IntaRNA --model E -m M --qIntLenMax 20 --tIntLenMax 20 --threads 8 ./IntaRNA --model E -m M --qIntLenMax 30 --tIntLenMax 30 --threads 8 ./IntaRNA --model E -m M --qIntLenMax 40 --tIntLenMax 40 --threads 8

ensemble_heuristic_xx (xx = 20, 30, 40) ./IntaRNA --model E -m H --qIntLenMax 20 --tIntLenMax 20 --threads 8 ./IntaRNA --model E -m H --qIntLenMax 30 --tIntLenMax 30 --threads 8 ./IntaRNA --model E -m H --qIntLenMax 40 --tIntLenMax 40 --threads 8

length-dependence time/memory plots (1 thread + no-ED)

no_ed_memory_efficient_xx (xx = 20, 30, 40) ./IntaRNA --model X -m M --qIntLenMax 20 --tIntLenMax 20 --tacc=N --qacc=N --threads 1 ./IntaRNA --model X -m M --qIntLenMax 30 --tIntLenMax 30 --tacc=N --qacc=N --threads 1 ./IntaRNA --model X -m M --qIntLenMax 40 --tIntLenMax 40 --tacc=N --qacc=N --threads 1

no_ed_heuristic_xx (xx = 20, 30, 40) ./IntaRNA --model X -m H --qIntLenMax 20 --tIntLenMax 20 --tacc=N --qacc=N --threads 1 ./IntaRNA --model X -m H --qIntLenMax 30 --tIntLenMax 30 --tacc=N --qacc=N --threads 1 ./IntaRNA --model X -m H --qIntLenMax 40 --tIntLenMax 40 --tacc=N --qacc=N --threads 1

no_ed_riblast_xx (xx = 20, 30, 40) ./IntaRNA --model X -m R --qIntLenMax 20 --tIntLenMax 20 --tacc=N --qacc=N --threads 1 ./IntaRNA --model X -m R --qIntLenMax 30 --tIntLenMax 30 --tacc=N --qacc=N --threads 1 ./IntaRNA --model X -m R --qIntLenMax 40 --tIntLenMax 40 --tacc=N --qacc=N --threads 1

no_ed_ensemble_memory_efficient_xx (xx = 20, 30, 40) ./IntaRNA --model E -m M --qIntLenMax 20 --tIntLenMax 20 --tacc=N --qacc=N --threads 1 ./IntaRNA --model E -m M --qIntLenMax 30 --tIntLenMax 30 --tacc=N --qacc=N --threads 1 ./IntaRNA --model E -m M --qIntLenMax 40 --tIntLenMax 40 --tacc=N --qacc=N --threads 1

no_ed_ensemble_heuristic_xx (xx = 20, 30, 40) ./IntaRNA --model E -m H --qIntLenMax 20 --tIntLenMax 20 --tacc=N --qacc=N --threads 1 ./IntaRNA --model E -m H --qIntLenMax 30 --tIntLenMax 30 --tacc=N --qacc=N --threads 1 ./IntaRNA --model E -m H --qIntLenMax 40 --tIntLenMax 40 --tacc=N --qacc=N --threads 1

martin-raden commented 5 years ago


#########################################################################

# benchmark on new cluster with qsub in
# /scratch/bi03/gelhausr/intaRNA/IntaRNA-benchmark/

# results in
# /scratch/bi03/gelhausr/intaRNA/IntaRNA-benchmark/benchmark-Frank/

#########################################################################

# extra IntaRNA arguments appended to the -a argument list of every generated
# call; empty here, the thread count is set per call via --threads
defaultArgs=""

# IntaRNA binary passed to intarna-benchmark-sge.sh via -b
IntaRNA="/scratch/rna/bisge001/Software/intarna/dev-Frank-190319/bin/IntaRNA"

#########################################################################

# Print (not submit) one qsub command line that starts
# intarna-benchmark-sge.sh with the -e flag set.
#   $1 - value handed to the benchmark script via -c
#   $2 - IntaRNA arguments handed over via -a ($defaultArgs is appended)
# Globals read: IntaRNA (handed over via -b), defaultArgs
# NOTE(review): -e presumably controls ED handling in the benchmark
# script - confirm against intarna-benchmark-sge.sh
printCall() {
  local callId="$1"
  local callArgs="$2"
  printf "qsub -pe smp 24 -o /scratch/bi03/gelhausr/intaRNA/IntaRNA-benchmark/sge-out/ intarna-benchmark-sge.sh -e -b %s -c '%s' -o './benchmark-Frank/' -a '%s %s'\n" \
    "$IntaRNA" "$callId" "$callArgs" "$defaultArgs"
}

# Print (not submit) one qsub command line that starts
# intarna-benchmark-sge.sh without the -e flag.
#   $1 - value handed to the benchmark script via -c
#   $2 - IntaRNA arguments handed over via -a ($defaultArgs is appended)
# Globals read: IntaRNA (handed over via -b), defaultArgs
# NOTE(review): omitting -e presumably forces ED values to be recomputed -
# confirm against intarna-benchmark-sge.sh
printCallNewED() {
  local callId="$1"
  local callArgs="$2"
  printf "qsub -pe smp 24 -o /scratch/bi03/gelhausr/intaRNA/IntaRNA-benchmark/sge-out/ intarna-benchmark-sge.sh -b %s -c '%s' -o './benchmark-Frank/' -a '%s %s'\n" \
    "$IntaRNA" "$callId" "$callArgs" "$defaultArgs"
}

#########################################################################
# calls 190319
#########################################################################
(
# Everything below runs in a subshell, so the helper variables stay local to
# it; the combined output of all print calls is written to calls.190319.sh.

# master with default settings
printCall "default" " --threads=0"
printCallNewED "mem-default" " --threads=1 --tacc=N --qacc=N"

# need to recompute EDs, since otherwise --tIntLenMax is set by calls.py
for len in 20 30 40 50 60 70 80 100; do
  # interaction length limits shared by all calls of this iteration
  lenArgs=" --qIntLenMax=$len --tIntLenMax=$len"

  # master restricted to short interactions
  printCall "default-intLenMax$len" "$lenArgs --threads=0"
  printCallNewED "mem-default-intLenMax$len" "$lenArgs --threads=1 --tacc=N --qacc=N"

  # seed-extension: "<model>:<modes>" - model X with modes M H R first,
  # then model E with modes M H
  for modelModes in "X:M H R" "E:M H"; do
    model=${modelModes%%:*}
    # unquoted on purpose: the mode list is split into single modes
    for mode in ${modelModes#*:}; do
      printCall "$model-$mode-intLenMax$len" " --model=$model -m $mode$lenArgs --threads=0"
      printCallNewED "mem-$model-$mode-intLenMax$len" " --model=$model -m $mode$lenArgs --threads=1 --tacc=N --qacc=N"
    done
  done
done

) > calls.190319.sh # | sort
#########################################################################

martin-raden commented 5 years ago


#########################################################################

# benchmark on new cluster with qsub in
# /scratch/bi03/gelhausr/intaRNA/IntaRNA-benchmark/

# results in
# /scratch/bi03/gelhausr/intaRNA/IntaRNA-benchmark/benchmark-Frank/

#########################################################################

# extra IntaRNA arguments appended to the -a argument list of every generated
# call; empty here, the thread count is set per call via --threads
defaultArgs=""

# IntaRNA binary passed to intarna-benchmark-sge.sh via -b
IntaRNA="/scratch/rna/bisge001/Software/intarna/dev-Frank-190403/bin/IntaRNA"

#########################################################################

# Print (not submit) one qsub command line that starts
# intarna-benchmark-sge.sh with the -e flag set.
#   $1 - value handed to the benchmark script via -c
#   $2 - IntaRNA arguments handed over via -a ($defaultArgs is appended)
# Globals read: IntaRNA (handed over via -b), defaultArgs
# NOTE(review): -e presumably controls ED handling in the benchmark
# script - confirm against intarna-benchmark-sge.sh
printCall() {
  local callId="$1"
  local callArgs="$2"
  printf "qsub -pe smp 24 -o /scratch/bi03/gelhausr/intaRNA/IntaRNA-benchmark/sge-out/ intarna-benchmark-sge.sh -e -b %s -c '%s' -o './benchmark-Frank/' -a '%s %s'\n" \
    "$IntaRNA" "$callId" "$callArgs" "$defaultArgs"
}

# Print (not submit) one qsub command line that starts
# intarna-benchmark-sge.sh without the -e flag.
#   $1 - value handed to the benchmark script via -c
#   $2 - IntaRNA arguments handed over via -a ($defaultArgs is appended)
# Globals read: IntaRNA (handed over via -b), defaultArgs
# NOTE(review): omitting -e presumably forces ED values to be recomputed -
# confirm against intarna-benchmark-sge.sh
printCallNewED() {
  local callId="$1"
  local callArgs="$2"
  printf "qsub -pe smp 24 -o /scratch/bi03/gelhausr/intaRNA/IntaRNA-benchmark/sge-out/ intarna-benchmark-sge.sh -b %s -c '%s' -o './benchmark-Frank/' -a '%s %s'\n" \
    "$IntaRNA" "$callId" "$callArgs" "$defaultArgs"
}

#########################################################################
# calls 190403
#########################################################################
(
# Everything below runs in a subshell, so the helper variables stay local to
# it; the combined output of all print calls is written to calls.190403.sh.
# Only the model X quality calls are active; all other calls stay disabled.

# disabled: master with default settings
#printCall "default" " --threads=0"
#printCallNewED "mem-default" " --threads=1 --tacc=N --qacc=N"

# need to recompute EDs, since otherwise --tIntLenMax is set by calls.py
for len in 20 30 40 50 60 70 80 100; do
  # interaction length limits shared by all calls of this iteration
  lenArgs=" --qIntLenMax=$len --tIntLenMax=$len"

  # disabled: master restricted to short interactions
  #printCall "default-intLenMax$len" "$lenArgs --threads=0"
  #printCallNewED "mem-default-intLenMax$len" "$lenArgs --threads=1 --tacc=N --qacc=N"

  # seed-extension, model X
  for mode in M H R; do
    printCall "X-$mode-intLenMax$len" " --model=X -m $mode$lenArgs --threads=0"
    # disabled: memory run
    #printCallNewED "mem-X-$mode-intLenMax$len" " --model=X -m $mode$lenArgs --threads=1 --tacc=N --qacc=N"
  done

  # disabled: seed-extension, model E
  #for mode in M H; do
  #  printCall "E-$mode-intLenMax$len" " --model=E -m $mode$lenArgs --threads=0"
  #  printCallNewED "mem-E-$mode-intLenMax$len" " --model=E -m $mode$lenArgs --threads=1 --tacc=N --qacc=N"
  #done
done

) > calls.190403.sh # | sort
#########################################################################

Fulanko / IntaRNA

evaluation #5

accessibility

predictor