rnabioco / valr

Genome Interval Arithmetic in R
http://rnabioco.github.io/valr/
Other
87 stars 25 forks source link

Recent benchmarks #259

Closed jayhesselberth closed 7 years ago

jayhesselberth commented 7 years ago
library(valr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(tibble)
library(scales)
library(microbenchmark)

# Benchmark valr functions on two sets of random intervals and plot the
# distribution of execution times per function.
genome <- read_genome(valr_example("hg19.chrom.sizes.gz"))

# number of intervals
n <- 1e6
# number of timing reps
nrep <- 20

# fixed seeds so the same x/y intervals are generated on every run
seed_x <- 1010486
x <- bed_random(genome, n = n, seed = seed_x)
seed_y <- 9283019
y <- bed_random(genome, n = n, seed = seed_y)

res <- microbenchmark(
  # randomizing functions
  bed_random(genome, n = n, seed = seed_x),
  bed_shuffle(x, genome, seed = seed_x),
  # single tbl functions
  bed_slop(x, genome, both = 1000),
  bed_flank(x, genome, both = 1000),
  bed_merge(x),
  bed_cluster(x),
  bed_complement(x, genome),
  # multi tbl functions
  bed_closest(x, y),
  bed_intersect(x, y),
  # `n()` is a function call, so it does not clash with the numeric `n` above
  bed_map(x, y, .n = n()),
  bed_subtract(x, y),
  # stats
  bed_absdist(x, y, genome),
  bed_reldist(x, y),
  bed_jaccard(x, y),
  bed_fisher(x, y, genome),
  bed_projection(x, y, genome),
  # utilities
  bound_intervals(x, genome),
  # don't test makewindows ... too slooow
  # bed_makewindows(x, genome, win_size = 10),
  times = nrep,
  unit = "s"
)

# convert nanoseconds to seconds
res <- res %>%
  as_tibble() %>%
  mutate(time = time / 1e9) %>%
  arrange(time)

# Filter out outliers so they do not stretch the time axis. The cutoff (upper
# boxplot whisker plus 5%) is computed per expression: a single cutoff over
# the pooled timings would discard the measurements of the slower functions
# instead of just their outliers.
res <- res %>%
  group_by(expr) %>%
  filter(time <= max(boxplot.stats(time)$stats) * 1.05) %>%
  ungroup()

ggplot(res, aes(x = reorder(expr, time), y = time)) +
  geom_boxplot(fill = "red", outlier.shape = NA, alpha = 0.5) +
  coord_flip() +
  theme_bw() +
  labs(
    y = "execution time (seconds)",
    x = "",
    title = "valr benchmarks",
    subtitle = paste(
      comma(n), "random x/y intervals,", comma(nrep), "repetitions"
    )
  )

jayhesselberth commented 7 years ago

After recent updates to makewindows, flank, merge.

library(valr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(tibble)
library(scales)
library(microbenchmark)

# Benchmark valr functions on two sets of random intervals and plot the
# distribution of execution times per function.
genome <- read_genome(valr_example("hg19.chrom.sizes.gz"))

# number of intervals
n <- 1e6
# number of timing reps
nrep <- 20

# fixed seeds so the same x/y intervals are generated on every run
seed_x <- 1010486
x <- bed_random(genome, n = n, seed = seed_x)
seed_y <- 9283019
y <- bed_random(genome, n = n, seed = seed_y)

res <- microbenchmark(
  # randomizing functions
  bed_random(genome, n = n, seed = seed_x),
  bed_shuffle(x, genome, seed = seed_x),
  # single tbl functions
  bed_slop(x, genome, both = 1000),
  bed_flank(x, genome, both = 1000),
  bed_merge(x),
  bed_cluster(x),
  bed_complement(x, genome),
  # multi tbl functions
  bed_closest(x, y),
  bed_intersect(x, y),
  # `n()` is a function call, so it does not clash with the numeric `n` above
  bed_map(x, y, .n = n()),
  bed_subtract(x, y),
  # stats
  bed_absdist(x, y, genome),
  bed_reldist(x, y),
  bed_jaccard(x, y),
  bed_fisher(x, y, genome),
  bed_projection(x, y, genome),
  # utilities
  bed_makewindows(x, genome, win_size = 100),
  times = nrep,
  unit = "s"
)

# convert nanoseconds to seconds
res <- res %>%
  as_tibble() %>%
  mutate(time = time / 1e9) %>%
  arrange(time)

# Filter out outliers so they do not stretch the time axis. The cutoff (upper
# boxplot whisker plus 5%) is computed per expression: a single cutoff over
# the pooled timings would discard the measurements of the slower functions
# instead of just their outliers.
res <- res %>%
  group_by(expr) %>%
  filter(time <= max(boxplot.stats(time)$stats) * 1.05) %>%
  ungroup()

ggplot(res, aes(x = reorder(expr, time), y = time)) +
  geom_boxplot(fill = "red", outlier.shape = NA, alpha = 0.5) +
  coord_flip() +
  theme_bw() +
  labs(
    y = "execution time (seconds)",
    x = "",
    title = "valr benchmarks",
    subtitle = paste(
      comma(n), "random x/y intervals,", comma(nrep), "repetitions"
    )
  )

jayhesselberth commented 7 years ago
library(valr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(tibble)
library(scales)
library(microbenchmark)

# Benchmark valr functions on two sets of random intervals and plot the
# distribution of execution times per function.
genome <- read_genome(valr_example("hg19.chrom.sizes.gz"))

# number of intervals
n <- 1e6
# number of timing reps
nrep <- 20

# fixed seeds so the same x/y intervals are generated on every run
seed_x <- 1010486
x <- bed_random(genome, n = n, seed = seed_x)
seed_y <- 9283019
y <- bed_random(genome, n = n, seed = seed_y)

res <- microbenchmark(
  # randomizing functions
  bed_random(genome, n = n, seed = seed_x),
  bed_shuffle(x, genome, seed = seed_x),
  # single tbl functions
  bed_slop(x, genome, both = 1000),
  bed_flank(x, genome, both = 1000),
  bed_shift(x, genome),
  bed_merge(x),
  bed_cluster(x),
  bed_complement(x, genome),
  # multi tbl functions
  bed_closest(x, y),
  bed_intersect(x, y),
  # `n()` is a function call, so it does not clash with the numeric `n` above
  bed_map(x, y, .n = n()),
  bed_subtract(x, y),
  bed_window(x, y, genome),
  # stats
  bed_absdist(x, y, genome),
  bed_reldist(x, y),
  bed_jaccard(x, y),
  bed_fisher(x, y, genome),
  bed_projection(x, y, genome),
  # utilities
  bed_makewindows(x, genome, win_size = 100),
  times = nrep,
  unit = "s"
)

# convert nanoseconds to seconds
res <- res %>%
  as_tibble() %>%
  mutate(time = time / 1e9) %>%
  arrange(time)

# Filter out outliers so they do not stretch the time axis: within each
# expression, keep timings up to the upper boxplot whisker plus 5%. A grouped
# filter keeps the same rows as joining a per-expression summary table, but
# avoids the implicit join key and the extra helper column.
res <- res %>%
  group_by(expr) %>%
  filter(time <= max(boxplot.stats(time)$stats) * 1.05) %>%
  ungroup()

ggplot(res, aes(x = reorder(expr, time), y = time)) +
  geom_boxplot(fill = "red", outlier.shape = NA, alpha = 0.5) +
  coord_flip() +
  theme_bw() +
  labs(
    y = "execution time (seconds)",
    x = "",
    title = "valr benchmarks",
    subtitle = paste(
      comma(n), "random x/y intervals,", comma(nrep), "repetitions"
    )
  )

jayhesselberth commented 7 years ago

With dplyr v0.7.

library(valr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(tibble)
library(scales)
library(microbenchmark)

# Benchmark valr functions on two sets of random intervals and plot the
# distribution of execution times per function.
genome <- read_genome(valr_example("hg19.chrom.sizes.gz"))

# number of intervals
n <- 1e6
# number of timing reps
nrep <- 20

# fixed seeds so the same x/y intervals are generated on every run
seed_x <- 1010486
x <- bed_random(genome, n = n, seed = seed_x)
seed_y <- 9283019
y <- bed_random(genome, n = n, seed = seed_y)

res <- microbenchmark(
  # randomizing functions
  bed_random(genome, n = n, seed = seed_x),
  bed_shuffle(x, genome, seed = seed_x),
  # single tbl functions
  bed_slop(x, genome, both = 1000),
  bed_flank(x, genome, both = 1000),
  bed_shift(x, genome),
  bed_merge(x),
  bed_cluster(x),
  bed_complement(x, genome),
  # multi tbl functions
  bed_closest(x, y),
  bed_intersect(x, y),
  # `n()` is a function call, so it does not clash with the numeric `n` above
  bed_map(x, y, .n = n()),
  bed_subtract(x, y),
  bed_window(x, y, genome),
  # stats
  bed_absdist(x, y, genome),
  bed_reldist(x, y),
  bed_jaccard(x, y),
  bed_fisher(x, y, genome),
  bed_projection(x, y, genome),
  # utilities
  bed_makewindows(x, genome, win_size = 100),
  times = nrep,
  unit = "s"
)

# convert nanoseconds to seconds
res <- res %>%
  as_tibble() %>%
  mutate(time = time / 1e9) %>%
  arrange(time)

# Filter out outliers so they do not stretch the time axis: within each
# expression, keep timings up to the upper boxplot whisker plus 5%. A grouped
# filter keeps the same rows as joining a per-expression summary table, but
# avoids the implicit join key and the extra helper column.
res <- res %>%
  group_by(expr) %>%
  filter(time <= max(boxplot.stats(time)$stats) * 1.05) %>%
  ungroup()

ggplot(res, aes(x = reorder(expr, time), y = time)) +
  geom_boxplot(fill = "red", outlier.shape = NA, alpha = 0.5) +
  coord_flip() +
  theme_bw() +
  labs(
    y = "execution time (seconds)",
    x = "",
    title = "valr benchmarks",
    subtitle = paste(
      comma(n), "random x/y intervals,", comma(nrep), "repetitions"
    )
  )

Session info ``` r devtools::session_info() #> Session info ------------------------------------------------------------- #> setting value #> version R version 3.4.0 (2017-04-21) #> system x86_64, darwin16.5.0 #> ui unknown #> language (EN) #> collate en_US.UTF-8 #> tz America/Denver #> date 2017-06-10 #> Packages ----------------------------------------------------------------- #> package * version date source #> assertthat 0.2.0 2017-04-11 CRAN (R 3.4.0) #> backports 1.1.0 2017-05-22 CRAN (R 3.4.0) #> base * 3.4.0 2017-05-04 local #> bindr 0.1 2016-11-13 cran (@0.1) #> bindrcpp * 0.1 2016-12-11 CRAN (R 3.4.0) #> bitops 1.0-6 2013-08-17 cran (@1.0-6) #> broom 0.4.2 2017-02-13 CRAN (R 3.4.0) #> colorspace 1.3-2 2016-12-14 CRAN (R 3.4.0) #> compiler 3.4.0 2017-05-04 local #> datasets * 3.4.0 2017-05-04 local #> devtools 1.13.2 2017-06-02 CRAN (R 3.4.0) #> digest 0.6.12 2017-01-27 CRAN (R 3.4.0) #> dplyr * 0.7.0 2017-06-08 Github (tidyverse/dplyr@43dc94e) #> evaluate 0.10 2016-10-11 cran (@0.10) #> foreign 0.8-68 2017-04-24 CRAN (R 3.4.0) #> ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.4.0) #> glue 1.0.0 2017-04-17 cran (@1.0.0) #> graphics * 3.4.0 2017-05-04 local #> grDevices * 3.4.0 2017-05-04 local #> grid 3.4.0 2017-05-04 local #> gtable 0.2.0 2016-02-26 CRAN (R 3.4.0) #> hms 0.3 2016-11-22 CRAN (R 3.4.0) #> htmltools 0.3.6 2017-04-28 cran (@0.3.6) #> knitr 1.16 2017-05-18 cran (@1.16) #> labeling 0.3 2014-08-23 CRAN (R 3.4.0) #> lattice 0.20-35 2017-03-25 CRAN (R 3.4.0) #> lazyeval 0.2.0 2016-06-12 CRAN (R 3.4.0) #> magrittr 1.5 2014-11-22 CRAN (R 3.4.0) #> memoise 1.1.0 2017-04-21 CRAN (R 3.4.0) #> methods * 3.4.0 2017-05-04 local #> microbenchmark * 1.4-2.1 2015-11-25 CRAN (R 3.4.0) #> mnormt 1.5-5 2016-10-15 CRAN (R 3.4.0) #> munsell 0.4.3 2016-02-13 CRAN (R 3.4.0) #> nlme 3.1-131 2017-02-06 CRAN (R 3.4.0) #> parallel 3.4.0 2017-05-04 local #> pkgconfig 2.0.1 2017-03-21 cran (@2.0.1) #> plyr 1.8.4 2016-06-08 CRAN (R 3.4.0) #> psych 1.7.5 2017-05-03 CRAN (R 3.4.0) #> 
R6 2.2.1 2017-05-10 CRAN (R 3.4.0) #> Rcpp 0.12.11 2017-05-22 CRAN (R 3.4.0) #> RCurl 1.95-4.8 2016-03-01 CRAN (R 3.4.0) #> readr 1.1.1 2017-05-16 CRAN (R 3.4.0) #> reshape2 1.4.2 2016-10-22 CRAN (R 3.4.0) #> rlang 0.1.1.9000 2017-06-08 Github (hadley/rlang@7f53e56) #> rmarkdown 1.5.9000 2017-06-10 Github (rstudio/rmarkdown@a1537e4) #> rprojroot 1.2 2017-01-16 CRAN (R 3.4.0) #> scales * 0.4.1 2016-11-09 CRAN (R 3.4.0) #> stats * 3.4.0 2017-05-04 local #> stringi 1.1.5 2017-04-07 CRAN (R 3.4.0) #> stringr 1.2.0 2017-02-18 CRAN (R 3.4.0) #> tibble * 1.3.3 2017-05-28 cran (@1.3.3) #> tidyr 0.6.3 2017-05-15 CRAN (R 3.4.0) #> tools 3.4.0 2017-05-04 local #> utils * 3.4.0 2017-05-04 local #> valr * 0.3.0 2017-06-10 local #> withr 1.0.2 2016-06-20 CRAN (R 3.4.0) #> XML 3.98-1.7 2017-05-03 CRAN (R 3.4.0) #> yaml 2.1.14 2016-11-12 cran (@2.1.14) ```