rnabioco / valr

Genome Interval Arithmetic in R
http://rnabioco.github.io/valr/
Other
88 stars 25 forks source link

add genomecov #411

Closed kriemo closed 1 year ago

kriemo commented 1 year ago
library(valr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(microbenchmark)

genome <- read_genome("https://hgdownload.soe.ucsc.edu/goldenPath/sacCer3/bigZips/sacCer3.chrom.sizes")

ivls <- bed_random(
  genome,
  length = 100,
  n = 1e5, 
  seed = 42
) 

bp <- bed_partition(ivls, .depth = n())
bp
#> # A tibble: 154,751 × 4
#>    chrom start   end .depth
#>    <chr> <int> <int>  <int>
#>  1 chrI     59   159      1
#>  2 chrI    165   207      1
#>  3 chrI    207   265      2
#>  4 chrI    265   307      1
#>  5 chrI    456   527      1
#>  6 chrI    527   534      2
#>  7 chrI    534   556      3
#>  8 chrI    556   617      2
#>  9 chrI    617   627      3
#> 10 chrI    627   634      2
#> # ℹ 154,741 more rows

bgc <- bed_genomecov(ivls, genome)
bgc
#> # A tibble: 154,751 × 4
#>    chrom start   end .depth
#>    <chr> <int> <int>  <int>
#>  1 chrI     59   159      1
#>  2 chrI    165   207      1
#>  3 chrI    207   265      2
#>  4 chrI    265   307      1
#>  5 chrI    456   527      1
#>  6 chrI    527   534      2
#>  7 chrI    534   556      3
#>  8 chrI    556   617      2
#>  9 chrI    617   627      3
#> 10 chrI    627   634      2
#> # ℹ 154,741 more rows

identical(bp, bgc)
#> [1] TRUE

# 10 million intervals
ivls <- bed_random(
  genome,
  length = 100,
  n = 1e7, 
  seed = 42
) 
microbenchmark(bed_genomecov(ivls, genome), times = 3, unit = 's')
#> Warning in microbenchmark(bed_genomecov(ivls, genome), times = 3, unit = "s"):
#> less accurate nanosecond times to avoid potential integer overflows
#> Unit: seconds
#>                         expr      min       lq     mean   median       uq
#>  bed_genomecov(ivls, genome) 1.653511 1.677021 1.860034 1.700531 1.963296
#>      max neval
#>  2.22606     3

Created on 2023-10-11 with reprex v2.0.2

jayhesselberth commented 1 year ago

Can you run styler::style_pkg()?

jayhesselberth commented 1 year ago

This looks great, thanks for putting it together.