Open njtierney opened 6 years ago
Could possibly use `rle()`
to create the encodings / start-end points for each rectangle.
rle(airquality$Ozone)
#> Run Length Encoding
#> lengths: int [1:152] 1 1 1 1 1 1 1 1 1 1 ...
#> values : int [1:152] 41 36 12 18 NA 28 23 19 8 NA ...
Created on 2019-06-08 by the reprex package (v0.2.1)
It looks like I might be able to use an alternative implementation of `fingerprint()`
that is a bit faster for larger vectors.
#' Fingerprint a vector: class label for observed cells, NA for missing ones
#'
#' @param x A vector (for example, a single data frame column).
#' @return A character vector with one entry per element of `is.na(x)`:
#'   `NA_character_` where the element is missing, otherwise the class of `x`
#'   (several classes are joined with a newline). Names and dimensions of
#'   `is.na(x)` are carried over to the result.
fingerprint <- function(x) {
  # Build the label once; class(x) can have several entries,
  # e.g. c("ordered", "factor"), which are joined into a single string.
  class_label <- paste(class(x), collapse = "\n")

  is_missing <- is.na(x)

  # Start from an all-"observed" character vector and blank out the missing
  # cells. Unlike ifelse(test, NA, label), the result is always character,
  # including for empty or entirely-missing input.
  out <- rep_len(class_label, length(is_missing))
  out[is_missing] <- NA_character_

  # ifelse() returns a result shaped like its test; keep that behaviour so
  # named or dimensioned input still round-trips.
  attributes(out) <- attributes(is_missing)
  out
}
#' Fingerprint a vector using `dplyr::if_else()`
#'
#' @param x A vector (for example, a single data frame column).
#' @return A character vector the same length as `x`: `NA_character_` where
#'   `x` is missing, otherwise the class of `x` (several classes are joined
#'   with a newline).
fingerprint_2 <- function(x) {
  # One label for the whole vector. as.character() drops the glue class so the
  # label is a plain string, matching the NA_character_ used for missing cells
  # (presumably needed for if_else()'s type check -- confirm against dplyr docs).
  class_label <- as.character(
    glue::glue_collapse(class(x), sep = "\n")
  )

  # Missing cells stay NA; every observed cell gets the class label.
  dplyr::if_else(
    condition = is.na(x),
    true = NA_character_,
    false = class_label
  )
}
#' Simulate a numeric vector with values missing at random
#'
#' @param size Length of the vector to create.
#' @param n_missing Number of elements to set to `NA`. Defaults to one tenth
#'   of `size`, rounded.
#' @return A double vector of length `size` drawn with `runif()`, in which
#'   `n_missing` randomly chosen positions have been replaced by `NA`.
create_vec <- function(size, n_missing = round(size / 10)) {
  vec <- runif(size)

  # Fail early with a clear message instead of an opaque sample.int() error.
  stopifnot(
    length(n_missing) == 1L,
    !is.na(n_missing),
    n_missing >= 0,
    n_missing <= length(vec)
  )

  # sample.int() draws positions directly and sidesteps sample()'s special
  # handling of a length-one numeric `x`.
  na_positions <- sample.int(length(vec), size = n_missing)
  vec[na_positions] <- NA
  vec
}
fingerprint(create_vec(100))
#> [1] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [8] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [15] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [22] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [29] NA "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [36] "numeric" "numeric" "numeric" NA NA "numeric" "numeric"
#> [43] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [50] "numeric" "numeric" NA "numeric" "numeric" "numeric" "numeric"
#> [57] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [64] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [71] NA NA "numeric" "numeric" NA "numeric" "numeric"
#> [78] "numeric" "numeric" "numeric" "numeric" NA "numeric" NA
#> [85] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [92] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [99] "numeric" NA
fingerprint_2(create_vec(100))
#> [1] NA "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [8] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [15] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [22] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" NA
#> [29] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [36] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" NA
#> [43] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [50] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [57] "numeric" NA "numeric" "numeric" "numeric" "numeric" "numeric"
#> [64] "numeric" "numeric" "numeric" "numeric" NA "numeric" "numeric"
#> [71] NA NA NA "numeric" "numeric" "numeric" NA
#> [78] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [85] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" NA
#> [92] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> [99] "numeric" "numeric"
# Benchmark both fingerprint implementations on the same simulated input,
# repeating the braced block once for every value of `size`.
bm1 <- bench::press(
  size = c(1e2, 1e3, 1e4, 1e5, 1e6),
  {
    # One shared input per size so both implementations see identical data.
    input <- create_vec(size)
    bench::mark(
      new = fingerprint_2(input),
      old = fingerprint(input)
    )
  }
)
#> Running with:
#> size
#> 1 100
#> 2 1000
#> 3 10000
#> 4 100000
#> 5 1000000
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.
plot(bm1)
#> Loading required namespace: tidyr
summary(bm1)
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.
#> # A tibble: 10 x 7
#> expression size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 new 100 53.27µs 62.7µs 13557. 56.03KB 12.0
#> 2 old 100 45.88µs 50.67µs 17296. 18.5KB 7.90
#> 3 new 1000 99.45µs 133.53µs 6725. 63.19KB 7.97
#> 4 old 1000 157.55µs 186.07µs 5136. 50.97KB 4.00
#> 5 new 10000 769.07µs 917.66µs 899. 625.69KB 9.99
#> 6 old 10000 1.68ms 1.97ms 462. 504.48KB 3.98
#> 7 new 100000 5.49ms 6.57ms 136. 6.1MB 16.0
#> 8 old 100000 15.56ms 18.01ms 51.2 4.92MB 5.91
#> 9 new 1000000 61.29ms 71.12ms 11.3 61.04MB 28.3
#> 10 old 1000000 151.73ms 155.05ms 6.44 49.21MB 4.83
Created on 2021-05-28 by the reprex package (v2.0.0)
After some discussion with Mike, here are some ways to speed up visdat:
`fingerprint()`
- change it so that I don't `paste` in every element (minor speedup)