Closed philipp-baumann closed 1 year ago
r$> # Set up -----------------------------------------------------------------------
library("opusreader2")
path <- file.path("data", "spectra", "2018-NABO")
opus_files <- list.files(
path = path, pattern = "\\.\\d+$", full.names = TRUE, recursive = TRUE
)
# number of OPUS files
length(opus_files)
# Benchmark single-threaded reading (standard) ---------------------------------
bnch_seq <- bench::mark(
data <- opusreader2::read_opus(dsn = path)
)
bnch_seq
# A tibble: 1 × 13
expression min median itr/s…¹ mem_a…² gc/se…³ n_itr n_gc total…⁴ result memory
<bch:expr> <bch> <bch:> <dbl> <bch:b> <dbl> <int> <dbl> <bch:t> <list> <list>
1 data <- opusreader2::read_opus(dsn = path) 21.1s 21.1s 0.0475 8.32GB 4.32 1 91 21.1s <list> <Rprofmem>
# … with 2 more variables: time <list>, gc <list>, and abbreviated variable names ¹`itr/sec`, ²mem_alloc, ³`gc/sec`,
# ⁴total_time
# ℹ Use `colnames()` to see all variable names
Warning message:
Some expressions had a GC in every iteration; so filtering is disabled.
r$> # memory sizes on disk for files, memory size
total_filesize_mb <- sum(sapply(opus_files, file.size)) / 1000^2
parsed_data_size <- unclass(pryr::object_size(data)) / 1000^2
read_alloc_memory <- unclass(bnch_seq$mem_alloc) / 1000^2
# comparson of file size and parsed size
mem_comp_seq <- tibble::tribble(
~total_file_size_MB, ~read_alloc_memory_in_MB, ~parsed_size_in_MB,
total_filesize_mb, read_alloc_memory, parsed_data_size
)
mem_comp_seq
# A tibble: 1 × 3
total_file_size_MB read_alloc_memory_in_MB parsed_size_in_MB
<dbl> <dbl> <dbl>
1 490. 8930. 2104.
r$> # Benchmark asynchronous (multisession) reading --------------------------------
library("future")
availableCores() # 8 threads := 4 cores
plan(multisession(workers = 4))
# does chunk the file list according to dsn into number of `workers` registered
# in future
bnch_multis4 <- bench::mark(
data_multis4 <- opusreader2::read_opus(dsn = path, parallel = TRUE)
)
Loading required package: future.apply
Warning message:
Some expressions had a GC in every iteration; so filtering is disabled.
r$> bnch_multis4
# A tibble: 1 × 13
expression min median itr/s…¹ mem_a…² gc/se…³ n_itr n_gc
<bch:expr> <bch:> <bch:> <dbl> <bch:b> <dbl> <int> <dbl>
1 data_multis4 <- opusreader2::read_opus(dsn = path, parallel = TRUE) 8.92s 8.92s 0.112 2.65GB 0.112 1 1
# … with 5 more variables: total_time <bch:tm>, result <list>, memory <list>, time <list>, gc <list>, and abbreviated
# variable names ¹`itr/sec`, ²mem_alloc, ³`gc/sec`
# ℹ Use `colnames()` to see all variable names
r$> parsed_data_size_multis4 <- unclass(pryr::object_size(data_multis4)) / 1000^2
read_alloc_memory_multis4 <- unclass(bnch_multis4$mem_alloc) / 1000^2
mem_comp_multis4 <- tibble::tribble(
~total_file_size_MB, ~read_alloc_memory_in_MB, ~parsed_size_in_MB,
total_filesize_mb, read_alloc_memory_multis4, parsed_data_size_multis4
)
mem_comp_multis4
# A tibble: 1 × 3
total_file_size_MB read_alloc_memory_in_MB parsed_size_in_MB
<dbl> <dbl> <dbl>
1 490. 2846. 3076.
r$> # Benchmark asynchronous (multicore) reading -----------------------------------
# uses forking of processes; only supported on Linux and MacOS
# also not recommended in RStudio (other the terminal or another editor like
# VSCode)
library("future")
plan(multicore(workers = 4))
# does chunk the file list according to dsn into number of `workers` registered
# in future
bnch_multic4 <- bench::mark(
data_multic4 <- opusreader2::read_opus(dsn = path, parallel = TRUE),
memory = FALSE # If you are benchmarking parallel code you must set `memory = FALSE`.
)
bnch_multic4
# A tibble: 1 × 13
expression min median itr/s…¹ mem_a…² gc/se…³ n_itr n_gc
<bch:expr> <bch:> <bch:> <dbl> <bch:b> <dbl> <int> <dbl>
1 data_multic4 <- opusreader2::read_opus(dsn = path, parallel = TRUE) 11.7s 11.7s 0.0852 NA 2.98 1 35
# … with 5 more variables: total_time <bch:tm>, result <list>, memory <list>, time <list>, gc <list>, and abbreviated
# variable names ¹`itr/sec`, ²mem_alloc, ³`gc/sec`
# ℹ Use `colnames()` to see all variable names
Warning message:
Some expressions had a GC in every iteration; so filtering is disabled.
r$> (parsed_data_size_multic4 <- unclass(pryr::object_size(data_multis4)) / 1000^2)
[1] 3076.439
r$> (read_alloc_memory_multic4 <- unclass(bnch_multis4$mem_alloc) / 1000^2)
[1] 2845.69
Here is the graphical "manual" (see first comment) profiling.
before R session with setup before read_opus()
read_opus()
after reading in parallel on a 4 cores registered in multisession mode. (chunked reading).
=> Based on the results above, we should do some more profiling to see where we can improve the single-threaded implementation (standard) of read_opus_impl()
.
Code for profiling when reading one file.
library("opusreader2")
file <- opus_file()
devtools::load_all()
library("profvis")
# visual benchmarking
profvis(
{
data <- read_opus(dsn = file)
},
interval = 0.005
)
# tabular benchmarking
bnch <- bench::mark(
data <- read_opus(dsn = file)
)
mem_list <- bnch$memory
mem_tab <- mem_list[[1]]
library("data.table")
setDT(mem_tab)
mem_ord <- na.omit(mem_tab[order(-bytes)])
# ordered table with stack trace expressions, top 31 memory usage
mem_top31 <- mem_ord[1:31, ]
mem_top31
r$> bnch
# A tibble: 1 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total…¹ result memory
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:t> <list> <list>
1 data <- read_opus(dsn = file) 16.8ms 17.8ms 56.3 1.02MB 13.4 21 5 373ms <named list> <Rprofmem>
# … with 2 more variables: time <list>, gc <list>, and abbreviated variable name ¹total_time
# ℹ Use `colnames()` to see all variable names
r$> mem_top31
what bytes trace
1: alloc 72080 readBin,read_raw.file,read_raw,read_opus_raw,read_opus_impl,FUN,...
2: alloc 72080 rawConnection,parse_opus,read_opus_impl,FUN,lapply,opus_lapply,...
3: alloc 38656 readBin,read_float,parse_chunk.data,parse_chunk,FUN,lapply,...
4: alloc 38648 seq.default,seq,rev,prepare_spectra,f,Reduce,...
5: alloc 38648 pmin,seq.default,seq,rev,prepare_spectra,f,...
6: alloc 38648 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
7: alloc 38648 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
8: alloc 38648 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
9: alloc 38608 readBin,read_float,parse_chunk.data,parse_chunk,FUN,lapply,...
10: alloc 38608 readBin,read_float,parse_chunk.data,parse_chunk,FUN,lapply,...
11: alloc 38600 seq.default,seq,rev,prepare_spectra,f,Reduce,...
12: alloc 38600 pmin,seq.default,seq,rev,prepare_spectra,f,...
13: alloc 38600 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
14: alloc 38600 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
15: alloc 38600 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
16: alloc 38600 seq.default,seq,rev,prepare_spectra,f,Reduce,...
17: alloc 38600 pmin,seq.default,seq,rev,prepare_spectra,f,...
18: alloc 38600 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
19: alloc 38600 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
20: alloc 38600 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
21: alloc 34736 readBin,read_character,parse_chunk.text,parse_chunk,FUN,lapply,...
22: alloc 19352 seq.default,seq,rev,prepare_spectra,f,Reduce,...
23: alloc 19352 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
24: alloc 19352 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
25: alloc 19328 seq.default,seq,rev,prepare_spectra,f,Reduce,...
26: alloc 19328 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
27: alloc 19328 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
28: alloc 19328 seq.default,seq,rev,prepare_spectra,f,Reduce,...
29: alloc 19328 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
30: alloc 19328 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
31: alloc 2096 <Anonymous>,cmpCallSymFun,cmpCall,cmp,cmpSymbolAssign,h,...
what bytes trace
I have two tentative remarks from rough profiling above (profvis guesses that estimated intervals of < 5ms will gain accurate timings).
r$> data.table(
list_object = names(data),
R_memory_size = lapply(data, pryr::object_size)
)
list_object R_memory_size
1: refl_no_atm_comp_data_param 8.54 kB
2: refl_no_atm_comp 79.81 kB
3: quant_report_refl 5.62 kB
4: sc_sample_data_param 8.54 kB
5: sc_sample 79.79 kB
6: sc_ref_data_param 8.54 kB
7: sc_ref 79.88 kB
8: optics 10.67 kB
9: optics_ref 10.60 kB
10: acquisition_ref 11.53 kB
11: fourier_transformation_ref 7.34 kB
12: fourier_transformation 7.34 kB
13: sample 9.18 kB
14: acquisition 11.51 kB
15: instrument_ref 27.37 kB
16: instrument 26.78 kB
17: lab_and_process_param_raw 8.18 kB
18: lab_and_process_param_processed 8.79 kB
19: info_block 14.16 kB
20: history 8.66 kB
Inevitably, the corresponding wavenumbers take almost as much memory as the 1-row matrix of the spectra.
r$> data.table(
list_object = names(refl_no_atm_comp),
data_type = lapply(refl_no_atm_comp, class),
R_memory_size = lapply(refl_no_atm_comp, pryr::object_size)
)
list_object data_type R_memory_size
1: block_type integer 56 B
2: channel_type integer 56 B
3: text_type integer 56 B
4: additional_type integer 56 B
5: offset integer 56 B
6: next_offset integer 56 B
7: chunk_size integer 56 B
8: block_type_name character 136 B
9: data matrix,array 39.42 kB
10: wavenumbers numeric 38.60 kB
Here some further exploration.
# spectra object (list) without wavenumbers element
refl <- refl_no_atm_comp[!names(refl_no_atm_comp) %in% "wavenumbers"]
wavenumbers <- refl_no_atm_comp$wavenumbers
# conceptually as in `prepare_spectra()`
refl_list <- list(refl = refl)
pryr::object_size(refl)
pryr::object_size(refl_list)
# define appending methods to test in benchmark
c_fun <- function(refl_list, wavenumbers) {
refl_list[[1]] <- c(refl_list[[1]], wavenumbers = wavenumbers)
return(refl_list)
}
append_fun <- function(refl_list, wavenumbers) {
names_first <- names(refl_list[[1]])
refl_list[[1]] <- append(x = refl_list[[1]], values = wavenumbers)
# one extra step to set names
names(refl_list[[1]]) <- c(names_first, "wavenumbers")
return(refl_list)
}
# at first level
c_fun_first <- function(refl, wavenumbers) {
refl <- c(refl, wavenumbers = wavenumbers)
return(refl)
}
r$> ## benchmark: almost no difference in terms of memory, however processing time
# best solution
bench::mark(
c_fun(refl_list, wavenumbers)
)
# A tibble: 1 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 c_fun(refl_list, wavenumbers) 1.13ms 1.14ms 796. 75.5KB 2.03 393 1 494ms <named list [1]> <Rprofmem [2 × 3]> <bench_tm [394]> <tibble [394 × 3]>
r$> # actually, this is the most efficient for read time!
bench::mark(
append_fun(refl_list, wavenumbers)
)
# A tibble: 1 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 append_fun(refl_list, wavenumbers) 142µs 158µs 6056. 113KB 28.1 2586 12 427ms <named list [1]> <Rprofmem [3 × 3]> <bench_tm [2,598]> <tibble [2,598 × 3]>
r$> # no difference to c_fun
bench::mark(
c_fun_first(refl, wavenumbers)
)
# A tibble: 1 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 c_fun_first(refl, wavenumbers) 1.12ms 1.14ms 820. 75.5KB 4.11 399 2 487ms <named list [4,828]> <Rprofmem [2 × 3]> <bench_tm [401]> <tibble [401 × 3]>
``
append
+ naming (append_fun()
approach above) instead of c()
. Let's implement this?prepare_spectra
results simply because the file stores the wavenumbers as "from", "to", "by" (vector of lenght 3 with doubles), but we expand this for each spectrum to a full sequence. This offers I think new ways how we can implement memory reduction from the OPUS "file" binary logic. For 2., I spontaneously think of
read_opus()
. We could implement sort of like a lookup table that maps sets of sample measurements (OPUS files; append()
in Reduce
context of prepare_spectra()
, it performs worse in terms of memory and also speed. Interesting take home. Not sure where we can still gain. Maybe a +1 🧗 for the current implementation.@ThomasKnecht currently we iterate through all elements of dataset_list
for spectral data types through base::Reduce , which is quite elegant.
Just to try, hat about doing a selective element update with re-assignment of specific elements in a loop?
Closing for now as we have not found speed or memory gains in #59 . The only think we could still do is not expand the x-axis (spatial frerquency in wavenumbers) given by FXV (frequency of first X-variable), LXV (frequency of last X-variable) and NPT (number of points) into the extra wavenumbers
vectors.
=> suggested fix for the moment: use
htop
to observe/screenshot max memory.