Open mdsumner opened 6 months ago
Another benchmark, 2232 seconds on 32 cores, as above using this branch: https://github.com/mdsumner/gdalraster/tree/read-col-row
library(gdalraster)
ds <- new(GDALRaster, sds::cop30())
dm <- c(ds$getRasterXSize(), ds$getRasterYSize())
ex <- ds$bbox()[c(1, 3, 2, 4)]
xy <- maps::world.cities[, c("long", "lat")]
cell <- vaster::cell_from_xy(dm, ex, xy)
ord <- order(cell)
rc <- vaster::rowcol_from_cell(dm, ex, cell[ord])-1
blocksize <- ds$getBlockSize(1L)
block <- vaster::cell_from_xy(dm %/% blocksize, ex, xy)
block_ord <- order(block)
block_rc <- vaster::rowcol_from_cell(dm %/% blocksize, ex, block[block_ord])-1
x <- tibble::tibble(x = xy[,1], y = xy[,2], cell = cell[block_ord], block = block[block_ord],
col = rc[block_ord,2], row = rc[block_ord,1])
l <- split(x, x$block)
options(parallelly.fork.enable = TRUE, future.rng.onMisuse = "ignore")
library(furrr); plan(multicore)
system.time({
v <- future_map(l, function(.x) ds$read_col_row(1L, .x$col, .x$row))
})
plan(sequential)
Here's an example, takes 285 seconds on 32 cores
here we organize by 512x512 block and parallelize over read