mdsumner / gdalraster

R Bindings to the GDAL Raster API
https://usdaforestservice.github.io/gdalraster/
Other
0 stars 0 forks source link

read_col_row PR #1

Open mdsumner opened 6 months ago

mdsumner commented 6 months ago

Here's an example, takes 285 seconds on 32 cores

here we organize by 512x512 block and parallelize over read

library(gdalraster)
ds <- new(GDALRaster, sds::gebco())

dm <- c(ds$getRasterXSize(), ds$getRasterYSize())
ex <- ds$bbox()[c(1, 3, 2, 4)]
xy <- maps::world.cities[, c("long", "lat")]
cell <- vaster::cell_from_xy(dm, ex, xy)

ord <- order(cell)
rc <- vaster::rowcol_from_cell(dm, ex, cell[ord])-1

blocksize <- ds$getBlockSize(1L)
block <- vaster::cell_from_xy(dm %/% blocksize, ex, xy)
block_ord <- order(block)
block_rc <- vaster::rowcol_from_cell(dm %/% blocksize, ex, block[block_ord])-1
x <- tibble::tibble(x = xy[,1], y = xy[,2], cell = cell[block_ord], block = block[block_ord], 
                    col = rc[block_ord,2], row = rc[block_ord,1])
l <- split(x, x$block)

options(parallelly.fork.enable = TRUE, future.rng.onMisuse = "ignore")
library(furrr); plan(multicore)
system.time({
v <- future_map(l, function(.x) ds$read_col_row(1L, .x$col, .x$row))
})

plan(sequential)
mdsumner commented 6 months ago

Another benchmark, 2232 seconds on 32 cores, as above using this branch: https://github.com/mdsumner/gdalraster/tree/read-col-row

library(gdalraster)
ds <- new(GDALRaster, sds::cop30())

dm <- c(ds$getRasterXSize(), ds$getRasterYSize())
ex <- ds$bbox()[c(1, 3, 2, 4)]
xy <- maps::world.cities[, c("long", "lat")]
cell <- vaster::cell_from_xy(dm, ex, xy)

ord <- order(cell)
rc <- vaster::rowcol_from_cell(dm, ex, cell[ord])-1

blocksize <- ds$getBlockSize(1L)
block <- vaster::cell_from_xy(dm %/% blocksize, ex, xy)
block_ord <- order(block)
block_rc <- vaster::rowcol_from_cell(dm %/% blocksize, ex, block[block_ord])-1
x <- tibble::tibble(x = xy[,1], y = xy[,2], cell = cell[block_ord], block = block[block_ord], 
                    col = rc[block_ord,2], row = rc[block_ord,1])
l <- split(x, x$block)

options(parallelly.fork.enable = TRUE, future.rng.onMisuse = "ignore")
library(furrr); plan(multicore)
system.time({
v <- future_map(l, function(.x) ds$read_col_row(1L, .x$col, .x$row))
})

plan(sequential)