hypertidy / grout

Abstract raster grid tiling
https://hypertidy.github.io/grout/
5 stars 0 forks source link

use terra elev.tif to figure out makeTiles block generating logic (for targets) #12

Open mdsumner opened 3 weeks ago

mdsumner commented 3 weeks ago
library(terra)
library(grout)

terra_elev <- rast(system.file("ex/elev.tif", package="terra"))
terra_elev
terra_tiles <- rast(ncols = 2, nrows = 2, ext = ext(terra_elev))
terra_tiles
makeTiles(
    x = terra_elev,
    terra_tiles
)

rast("tile_1.tif")

tile_spec <- grout(
    dimension = dim(terra_elev)[2:1],
    extent = as.vector(ext(terra_elev)),
    blocksize = round(dim(terra_elev)[2:1] / 2)
)

tile_index(tile_spec)
mdsumner commented 3 weeks ago

note that getTileExtents() does the same xmin/xmax/ymin/ymax part that grout does @njtierney

library(grout)
tile_index(grout(c(1024, 512), c(-180, 180, -90, 90), c(256, 256)))
# A tibble: 8 × 11
   tile offset_x offset_y tile_col tile_row  ncol  nrow  xmin  xmax  ymin  ymax
  <int>    <dbl>    <dbl>    <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1        0        0        1        1   256   256  -180   -90     0    90
2     2      256        0        2        1   256   256   -90     0     0    90
3     3      512        0        3        1   256   256     0    90     0    90
4     4      768        0        4        1   256   256    90   180     0    90
5     5        0      256        1        2   256   256  -180   -90   -90     0
6     6      256      256        2        2   256   256   -90     0   -90     0
7     7      512      256        3        2   256   256     0    90   -90     0
8     8      768      256        4        2   256   256    90   180   -90     0

library(terra)
getTileExtents(rast(ext(-180, 180, -90, 90), ncols = 1024, nrows = 512), 256)
     xmin xmax ymin ymax
[1,] -180  -90    0   90
[2,]  -90    0    0   90
[3,]    0   90    0   90
[4,]   90  180    0   90
[5,] -180  -90  -90    0
[6,]  -90    0  -90    0
[7,]    0   90  -90    0
[8,]   90  180  -90    0
mdsumner commented 3 weeks ago

and (sheesh) see that st_tile does only the index part (it's for use with the RasterIO arg of read_stars (which corresponds to gdal_translate args outsize and srcwin (albeit with 1-based index in R):

[-outsize <xsize[%]|0> <ysize[%]|0>] 
[-srcwin <xoff> <yoff> <xsize> <ysize>] 
library(stars)
st_tile(512, 1024, 256, 256)
     nXOff nYOff nXSize nYSize
[1,]     1     1    256    256
[2,]     1   257    256    256
[3,]     1   513    256    256
[4,]     1   769    256    256
[5,]   257     1    256    256
[6,]   257   257    256    256
[7,]   257   513    256    256
[8,]   257   769    256    256
mdsumner commented 3 weeks ago

So, my way is to

Example

The hypertidy/sds package has a URI dsn for the GEBCO 2023 dataset, and hypertidy/grout has tiling logic that is otherwise spread out in different functions in terra and stars and in GDAL too.

(dsn <- sds::gebco())
#> [1] "/vsicurl/https://gebco2023.s3.valeria.science/gebco_2023_land_cog.tif"

    library(grout)
    library(vapour)

    info <- vapour_raster_info(dsn)

    ## now the tile index
    (index <- tile_index(grout(info$dimension, info$extent, info$block)))
#> # A tibble: 14,365 × 11
#>     tile offset_x offset_y tile_col tile_row  ncol  nrow  xmin  xmax  ymin  ymax
#>    <int>    <dbl>    <dbl>    <dbl>    <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1     1        0        0        1        1   512   512 -180  -178.  87.9    90
#>  2     2      512        0        2        1   512   512 -178. -176.  87.9    90
#>  3     3     1024        0        3        1   512   512 -176. -174.  87.9    90
#>  4     4     1536        0        4        1   512   512 -174. -171.  87.9    90
#>  5     5     2048        0        5        1   512   512 -171. -169.  87.9    90
#>  6     6     2560        0        6        1   512   512 -169. -167.  87.9    90
#>  7     7     3072        0        7        1   512   512 -167. -165.  87.9    90
#>  8     8     3584        0        8        1   512   512 -165. -163.  87.9    90
#>  9     9     4096        0        9        1   512   512 -163. -161.  87.9    90
#> 10    10     4608        0       10        1   512   512 -161. -159.  87.9    90
#> # ℹ 14,355 more rows

    ## we need "projwin" which is xmin,ymax,xmax,ymin order, we can unlist each element for that arg
    ## we could also use the args "outsize" and "srcwin" if we feel more comfy 
    ex <- split(index[c("xmin", "ymax", "xmax", "ymin")], 1:nrow(index))

    ## create the VRT as a column in the index
    index$vrt <- purrr::map_chr(ex, \(.x) vapour_vrt(dsn, options = c("-projwin", unlist(.x))))

    xml2::read_xml(sample(index$vrt, 1))
#> {xml_document}
#> <VRTDataset rasterXSize="512" rasterYSize="512">
#> [1] <SRS dataAxisToSRSAxisMapping="2,1">GEOGCS["WGS 84",DATUM["WGS_1984",SPHE ...
#> [2] <GeoTransform> -1.4373333333333335e+02,  4.1666666666666666e-03,  0.00000 ...
#> [3] <Metadata>\n  <MDI key="AREA_OR_POINT">Area</MDI>\n</Metadata>
#> [4] <Metadata domain="IMAGE_STRUCTURE">\n  <MDI key="INTERLEAVE">BAND</MDI>\n ...
#> [5] <VRTRasterBand dataType="Int16" band="1" blockXSize="512" blockYSize="512 ...

    ## Now read in the chosen software, and farm out to parallelization how you like

    terra::rast(sample(index$vrt, 1))
#> class       : SpatRaster 
#> dimensions  : 512, 512, 1  (nrow, ncol, nlyr)
#> resolution  : 0.004166667, 0.004166667  (x, y)
#> extent      : 165.6, 167.7333, -16.66667, -14.53333  (xmin, xmax, ymin, ymax)
#> coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#> source      : VRTDataset> 
#> name        : VRTDataset>
    stars::read_stars(sample(index$vrt, 1))
#> stars object with 2 dimensions and 1 attribute
#> attribute(s):
#>         Min. 1st Qu. Median      Mean 3rd Qu.  Max.
#> 11144  -5857   -4836  -4622 -4640.896   -4457 -3324
#> dimension(s):
#>   from  to offset     delta refsys point x/y
#> x    1 512  157.1  0.004167 WGS 84 FALSE [x]
#> y    1 512 -48.67 -0.004167 WGS 84 FALSE [y]
    new(gdalraster::GDALRaster, sample(index$vrt, 1))
#> C++ object <0x5564c171a550> of class 'GDALRaster' <0x5564bd53f860>

    Sys.setenv("RETICULATE_PYTHON"="/usr/bin/python3")
    reticulate::import("xarray")$open_dataset(sample(index$vrt, 1), engine = "rasterio")
#> <xarray.Dataset> Size: 1MB
#> Dimensions:      (band: 1, x: 512, y: 512)
#> Coordinates:
#>   * band         (band) int64 8B 1
#>   * x            (x) float64 4kB -66.93 -66.93 -66.92 ... -64.81 -64.81 -64.8
#>   * y            (y) float64 4kB 70.8 70.79 70.79 70.79 ... 68.68 68.67 68.67
#>     spatial_ref  int64 8B ...
#> Data variables:
#>     band_data    (band, y, x) float32 1MB ...

Created on 2024-06-27 with reprex v2.0.2

mdsumner commented 3 weeks ago

Running with furrr (we just read and discard the 14000 tiles)

(dsn <- sds::gebco())
library(grout)
library(vapour)

info <- vapour_raster_info(dsn)
index <- tile_index(grout(info$dimension, info$extent, info$block))

ex <- split(index[c("xmin", "ymax", "xmax", "ymin")], 1:nrow(index))

## create the VRT as a column in the index
index$vrt <- purrr::map_chr(ex, \(.x) vapour_vrt(dsn, options = c("-projwin", unlist(.x))))

library(furrr)
plan(multicore)
## read the entire file contents, and discard it
f <- function(x) {vapour::gdal_raster_data(x); NULL}

system.time(future_map(index$vrt, f))
   user  system elapsed
189.554  29.819 187.972

That's on a session with 24 cpus on Pawsey.

mdsumner commented 3 weeks ago

and if we increase the block size (they're still aligned to native blocks in a 8x8 configuration, it's faster again (242 blocks).

(dsn <- sds::gebco())
library(grout)
library(vapour)

info <- vapour_raster_info(dsn)
index <- tile_index(grout(info$dimension, info$extent, c(4096, 4096)))

ex <- split(index[c("xmin", "ymax", "xmax", "ymin")], 1:nrow(index))

options(parallelly.fork.enable = TRUE, future.rng.onMisuse = "ignore")
library(furrr)
plan(multicore)

## create the VRT as a column in the index
index$vrt <- future_map_chr(ex, \(.x) vapour_vrt(dsn, options = c("-projwin", unlist(.x))))

## read the entire file contents, and discard it
f <- function(x) {vapour::gdal_raster_data(x); NULL}

system.time(future_map(index$vrt, f))
   user  system elapsed
159.273  23.280  88.116