Open mdsumner opened 3 weeks ago
note that getTileExtents() does the same xmin/xmax/ymin/ymax part that grout does @njtierney
library(grout)
tile_index(grout(c(1024, 512), c(-180, 180, -90, 90), c(256, 256)))
# A tibble: 8 × 11
tile offset_x offset_y tile_col tile_row ncol nrow xmin xmax ymin ymax
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0 0 1 1 256 256 -180 -90 0 90
2 2 256 0 2 1 256 256 -90 0 0 90
3 3 512 0 3 1 256 256 0 90 0 90
4 4 768 0 4 1 256 256 90 180 0 90
5 5 0 256 1 2 256 256 -180 -90 -90 0
6 6 256 256 2 2 256 256 -90 0 -90 0
7 7 512 256 3 2 256 256 0 90 -90 0
8 8 768 256 4 2 256 256 90 180 -90 0
library(terra)
getTileExtents(rast(ext(-180, 180, -90, 90), ncols = 1024, nrows = 512), 256)
xmin xmax ymin ymax
[1,] -180 -90 0 90
[2,] -90 0 0 90
[3,] 0 90 0 90
[4,] 90 180 0 90
[5,] -180 -90 -90 0
[6,] -90 0 -90 0
[7,] 0 90 -90 0
[8,] 90 180 -90 0
and (sheesh) see that st_tile does only the index part (it's for use with the RasterIO arg of read_stars (which corresponds to gdal_translate
args outsize
and srcwin
(albeit with 1-based index in R):
[-outsize <xsize[%]|0> <ysize[%]|0>]
[-srcwin <xoff> <yoff> <xsize> <ysize>]
library(stars)
st_tile(512, 1024, 256, 256)
nXOff nYOff nXSize nYSize
[1,] 1 1 256 256
[2,] 1 257 256 256
[3,] 1 513 256 256
[4,] 1 769 256 256
[5,] 257 1 256 256
[6,] 257 257 256 256
[7,] 257 513 256 256
[8,] 257 769 256 256
So, my way is to
The hypertidy/sds package has a URI dsn for the GEBCO 2023 dataset, and hypertidy/grout has tiling logic that is otherwise spread out in different functions in terra and stars and in GDAL too.
(dsn <- sds::gebco())
#> [1] "/vsicurl/https://gebco2023.s3.valeria.science/gebco_2023_land_cog.tif"
library(grout)
library(vapour)
info <- vapour_raster_info(dsn)
## now the tile index
(index <- tile_index(grout(info$dimension, info$extent, info$block)))
#> # A tibble: 14,365 × 11
#> tile offset_x offset_y tile_col tile_row ncol nrow xmin xmax ymin ymax
#> <int> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 0 1 1 512 512 -180 -178. 87.9 90
#> 2 2 512 0 2 1 512 512 -178. -176. 87.9 90
#> 3 3 1024 0 3 1 512 512 -176. -174. 87.9 90
#> 4 4 1536 0 4 1 512 512 -174. -171. 87.9 90
#> 5 5 2048 0 5 1 512 512 -171. -169. 87.9 90
#> 6 6 2560 0 6 1 512 512 -169. -167. 87.9 90
#> 7 7 3072 0 7 1 512 512 -167. -165. 87.9 90
#> 8 8 3584 0 8 1 512 512 -165. -163. 87.9 90
#> 9 9 4096 0 9 1 512 512 -163. -161. 87.9 90
#> 10 10 4608 0 10 1 512 512 -161. -159. 87.9 90
#> # ℹ 14,355 more rows
## we need "projwin" which is xmin,ymax,xmax,ymin order, we can unlist each element for that arg
## we could also use the args "outsize" and "srcwin" if we feel more comfy
ex <- split(index[c("xmin", "ymax", "xmax", "ymin")], 1:nrow(index))
## create the VRT as a column in the index
index$vrt <- purrr::map_chr(ex, \(.x) vapour_vrt(dsn, options = c("-projwin", unlist(.x))))
xml2::read_xml(sample(index$vrt, 1))
#> {xml_document}
#> <VRTDataset rasterXSize="512" rasterYSize="512">
#> [1] <SRS dataAxisToSRSAxisMapping="2,1">GEOGCS["WGS 84",DATUM["WGS_1984",SPHE ...
#> [2] <GeoTransform> -1.4373333333333335e+02, 4.1666666666666666e-03, 0.00000 ...
#> [3] <Metadata>\n <MDI key="AREA_OR_POINT">Area</MDI>\n</Metadata>
#> [4] <Metadata domain="IMAGE_STRUCTURE">\n <MDI key="INTERLEAVE">BAND</MDI>\n ...
#> [5] <VRTRasterBand dataType="Int16" band="1" blockXSize="512" blockYSize="512 ...
## Now read in the chosen software, and farm out to parallelization how you like
terra::rast(sample(index$vrt, 1))
#> class : SpatRaster
#> dimensions : 512, 512, 1 (nrow, ncol, nlyr)
#> resolution : 0.004166667, 0.004166667 (x, y)
#> extent : 165.6, 167.7333, -16.66667, -14.53333 (xmin, xmax, ymin, ymax)
#> coord. ref. : lon/lat WGS 84 (EPSG:4326)
#> source : VRTDataset>
#> name : VRTDataset>
stars::read_stars(sample(index$vrt, 1))
#> stars object with 2 dimensions and 1 attribute
#> attribute(s):
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 11144 -5857 -4836 -4622 -4640.896 -4457 -3324
#> dimension(s):
#> from to offset delta refsys point x/y
#> x 1 512 157.1 0.004167 WGS 84 FALSE [x]
#> y 1 512 -48.67 -0.004167 WGS 84 FALSE [y]
new(gdalraster::GDALRaster, sample(index$vrt, 1))
#> C++ object <0x5564c171a550> of class 'GDALRaster' <0x5564bd53f860>
Sys.setenv("RETICULATE_PYTHON"="/usr/bin/python3")
reticulate::import("xarray")$open_dataset(sample(index$vrt, 1), engine = "rasterio")
#> <xarray.Dataset> Size: 1MB
#> Dimensions: (band: 1, x: 512, y: 512)
#> Coordinates:
#> * band (band) int64 8B 1
#> * x (x) float64 4kB -66.93 -66.93 -66.92 ... -64.81 -64.81 -64.8
#> * y (y) float64 4kB 70.8 70.79 70.79 70.79 ... 68.68 68.67 68.67
#> spatial_ref int64 8B ...
#> Data variables:
#> band_data (band, y, x) float32 1MB ...
Created on 2024-06-27 with reprex v2.0.2
Running with furrr (we just read and discard the 14000 tiles)
(dsn <- sds::gebco())
library(grout)
library(vapour)
info <- vapour_raster_info(dsn)
index <- tile_index(grout(info$dimension, info$extent, info$block))
ex <- split(index[c("xmin", "ymax", "xmax", "ymin")], 1:nrow(index))
## create the VRT as a column in the index
index$vrt <- purrr::map_chr(ex, \(.x) vapour_vrt(dsn, options = c("-projwin", unlist(.x))))
library(furrr)
plan(multicore)
## read the entire file contents, and discard it
f <- function(x) {vapour::gdal_raster_data(x); NULL}
system.time(future_map(index$vrt, f))
user system elapsed
189.554 29.819 187.972
That's on a session with 24 cpus on Pawsey.
and if we increase the block size (they're still aligned to native blocks in a 8x8 configuration, it's faster again (242 blocks).
(dsn <- sds::gebco())
library(grout)
library(vapour)
info <- vapour_raster_info(dsn)
index <- tile_index(grout(info$dimension, info$extent, c(4096, 4096)))
ex <- split(index[c("xmin", "ymax", "xmax", "ymin")], 1:nrow(index))
options(parallelly.fork.enable = TRUE, future.rng.onMisuse = "ignore")
library(furrr)
plan(multicore)
## create the VRT as a column in the index
index$vrt <- future_map_chr(ex, \(.x) vapour_vrt(dsn, options = c("-projwin", unlist(.x))))
## read the entire file contents, and discard it
f <- function(x) {vapour::gdal_raster_data(x); NULL}
system.time(future_map(index$vrt, f))
user system elapsed
159.273 23.280 88.116