njtierney / geotargets

Targets extensions for geospatial data
https://njtierney.github.io/geotargets/
Other
49 stars 4 forks source link

[draft] tar_terra_rast_wrap: multi-target method to preserve SpatRaster metadata #63

Open brownag opened 2 months ago

brownag commented 2 months ago

This is a draft PR that might be able to address #58

This is a completely different way of managing target files: the target file in _targets/objects/ is an RDS file (like ordinary targets) containing a PackedSpatRaster, which is backed by a cached geospatial data file (and any sidecar files) held in a user-specified folder.

For now this only works for SpatRaster, but I think a similar solution could be developed for SpatVectorProxy (although this would require either changes to wrapCache() in terra, or a custom wrapCache()-like method developed for this case)

The current issue is that you can modify the cache (intentionally or unintentionally) and the main target will not be invalidated. I tried tracking the cache directory before running the caching target, but then the caching target has to run twice before it is skipped.

Example of storing units and categories:

library(targets)
tar_script({

    # Build the first example raster: terra's bundled elevation GeoTIFF with
    # its unit set to "m" and its variable name set to "elev".
    make_rast1 <- function() {
        elev_path <- system.file("ex/elev.tif", package = "terra")
        elev <- terra::rast(elev_path)
        terra::units(elev) <- "m"
        terra::varnames(elev) <- "elev"
        elev
    }

    # Build the second example raster: the bundled elevation data classified
    # into three classes (values 1:3) labelled "low", "med" and "hi".
    make_rast2 <- function() {
        elev <- terra::rast(system.file("ex/elev.tif", package = "terra"))
        # Three-column matrix passed to classify(): lower bounds, upper bounds,
        # and the class value assigned to each interval.
        rcl <- cbind(
            c(0, 300, 500),
            c(300, 500, 1000),
            1:3
        )
        classed <- terra::classify(elev, rcl)
        class_labels <- data.frame(
            value = 1:3,
            category = c("low", "med", "hi")
        )
        levels(classed) <- class_labels
        classed
    }

    # Pipeline definition returned by the script: one tar_terra_rast_wrap()
    # call per raster. In the tar_make() output shown for this example, each
    # call produces the named target (rast1, rast2) plus a companion
    # <name>_cache_files target.
    list(
        geotargets::tar_terra_rast_wrap(
            rast1,
            make_rast1()
        ),
        geotargets::tar_terra_rast_wrap(
            rast2,
            make_rast2()
        )
    )
})

tar_make()
#> ▶ dispatched target rast1
#> ● completed target rast1 [0.009 seconds]
#> ▶ dispatched target rast2
#> ● completed target rast2 [0.016 seconds]
#> ▶ dispatched target rast1_cache_files
#> ● completed target rast1_cache_files [0 seconds]
#> ▶ dispatched target rast2_cache_files
#> ● completed target rast2_cache_files [0 seconds]
#> ▶ ended pipeline [0.334 seconds]

x_raw <- readRDS("_targets/objects/rast1")
x <- tar_read(rast1)

x_raw@attributes
#> $sources
#>   sid
#> 1   1
#>                                                                             source
#> 1 /tmp/RtmpfZAMV0/reprex-67d786408f708-bared-pika/geotargets_cache/rast1/rast1.tif
#>   bands nlyr
#> 1     1    1
#> 
#> $units
#> [1] "m"

terra::units(x)
#> [1] "m"

# varnames not preserved in PackedSpatRaster either
terra::varnames(x)
#> [1] "rast1"

x
#> class       : SpatRaster 
#> dimensions  : 90, 95, 1  (nrow, ncol, nlyr)
#> resolution  : 0.008333333, 0.008333333  (x, y)
#> extent      : 5.741667, 6.533333, 49.44167, 50.19167  (xmin, xmax, ymin, ymax)
#> coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#> source      : rast1.tif 
#> name        : elevation 
#> min value   :       141 
#> max value   :       547 
#> unit        :         m

x <- tar_read(rast2)

terra::levels(x)
#> [[1]]
#>   value category
#> 1     1      low
#> 2     2      med
#> 3     3       hi

x
#> class       : SpatRaster 
#> dimensions  : 90, 95, 1  (nrow, ncol, nlyr)
#> resolution  : 0.008333333, 0.008333333  (x, y)
#> extent      : 5.741667, 6.533333, 49.44167, 50.19167  (xmin, xmax, ymin, ymax)
#> coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#> source      : rast2.tif 
#> categories  : category 
#> name        : category 
#> min value   :      low 
#> max value   :       hi

tar_read(rast1_cache_files)
#> [1] "geotargets_cache/rast1/rast1.tif"         
#> [2] "geotargets_cache/rast1/rast1.tif.aux.json"

# all skip
tar_make()
#> ✔ skipped target rast1
#> ✔ skipped target rast2
#> ✔ skipped target rast1_cache_files
#> ✔ skipped target rast2_cache_files
#> ✔ skipped pipeline [0.119 seconds]

# change the rast1 cache by changing units
x <- jsonlite::read_json("geotargets_cache/rast1/rast1.tif.aux.json")
x[[1]][[1]] <- "km" 
jsonlite::write_json(x, "geotargets_cache/rast1/rast1.tif.aux.json")

# rast1 ought to be rebuilt here, but only rast1_cache_files is re-run
tar_make()
#> ✔ skipped target rast1
#> ✔ skipped target rast2
#> ▶ dispatched target rast1_cache_files
#> ● completed target rast1_cache_files [0.001 seconds]
#> ✔ skipped target rast2_cache_files
#> ▶ ended pipeline [0.153 seconds]

# all skip
tar_make()
#> ✔ skipped target rast1
#> ✔ skipped target rast2
#> ✔ skipped target rast1_cache_files
#> ✔ skipped target rast2_cache_files
#> ✔ skipped pipeline [0.118 seconds]