bleutner / RStoolbox

Remote Sensing Data Analysis in R 🛰
266 stars 82 forks source link

`histMatch()` fails on multiband rasters #117

Open smckenzie1986 opened 6 days ago

smckenzie1986 commented 6 days ago

Hi Benjamin, I have been doing some processing to Sentinel 2 imagery, and I think I have discovered a bug with histMatch() when I attempt to do color balancing between tiles with histMatch() I have noticed that many bands show much lower maximum values than what is in the corresponding band of the reference raster. Digging into this, I think the issue arises when totalFun() gets passed to terra:::app(). terra::app() doesn't cycle through the multiple inverse reference empirical cumulative density functions that are created from layerFun You can observe the behavior with this reproducible example:

##Load Necessary Packages##
library(terra)
library(RStoolbox)

set.seed(26) #For reproducibility

##Create Fake raster to color balance##
r1a<-rast(xmin=-123.05, xmax=-122, ymin=45, ymax=45.5, nrows=100, ncols=100, crs="epsg:4326")
r1b<-rast(xmin=-123.05, xmax=-122, ymin=45, ymax=45.5, nrows=100, ncols=100, crs="epsg:4326")
r1c<-rast(xmin=-123.05, xmax=-122, ymin=45, ymax=45.5, nrows=100, ncols=100, crs="epsg:4326")

values(r1a)<-rnorm(10000, 2000, 150)
values(r1b)<-rnorm(10000, 5000, 100)
values(r1c)<-rnorm(10000, 7000, 250)

r1<-c(r1a, r1b, r1c)

##Create Fake reference raster##
r2a<-rast(xmin=-122.05, xmax=-121, ymin=45, ymax=45.5, nrows=100, ncols=100, crs="epsg:4326")
r2b<-rast(xmin=-122.05, xmax=-121, ymin=45, ymax=45.5, nrows=100, ncols=100, crs="epsg:4326")
r2c<-rast(xmin=-122.05, xmax=-121, ymin=45, ymax=45.5, nrows=100, ncols=100, crs="epsg:4326")

values(r2a)<-rnorm(10000, 5000, 300)
values(r2b)<-rnorm(10000, 4000, 300)
values(r2c)<-rnorm(10000, 6000, 100)

r2<-c(r2a, r2b, r2c)

#make the rasters a bit more realistic by making some cells NA 
blankem1<-sample(1:10000, 6500, replace=FALSE)
values(r1)[blankem1]<-NA

set.seed(45) #For a different set of NA cells in the reference raster
blankem2<-sample(1:10000, 6500, replace=FALSE)
values(r2)[blankem2]<-NA

##add some extreme values to make the raster cell distribution heteroskedastic##
set.seed(96)#For a different set of cells to be extreme values in the raster to color balance
extreme1<-sample(1:10000, 300, replace=FALSE)
values(r1)[extreme1]<-values(r1)[extreme1]-rnorm(300, 500, 50)

set.seed(53)#For a different set of cells to be extreme values in the reference raster
extreme2<-sample(1:10000, 300, replace=FALSE)
values(r2)[extreme2]<-values(r2)[extreme2]+rnorm(300, 7000, 150)

##Observe the fake raster RGB images and the distribution of values in each band# 
X11()
par(mfrow=c(4,2))
plotRGB(r1[[c(1:3)]], scale=15000, main = "Original RGB Image")
plotRGB(r2[[c(1:3)]], scale=15000, main="Reference RGB Image")
hist(r1[[1]], breaks=100, xlim=c(0, 10000), main = "Original band 1")
hist(r2[[1]], breaks=100, xlim=c(0, 10000), main="Reference band 1")
hist(r1[[2]], breaks=100, xlim=c(0, 10000), main = "Original band 2")
hist(r2[[2]], breaks=100, xlim=c(0, 10000), main="Reference band 2")
hist(r1[[3]], breaks=100, xlim=c(0, 10000), main = "Original band 3")
hist(r2[[3]], breaks=100, xlim=c(0, 10000), main="Reference band 3")

##Histogram match r1 to r2
HM<-histMatch(r1, r2, intersectOnly = FALSE, paired=FALSE)

##Compare the histogram matched RGB image and band distributions to the original and the reference##
par(mfrow=c(4,3))
plotRGB(r1[[c(1:3)]], scale=15000, main="Original")
plotRGB(r2[[c(1:3)]], scale=15000, main="Reference")
plotRGB(HM[[c(1:3)]], scale=15000, main="HistMatched")
hist(r1[[1]], breaks=100, xlim=c(0, 10000), main="Original Band 1")
hist(r2[[1]], breaks=100, xlim=c(0, 10000), main="Original Band 1")
hist(HM[[1]], breaks=100, xlim=c(0, 10000), main="HistMatched Band 1")
hist(r1[[2]], breaks=100, xlim=c(0, 10000), main="Original Band 2")
hist(r2[[3]], breaks=100, xlim=c(0, 10000), main="Reference Band 2")
hist(HM[[2]], breaks=100, xlim=c(0, 10000), main="HistMatched Band 2")
hist(r1[[3]], breaks=100, xlim=c(0, 10000), main="Original Band 3")
hist(r2[[3]], breaks=100, xlim=c(0, 10000), main="Reference Band 3")
hist(HM[[3]], breaks=100, xlim=c(0, 10000), main="HistMatched Band 3")

Here is my session info:

R version 4.3.3 (2024-02-29 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19044)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] RStoolbox_1.0.0 terra_1.8-0    

loaded via a namespace (and not attached):
 [1] gtable_0.3.5         raster_3.6-26        ggplot2_3.5.1        recipes_1.1.0        lattice_0.22-5      
 [6] vctrs_0.6.5          tools_4.3.3          generics_0.1.3       stats4_4.3.3         parallel_4.3.3      
[11] tibble_3.2.1         proxy_0.4-27         fansi_1.0.6          ModelMetrics_1.2.2.2 pkgconfig_2.0.3     
[16] Matrix_1.6-5         KernSmooth_2.23-22   data.table_1.16.0    lifecycle_1.0.4      compiler_4.3.3      
[21] stringr_1.5.1        munsell_0.5.1        codetools_0.2-19     class_7.3-22         prodlim_2024.06.25  
[26] tidyr_1.3.1          exactextractr_0.10.0 pillar_1.9.0         MASS_7.3-60.0.1      classInt_0.4-10     
[31] gower_1.0.1          iterators_1.0.14     rpart_4.1.23         foreach_1.5.2        nlme_3.1-164        
[36] parallelly_1.38.0    lava_1.8.0           tidyselect_1.2.1     digest_0.6.35        stringi_1.8.3       
[41] future_1.34.0        sf_1.0-19            dplyr_1.1.4          reshape2_1.4.4       purrr_1.0.2         
[46] listenv_0.9.1        splines_4.3.3        grid_4.3.3           colorspace_2.1-0     cli_3.6.3           
[51] magrittr_2.0.3       XML_3.99-0.17        survival_3.5-8       utf8_1.2.4           future.apply_1.11.2 
[56] e1071_1.7-16         withr_3.0.1          scales_1.3.0         sp_2.1-3             lubridate_1.9.3     
[61] timechange_0.3.0     globals_0.16.3       nnet_7.3-19          timeDate_4041.110    hardhat_1.4.0       
[66] caret_6.0-94         rlang_1.1.4          Rcpp_1.0.12          glue_1.7.0           DBI_1.2.3           
[71] pROC_1.18.5          ipred_0.9-15         rstudioapi_0.16.0    R6_2.5.1             plyr_1.8.9          
[76] units_0.8-5 
smckenzie1986 commented 5 days ago

I think I have found a solution that fixes this issue! Here is my proposed solution:

histMatch2<-function (x, ref, xmask = NULL, refmask = NULL, nSamples = 1e+05, 
          intersectOnly = TRUE, paired = TRUE, forceInteger = FALSE, 
          returnFunctions = FALSE, ...) 
{
  nSamples <- min(ncell(ref), nSamples, ncell(ref))
  x <- RStoolbox:::.toTerra(x)
  ref <- RStoolbox:::.toTerra(ref)
  ext <- if (paired | intersectOnly) 
    intersect(ext(x), ext(ref))
  if (paired & is.null(ext)) {
    paired <- FALSE
    warning("Rasters do not overlap. Precise sampling disabled.", 
            call. = FALSE)
  }
  if (nlyr(x) != nlyr(ref)) 
    stop("x and ref must have the same number of layers.")
  if (!is.null(xmask)) {
    RStoolbox:::.vMessage("Apply xmask")
    xfull <- x
    x <- mask(x, xmask)
  }
  if (!is.null(refmask)) {
    .vMessage("Apply refmask")
    ref <- c(ref, refmask)
  }
  RStoolbox:::.vMessage("Extract samples")
  ref.sample <- as.matrix(spatSample(ref, size = nSamples, 
                                     na.rm = TRUE, ext = ext, xy = paired))
  if (!is.null(refmask)) 
    ref.sample <- ref.sample[, -ncol(ref.sample), drop = FALSE]
  if (paired) {
    x.sample <- extract(x, ref.sample[, c("x", "y")])
    if (is.vector(x.sample)) 
      x.sample <- as.matrix(x.sample)
    valid <- complete.cases(x.sample)
    ref.sample <- ref.sample[valid, -c(1:2), drop = FALSE]
    x.sample <- x.sample[valid, , drop = FALSE]
  }
  else {
    x.sample <- as.matrix(spatSample(x, size = nSamples, 
                                     na.rm = T, ext = ext))
  }
  RStoolbox:::.vMessage("Calculate empirical cumulative histograms")
  layerFun <- lapply(1:ncol(x.sample), function(i) {
    source.ecdf <- ecdf(x.sample[, i])
    ecdf.ref <- ecdf(ref.sample[, i])
    kn <- knots(ecdf.ref)
    y <- ecdf.ref(kn)
    minmax <- c(min(values(ref)[i]), max(values(ref)[i]))
    limits <- if (is.na(minmax[1]) || is.na(minmax[2])) {
      range(ref.sample)
    }
    else {
      minmax
    }
    inverse.ref.ecdf <- approxfun(y, kn, method = "linear", 
                                  yleft = limits[1], yright = limits[2], ties = "ordered")
    histMatchFun <- if (grepl("INT", terra::datatype(ref)[i]) | 
                        forceInteger) 
      function(values, na.rm = FALSE) {
        round(inverse.ref.ecdf(source.ecdf(values)))
      }
    else {
      function(values, na.rm = FALSE) {
        inverse.ref.ecdf(source.ecdf(values))
      }
    }
    histMatchFun
  })

  appfun<-function(x, f){
    result<-app(x, f)
    return(result)
  }

  tmp<-lapply(1:nlyr(x), function(i, x, f) appfun(x[[i]], f[[i]]), f=layerFun, x=x)

  out<-do.call(c, tmp)

  if (returnFunctions) {
    names(layerFun) <- names(x)
    return(layerFun)
  }
  RStoolbox:::.vMessage("Apply histogram match functions")

  if (!is.null(xmask)) 
    out <- merge(xfull, out, ..., overwrite = TRUE)
  names(out) <- names(x)
  out
}

And you can check the results with the example rasters from above with this code:

HM2<-histMatch2(r1, r2, intersectOnly = FALSE, paired=FALSE)

par(mfrow=c(4,3))
plotRGB(r1[[c(1:3)]], scale=15000, main="Original")
plotRGB(r2[[c(1:3)]], scale=15000, main="Reference")
plotRGB(HM2[[c(1:3)]], scale=15000, main="Revised HistMatched")
hist(r1[[1]], breaks=100, xlim=c(0, 10000), main="Original Band 1")
hist(r2[[1]], breaks=100, xlim=c(0, 10000), main="Reference Band 1")
hist(HM2[[1]], breaks=100, xlim=c(0, 10000), main="Revised HistMatched Band 1")
hist(r1[[2]], breaks=100, xlim=c(0, 10000), main="Original Band 2")
hist(r2[[2]], breaks=100, xlim=c(0, 10000), main="Reference Band 2")
hist(HM2[[2]], breaks=100, xlim=c(0, 10000), main="Revised HistMatched Band 2")
hist(r1[[3]], breaks=100, xlim=c(0, 10000), main="Original Band 3")
hist(r2[[3]], breaks=100, xlim=c(0, 10000), main="Reference Band 3")
hist(HM2[[3]], breaks=100, xlim=c(0, 10000), main="Revised HistMatched Band 3")