obrl-soil / mpspline2

A revamped mass-preserving spline function for soils data
6 stars 2 forks source link

Negative input leads to "0"-inflated predictions #9

Open dylanbeaudette opened 1 month ago

dylanbeaudette commented 1 month ago

I just noticed that negative input values will result in 0s in the est_1cm predictions. These errors are evident in the depth interval predictions too.

I wouldn't have noticed this if I hadn't needed to convert values to z-scores.

# from package example
dat <- data.frame("SID" = c( 1,  1,  1,  1,   2,   2,   2,   2),
                  "UD" = c( 0, 20, 40, 60,   0,  15,  45,  80),
                  "LD" = c(10, 30, 50, 70,   5,  30,  60, 100),
                  "VAL" = c( 6,  4,  3, 10, 0.1, 0.9, 2.5,   6),
                  stringsAsFactors = FALSE)

# convert to z-score
dat$VAL <- scale(dat$VAL)

m <- mpspline(obj = dat, var_name = 'VAL')

# these aren't correct, all 0 or too small
m[[2]]$est_dcm

# top depth intervals are all 0
m[[2]]$est_1cm
dylanbeaudette commented 1 month ago

Found another, maybe more relevant, case in which loss of negative values is noticeable: EA-spline smoothing of soil color in CIELAB coordinates.

library(aqp)
library(mpspline2)

# example soil profile with some wild colors
x <- list(
  id = 'P1',
  depths = c(5, 25, 33, 100, 150),
  name = c('A', 'Bt1', 'Bt2', 'BC', 'Cr'),
  p1 = c(5, 25, 35, 10, 8),
  color = c('10YR 2/1', '7.5YR 3/3', '2.5Y 8/2', '2.5YR 4/6', '5GY 6/4'),
  hzd = c('clear', 'clear', 'abrupt', 'gradual', NA)
)

# init SPC
x <- quickSPC(x)
x$hzd <- hzDistinctnessCodeToOffset(x$hzd)

# convert Munsell -> sRGB in hex notation
x$col_source <- parseMunsell(x$color)

# convert Munsell -> CIELAB
.lab <- parseMunsell(x$color, convertColors = TRUE, returnLAB  = TRUE)

# shortcut to splice-in CIELAB color coordinates
replaceHorizons(x) <- cbind(horizons(x), .lab)

# check
plotSPC(x, color = 'L', hz.distinctness.offset = 'hzd')

# hack to smooth multiple variables
# future enhancement to spc2mpspline()
.spcL <- spc2mpspline(x, var_name = 'L', lam = 0.1, method = 'est_1cm')
.spcA <- spc2mpspline(x, var_name = 'A', lam = 0.1, method = 'est_1cm')
.spcB <- spc2mpspline(x, var_name = 'B', lam = 0.1, method = 'est_1cm')

m <- .spcL
m$A_spline <- .spcA$A_spline
m$B_spline <- .spcB$B_spline

# check
# ... negative numbers truncated at 0
par(mar = c(0, 0, 3, 3))
plotSPC(m, color = 'L_spline', name = NA, lwd = 0, divide.hz = FALSE)
plotSPC(m, color = 'A_spline', name = NA, lwd = 0, divide.hz = FALSE)
plotSPC(m, color = 'B_spline', name = NA, lwd = 0, divide.hz = FALSE)

# back-transform to Munsell at this point
.lab <- horizons(m)[, c('L_spline', 'A_spline', 'B_spline')]
names(.lab) <- c('L', 'A', 'B')

# interesting...
.mun <- col2Munsell(.lab, space = 'CIELAB')
table(.mun$hue)
table(.mun$value)
table(.mun$chroma)

# convert smoothed CIELAB -> sRGB
.srgb <- convertColor(horizons(m)[, c('L_spline', 'A_spline', 'B_spline')], from = 'Lab', to = 'sRGB', from.ref.white = 'D65', to.ref.white = 'D65')

# sRGB -> hex notation
m$col_spline <- rgb(.srgb, maxColorValue = 1)

# ok
plotSPC(m, color = 'col_spline', name = NA, lwd = 0, divide.hz = FALSE)

# normalize names and combine SPCs
m$soil_color <- m$col_spline
x$soil_color <- x$col_source

profile_id(m) <- 'P1-EA Spline'

z <- combine(x, m)

# compare side by side
par(mar = c(0, 0, 0, 3))
plotSPC(z, color = 'soil_color', name = NA, lwd = 0, divide.hz = FALSE, cex.names = 1)

# green hues lost due to truncation of smoothed values at x>=0

image