Closed Pakillo closed 1 year ago
Now with simulated data:
# Simulate a right-skewed predictor (om) and a response (oc) whose log-scale
# mean is log(0.6 * om), i.e. oc is lognormal around 60% of om.
set.seed(8)
n_obs <- 100

# Predictor: lognormal draws.
om <- rlnorm(n_obs, meanlog = 1.5, sdlog = 1)
hist(om, breaks = 50)
summary(om)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.2199 2.1947 4.1499 6.8813 8.1795 48.2349

# Response: lognormal noise (sdlog = 0.5) around log(0.6 * om).
oc <- rlnorm(n_obs, meanlog = log(0.6 * om), sdlog = 0.5)
hist(oc, breaks = 50)
summary(oc)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.07848 1.04400 2.51161 4.58285 5.61627 36.91661

# Relationship on the raw scale and on the log-log scale.
plot(oc ~ om)
plot(log(oc) ~ log(om))

# Collect both variables for the model fits below.
df <- data.frame(om, oc)
library(visreg)
library(DHARMa)
#> This is DHARMa 0.4.6. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
## 1. Linear model
# Ordinary linear regression of oc on om, both on the raw (untransformed) scale.
m1 <- lm(oc ~ om, data = df)
summary(m1)
#>
#> Call:
#> lm(formula = oc ~ om, data = df)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -11.2993 -1.0384 -0.4855 0.5393 24.6540
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.62094 0.46198 1.344 0.182
#> om 0.57575 0.04427 13.005 <2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 3.473 on 98 degrees of freedom
#> Multiple R-squared: 0.6331, Adjusted R-squared: 0.6294
#> F-statistic: 169.1 on 1 and 98 DF, p-value: < 2.2e-16

# Visualise the fit, then check it with DHARMa simulated residuals.
visreg(m1)
simulateResiduals(m1, plot = TRUE)
#> Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
#>
#> Scaled residual values: 0.556 0.124 0.352 0.384 0.444 0.352 0.548 0.396 0.48 0.504 0.436 0.284 0.596 0.348 0.792 0.548 0.66 0.708 0.028 0.44 ...

# Compare the distribution of fitted values with the observed response.
summary(fitted(m1))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.7476 1.8845 3.0103 4.5828 5.3302 28.3920
summary(df[["oc"]])
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.07848 1.04400 2.51161 4.58285 5.61627 36.91661

# Observed vs fitted, zoomed to [0, 8]; the red line is the 1:1 reference.
plot(df$oc, fitted(m1), xlim = c(0, 8), ylim = c(0, 8))
abline(0, 1, col = "red")
with(df, cor(oc, fitted(m1)))
#> [1] 0.7956948
## 2. lm(log(oc))
# Linear regression on the log-log scale: log(oc) as a function of log(om).
m2 <- lm(log(oc) ~ log(om), data = df)
summary(m2)
#>
#> Call:
#> lm(formula = log(oc) ~ log(om), data = df)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.20306 -0.40277 0.07664 0.34591 1.22284
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.51697 0.08854 -5.839 6.83e-08 ***
#> log(om) 1.00314 0.05004 20.047 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.537 on 98 degrees of freedom
#> Multiple R-squared: 0.804, Adjusted R-squared: 0.802
#> F-statistic: 401.9 on 1 and 98 DF, p-value: < 2.2e-16

# Visualise the fit, then check it with DHARMa simulated residuals.
visreg(m2)
simulateResiduals(m2, plot = TRUE)
#> Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
#>
#> Scaled residual values: 0.664 0.012 0.072 0.056 0.488 0.316 0.72 0.36 0.588 0.728 0.448 0.184 0.74 0.12 0.924 0.924 0.584 0.924 0.116 0.432 ...

# fitted(m2) is on the log scale; exp() back-transforms it to the scale of oc
# before comparing with the observed response.
summary(exp(fitted(m2)))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.1305 1.3120 2.4858 4.1345 4.9099 29.1156
summary(df$oc)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.07848 1.04400 2.51161 4.58285 5.61627 36.91661

# Observed vs back-transformed fitted, zoomed to [0, 8]; red line is 1:1.
plot(df$oc, exp(fitted(m2)), xlim = c(0, 8), ylim = c(0, 8))
abline(a = 0, b = 1, col = "red")

# Correlate on the response scale (back-transformed fitted values), matching
# the plot above and the cor() calls in the other sections. Correlating oc with
# the log-scale fitted(m2) would mix scales and understate the agreement.
cor(df$oc, exp(fitted(m2)))
#> [1] 0.7954332
## 3. lognormal glm
library(VGAM)
#> Loading required package: stats4
#> Loading required package: splines

# Lognormal regression: oc is modelled on its raw scale, with log(om) entering
# the meanlog linear predictor; sdlog gets its own intercept (log link).
m3 <- vglm(oc ~ log(om), data = df, family = lognormal)
summary(m3)
#>
#> Call:
#> vglm(formula = oc ~ log(om), family = lognormal, data = df)
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept):1 -0.51697 0.08765 -5.898 3.67e-09 ***
#> (Intercept):2 -0.63181 0.07071 -8.935 < 2e-16 ***
#> log(om) 1.00314 0.04954 20.250 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Names of linear predictors: meanlog, loglink(sdlog)
#>
#> Log-likelihood: -168.1262 on 197 degrees of freedom
#>
#> Number of Fisher scoring iterations: 9
#>
#> No Hauck-Donner effect found in any of the estimates

# Fitted values against the log predictor.
plot(fitted(m3) ~ log(df$om))

# fitted(m3) comes back as a one-column matrix, hence the "V1" summary layout
# and the matrix-shaped cor() result below.
summary(fitted(m3))
#> V1
#> Min. : 0.1503
#> 1st Qu.: 1.5111
#> Median : 2.8631
#> Mean : 4.7621
#> 3rd Qu.: 5.6551
#> Max. :33.5350
summary(df[["oc"]])
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.07848 1.04400 2.51161 4.58285 5.61627 36.91661

# Observed vs fitted, zoomed to [0, 8]; the red line is the 1:1 reference.
plot(df$oc, fitted(m3), xlim = c(0, 8), ylim = c(0, 8))
abline(0, 1, col = "red")
with(df, cor(oc, fitted(m3)))
#> [,1]
#> [1,] 0.7954332
Created on 2023-05-22 with reprex v2.0.2
Lognormal models perform slightly better, although lm(log) does not perform badly. lm models on the absolute scale do perform badly. See #26
Very nice tests, Paco! Thanks!! Yes, I agree with using a lognormal or lm(log) model.
Created on 2023-05-22 with reprex v2.0.2