wilkelab / ggridges

Ridgeline plots in ggplot2
https://wilkelab.org/ggridges
GNU General Public License v2.0
411 stars 31 forks source link

Mismatch between quantile_lines and fill=..quantile.. in density_ridges_gradient #30

Closed d0rg0ld closed 5 years ago

d0rg0ld commented 5 years ago

Thanks for this great package!

I played around a bit and ran into the following issue which I could not resolve, thought that it is worth reporting:

Using the attached test data with the code below results in a mismatch between the quantile lines and the shaded areas (unfortunately not) between them ... Do you have any idea what might cause this behavior?

library(ggplot2)
library(ggridges)
testdata=read.table(file="ggridges_quantile_testdata.tsv", sep="\t", header=T)
p=ggplot(testdata, aes(x=val, y=factor(variable), fill=..quantile..)) +
  stat_density_ridges(geom = "density_ridges_gradient",
                      scale=1, 
                      quantiles=c(0.05,0.25,0.33,0.5,0.66,0.75, 0.95),
                      quantile_lines = TRUE,
                      calc_ecdf = TRUE
  ) + 
  scale_fill_manual(values=c("black", "darkgray", "lightgray", "white","white","lightgray", "darkgray", "black"), labels= c("0-0.05","0.05-0.25","0.25-0.33","0.33-0.5","0.5-0.66","0.66-0.75", "0.75-0.95", "0.95-1.00")) +
  coord_cartesian(xlim = c(-250, 250)) 
print(p)

ggridges_quantile_testdata

ggridges_quantile_testdata.tsv.zip

clauswilke commented 5 years ago

Please look if you can make a self-contained example that doesn't need an external file, and please run it through the reprex package with reprex(si = TRUE). Thanks!

d0rg0ld commented 5 years ago

Ok, thanks for the quick reply, will try to provide this the next days ...

d0rg0ld commented 5 years ago
library(ggplot2)
library(ggridges)
#> 
#> Attache Paket: 'ggridges'
#> The following object is masked from 'package:ggplot2':
#> 
#>     scale_discrete_manual

randfunc=function(samples, mu, sigma, delta, scalex) {
  #from https://www.r-bloggers.com/another-skewed-normal-distribution/
  Z = rlnorm(samples, mu, sigma)
  X = as.integer(rnorm(samples, Z, delta)*scalex)
}
samples=10000
mu1=-1
sigma1=1.84
delta1=4.2
mu2=-1.5
sigma2=1.92
delta2=3.8
testdata=data.frame("A",randfunc(samples, mu1, sigma1, delta1, 20))
colnames(testdata)=c("variable", "val")
testdata2=data.frame("B",randfunc(samples, mu2, sigma2, delta2, -20))
colnames(testdata2)=c("variable", "val")
testdata=rbind(testdata, testdata2)

p=ggplot(testdata, aes(x=val, y=factor(variable), fill=..quantile..)) +
  stat_density_ridges(geom = "density_ridges_gradient",
                      scale=1, 
                      quantiles=c(0.05,0.25,0.33,0.5,0.66,0.75, 0.95),
                      quantile_lines = TRUE,
                      calc_ecdf = TRUE
  ) + 
  scale_fill_manual(values=c("black", 
                             "darkgray", 
                             "lightgray", 
                             "white",
                             "white",
                             "lightgray", 
                             "darkgray", 
                             "black")) +
  coord_cartesian(xlim = c(-450, 450)) 
print(p)
#> Picking joint bandwidth of 12.5

Created on 2019-01-19 by the reprex package (v0.2.1)

Session info ``` r devtools::session_info() #> ─ Session info ────────────────────────────────────────────────────────── #> setting value #> version R version 3.3.3 Patched (2017-08-09 r75161) #> os OS X Yosemite 10.10.5 #> system x86_64, darwin13.4.0 #> ui X11 #> language (EN) #> collate de_AT.UTF-8 #> ctype de_AT.UTF-8 #> tz Europe/Vienna #> date 2019-01-19 #> #> ─ Packages ────────────────────────────────────────────────────────────── #> package * version date lib source #> assertthat 0.2.0 2017-04-11 [1] CRAN (R 3.3.2) #> backports 1.1.2 2017-12-13 [1] CRAN (R 3.3.2) #> base64enc 0.1-3 2015-07-28 [1] CRAN (R 3.3.0) #> bindr 0.1.1 2018-03-13 [1] CRAN (R 3.3.3) #> bindrcpp 0.2.2 2018-03-29 [1] CRAN (R 3.3.3) #> callr 3.0.0 2018-08-24 [1] CRAN (R 3.3.3) #> cli 1.0.1 2018-09-25 [1] CRAN (R 3.3.3) #> colorspace 1.4-0 2019-01-13 [1] CRAN (R 3.3.3) #> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.3.2) #> curl 3.2 2018-03-28 [1] CRAN (R 3.3.3) #> desc 1.2.0 2018-05-01 [1] CRAN (R 3.3.3) #> devtools 2.0.1 2018-10-26 [1] CRAN (R 3.3.3) #> digest 0.6.18 2018-10-10 [1] CRAN (R 3.3.3) #> dplyr 0.7.8 2018-11-10 [1] CRAN (R 3.3.3) #> evaluate 0.12 2018-10-09 [1] CRAN (R 3.3.3) #> fs 1.2.6 2018-08-23 [1] CRAN (R 3.3.3) #> ggplot2 * 3.1.0 2018-10-25 [1] CRAN (R 3.3.3) #> ggridges * 0.5.1 2019-01-13 [1] Github (clauswilke/ggridges@1d5131f) #> glue 1.3.0 2018-07-17 [1] CRAN (R 3.3.3) #> gtable 0.2.0 2016-02-26 [1] CRAN (R 3.3.0) #> htmltools 0.3.6 2017-04-28 [1] CRAN (R 3.3.2) #> httr 1.3.1 2017-08-20 [1] CRAN (R 3.3.2) #> knitr 1.20 2018-02-20 [1] CRAN (R 3.3.3) #> labeling 0.3 2014-08-23 [1] CRAN (R 3.3.0) #> lazyeval 0.2.1 2017-10-29 [1] CRAN (R 3.3.2) #> magrittr 1.5 2014-11-22 [1] CRAN (R 3.3.0) #> memoise 1.1.0 2017-04-21 [1] CRAN (R 3.3.2) #> mime 0.6 2018-10-05 [1] CRAN (R 3.3.3) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 3.3.3) #> pillar 1.3.1 2018-12-15 [1] CRAN (R 3.3.3) #> pkgbuild 1.0.2 2018-10-16 [1] CRAN (R 3.3.3) #> pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.3.3) #> pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.3.3) #> plyr 1.8.4 2016-06-08 [1] CRAN (R 3.3.0) #> prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.3.0) #> processx 3.2.0 2018-08-16 [1] CRAN (R 3.3.3) #> ps 1.2.1 2018-11-06 [1] CRAN (R 3.3.3) #> purrr 0.2.5 2018-05-29 [1] CRAN (R 3.3.3) #> R6 2.3.0 2018-10-04 [1] CRAN (R 3.3.3) #> Rcpp 1.0.0 2018-11-07 [1] CRAN (R 3.3.3) #> remotes 2.0.2 2018-10-30 [1] CRAN (R 3.3.3) #> rlang 0.3.1 2019-01-08 [1] CRAN (R 3.3.3) #> rmarkdown 1.11 2018-12-08 [1] CRAN (R 3.3.3) #> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.3.2) #> scales 1.0.0 2018-08-09 [1] CRAN (R 3.3.3) #> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.3.3) #> stringi 1.2.4 2018-07-20 [1] CRAN (R 3.3.3) #> stringr 1.3.1 2018-05-10 [1] CRAN (R 3.3.3) #> tibble 2.0.1 2019-01-12 [1] CRAN (R 3.3.3) #> tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.3.3) #> usethis 1.4.0 2018-08-14 [1] CRAN (R 3.3.3) #> withr 2.1.2 2018-03-15 [1] CRAN (R 3.3.3) #> xml2 1.2.0 2018-01-24 [1] CRAN (R 3.3.3) #> yaml 2.2.0 2018-07-25 [1] CRAN (R 3.3.3) #> #> [1] /Library/Frameworks/R.framework/Versions/3.3/Resources/library ```
d0rg0ld commented 5 years ago

this should do

clauswilke commented 5 years ago

The problem is that you're calculating the densities over a huge x range and then zoom in to a very small portion of that. In that portion, the density function jumps in huge x steps and there's not enough resolution to get accurate shading by quantiles. If you limit the x axis appropriately, things work fine.

library(ggplot2)
library(ggridges)
#> 
#> Attaching package: 'ggridges'
#> The following object is masked from 'package:ggplot2':
#> 
#>     scale_discrete_manual

randfunc=function(samples, mu, sigma, delta, scalex) {
  #from https://www.r-bloggers.com/another-skewed-normal-distribution/
  Z = rlnorm(samples, mu, sigma)
  X = as.integer(rnorm(samples, Z, delta)*scalex)
}
samples=10000
mu1=-1
sigma1=1.84
delta1=4.2
mu2=-1.5
sigma2=1.92
delta2=3.8
testdata=data.frame("A",randfunc(samples, mu1, sigma1, delta1, 20))
colnames(testdata)=c("variable", "val")
testdata2=data.frame("B",randfunc(samples, mu2, sigma2, delta2, -20))
colnames(testdata2)=c("variable", "val")
testdata=rbind(testdata, testdata2)

p=ggplot(testdata, aes(x=val, y=factor(variable), fill=..quantile..)) +
  stat_density_ridges(geom = "density_ridges_gradient",
                      scale=1, 
                      quantiles=c(0.05,0.25,0.33,0.5,0.66,0.75, 0.95),
                      quantile_lines = TRUE
  ) + 
  scale_fill_manual(values=c("black", 
                             "darkgray", 
                             "lightgray", 
                             "white",
                             "white",
                             "lightgray", 
                             "darkgray", 
                             "black")) +
  scale_x_continuous(limits = c(-450, 450)) 
print(p)
#> Picking joint bandwidth of 12.3
#> Warning: Removed 210 rows containing non-finite values
#> (stat_density_ridges).

Created on 2019-01-20 by the reprex package (v0.2.1)

d0rg0ld commented 5 years ago

Thanks for the quick reply, but your suggestion to limit the range of values instead of zooming on the part of the plot created from their full range results in a misrepresentation of the quantiles, which has a strong effect on the representation of the original data. Moreover, I wonder why the quantile lines appear to be correct, suggesting that in principle it should be possible to shade the area between them accordingly.

clauswilke commented 5 years ago

Ok, I've added a parameter n that you can increase until you get the desired rendering resolution at any x axis scale.

library(ggplot2)
library(ggridges)
#> 
#> Attaching package: 'ggridges'
#> The following object is masked from 'package:ggplot2':
#> 
#>     scale_discrete_manual

randfunc=function(samples, mu, sigma, delta, scalex) {
  #from https://www.r-bloggers.com/another-skewed-normal-distribution/
  Z = rlnorm(samples, mu, sigma)
  X = as.integer(rnorm(samples, Z, delta)*scalex)
}
samples=10000
mu1=-1
sigma1=1.84
delta1=4.2
mu2=-1.5
sigma2=1.92
delta2=3.8
testdata=data.frame("A",randfunc(samples, mu1, sigma1, delta1, 20))
colnames(testdata)=c("variable", "val")
testdata2=data.frame("B",randfunc(samples, mu2, sigma2, delta2, -20))
colnames(testdata2)=c("variable", "val")
testdata=rbind(testdata, testdata2)

p=ggplot(testdata, aes(x=val, y=factor(variable), fill=..quantile..)) +
  stat_density_ridges(geom = "density_ridges_gradient",
                      scale=1, 
                      quantiles=c(0.05,0.25,0.33,0.5,0.66,0.75, 0.95),
                      quantile_lines = TRUE,
                      n = 2^12 # change is here, default n = 512
  ) + 
  scale_fill_manual(values=c("black", 
                             "darkgray", 
                             "lightgray", 
                             "white",
                             "white",
                             "lightgray", 
                             "darkgray", 
                             "black")) +
  coord_cartesian(xlim = c(-450, 450)) 
print(p)
#> Picking joint bandwidth of 12.6

Created on 2019-01-21 by the reprex package (v0.2.1)

d0rg0ld commented 5 years ago

Thanks a lot for the fix, this is very helpful for my specific use case and hopefully also for others.