Closed d0rg0ld closed 5 years ago
Please check whether you can make a self-contained example that doesn't need an external file, and please run it through the reprex package with `reprex(si = TRUE)`. Thanks!
Ok, thanks for the quick reply, I will try to provide this in the next few days ...
library(ggplot2)
library(ggridges)
#>
#> Attache Paket: 'ggridges'
#> The following object is masked from 'package:ggplot2':
#>
#> scale_discrete_manual
# Draw `samples` integer-valued observations from a skewed distribution.
# Adapted from https://www.r-bloggers.com/another-skewed-normal-distribution/
#
# samples: number of values to draw
# mu, sigma: meanlog and sdlog passed to rlnorm() for the per-draw means
# delta: standard deviation of the normal noise around each per-draw mean
# scalex: multiplier applied before conversion to integer
#
# Returns an integer vector of length `samples`.
randfunc <- function(samples, mu, sigma, delta, scalex) {
  # Log-normal draws serve as the mean of each subsequent normal draw.
  latent_mean <- rlnorm(samples, mu, sigma)
  # End on a plain expression rather than an assignment so the result is
  # returned visibly; as.integer() truncates toward zero.
  as.integer(rnorm(samples, latent_mean, delta) * scalex)
}
# Simulation settings: draws per group, then the distribution parameters
# for group "A" (suffix 1) and group "B" (suffix 2).
samples <- 10000
mu1 <- -1
sigma1 <- 1.84
delta1 <- 4.2
mu2 <- -1.5
sigma2 <- 1.92
delta2 <- 3.8

# Group "A" uses a scale factor of 20, group "B" a factor of -20.
testdata <- data.frame(
  variable = "A",
  val = randfunc(samples, mu1, sigma1, delta1, 20)
)
testdata2 <- data.frame(
  variable = "B",
  val = randfunc(samples, mu2, sigma2, delta2, -20)
)
testdata <- rbind(testdata, testdata2)

# Ridgeline plot filled by quantile; eight fill colours are supplied for the
# regions delimited by the seven quantile cut points.
p <- ggplot(
  data = testdata,
  mapping = aes(x = val, y = factor(variable), fill = ..quantile..)
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    scale = 1,
    quantiles = c(0.05, 0.25, 0.33, 0.5, 0.66, 0.75, 0.95),
    quantile_lines = TRUE,
    calc_ecdf = TRUE
  ) +
  scale_fill_manual(
    values = c(
      "black", "darkgray", "lightgray", "white",
      "white", "lightgray", "darkgray", "black"
    )
  ) +
  # Restrict the visible x range via the coordinate system.
  coord_cartesian(xlim = c(-450, 450))
print(p)
#> Picking joint bandwidth of 12.5
Created on 2019-01-19 by the reprex package (v0.2.1)
this should do
The problem is that you're calculating the densities over a huge x range and then zooming in to a very small portion of that. In that portion, the density function jumps in huge x steps and there's not enough resolution to get accurate shading by quantiles. If you limit the x axis appropriately, things work fine.
library(ggplot2)
library(ggridges)
#>
#> Attaching package: 'ggridges'
#> The following object is masked from 'package:ggplot2':
#>
#> scale_discrete_manual
# Draw `samples` integer-valued observations from a skewed distribution.
# Adapted from https://www.r-bloggers.com/another-skewed-normal-distribution/
#
# samples: number of values to draw
# mu, sigma: meanlog and sdlog passed to rlnorm() for the per-draw means
# delta: standard deviation of the normal noise around each per-draw mean
# scalex: multiplier applied before conversion to integer
#
# Returns an integer vector of length `samples`.
randfunc <- function(samples, mu, sigma, delta, scalex) {
  # Log-normal draws serve as the mean of each subsequent normal draw.
  latent_mean <- rlnorm(samples, mu, sigma)
  # End on a plain expression rather than an assignment so the result is
  # returned visibly; as.integer() truncates toward zero.
  as.integer(rnorm(samples, latent_mean, delta) * scalex)
}
# Simulation settings: draws per group, then the distribution parameters
# for group "A" (suffix 1) and group "B" (suffix 2).
samples <- 10000
mu1 <- -1
sigma1 <- 1.84
delta1 <- 4.2
mu2 <- -1.5
sigma2 <- 1.92
delta2 <- 3.8

# Group "A" uses a scale factor of 20, group "B" a factor of -20.
testdata <- data.frame(
  variable = "A",
  val = randfunc(samples, mu1, sigma1, delta1, 20)
)
testdata2 <- data.frame(
  variable = "B",
  val = randfunc(samples, mu2, sigma2, delta2, -20)
)
testdata <- rbind(testdata, testdata2)

# Ridgeline plot filled by quantile; eight fill colours are supplied for the
# regions delimited by the seven quantile cut points.
p <- ggplot(
  data = testdata,
  mapping = aes(x = val, y = factor(variable), fill = ..quantile..)
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    scale = 1,
    quantiles = c(0.05, 0.25, 0.33, 0.5, 0.66, 0.75, 0.95),
    quantile_lines = TRUE
  ) +
  scale_fill_manual(
    values = c(
      "black", "darkgray", "lightgray", "white",
      "white", "lightgray", "darkgray", "black"
    )
  ) +
  # Limits are set on the x scale here (not on the coordinate system); rows
  # outside the limits are reported as removed in the output below.
  scale_x_continuous(limits = c(-450, 450))
print(p)
#> Picking joint bandwidth of 12.3
#> Warning: Removed 210 rows containing non-finite values
#> (stat_density_ridges).
Created on 2019-01-20 by the reprex package (v0.2.1)
Thanks for the quick reply, but your suggestion to limit the range of values instead of zooming on the part of the plot created from their full range results in a misrepresentation of the quantiles, which has a strong effect on the representation of the original data. Moreover, I wonder why the quantile lines appear to be correct, suggesting that in principle it should be possible to shade the area between them accordingly.
Ok, I've added a parameter `n` that you can increase until you get the desired rendering resolution at any x axis scale.
library(ggplot2)
library(ggridges)
#>
#> Attaching package: 'ggridges'
#> The following object is masked from 'package:ggplot2':
#>
#> scale_discrete_manual
# Draw `samples` integer-valued observations from a skewed distribution.
# Adapted from https://www.r-bloggers.com/another-skewed-normal-distribution/
#
# samples: number of values to draw
# mu, sigma: meanlog and sdlog passed to rlnorm() for the per-draw means
# delta: standard deviation of the normal noise around each per-draw mean
# scalex: multiplier applied before conversion to integer
#
# Returns an integer vector of length `samples`.
randfunc <- function(samples, mu, sigma, delta, scalex) {
  # Log-normal draws serve as the mean of each subsequent normal draw.
  latent_mean <- rlnorm(samples, mu, sigma)
  # End on a plain expression rather than an assignment so the result is
  # returned visibly; as.integer() truncates toward zero.
  as.integer(rnorm(samples, latent_mean, delta) * scalex)
}
# Simulation settings: draws per group, then the distribution parameters
# for group "A" (suffix 1) and group "B" (suffix 2).
samples <- 10000
mu1 <- -1
sigma1 <- 1.84
delta1 <- 4.2
mu2 <- -1.5
sigma2 <- 1.92
delta2 <- 3.8

# Group "A" uses a scale factor of 20, group "B" a factor of -20.
testdata <- data.frame(
  variable = "A",
  val = randfunc(samples, mu1, sigma1, delta1, 20)
)
testdata2 <- data.frame(
  variable = "B",
  val = randfunc(samples, mu2, sigma2, delta2, -20)
)
testdata <- rbind(testdata, testdata2)

# Ridgeline plot filled by quantile; eight fill colours are supplied for the
# regions delimited by the seven quantile cut points.
p <- ggplot(
  data = testdata,
  mapping = aes(x = val, y = factor(variable), fill = ..quantile..)
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    scale = 1,
    quantiles = c(0.05, 0.25, 0.33, 0.5, 0.66, 0.75, 0.95),
    quantile_lines = TRUE,
    # The change is here: n is raised from its default of 512.
    n = 2^12
  ) +
  scale_fill_manual(
    values = c(
      "black", "darkgray", "lightgray", "white",
      "white", "lightgray", "darkgray", "black"
    )
  ) +
  # Restrict the visible x range via the coordinate system.
  coord_cartesian(xlim = c(-450, 450))
print(p)
#> Picking joint bandwidth of 12.6
Created on 2019-01-21 by the reprex package (v0.2.1)
Thanks a lot for the fix, this is very helpful for my specific use case and hopefully also for others.
Thanks for this great package!
I played around a bit and ran into the following issue which I could not resolve, thought that it is worth reporting:
Using the attached test data with the code below results in a mismatch between the quantile lines and the shaded areas, which unfortunately do not fall between them ... Do you have any idea what might cause this behavior?
ggridges_quantile_testdata.tsv.zip