Closed DominiqueMakowski closed 3 months ago
And here's the full stan code of the following model:
m <- ordbetareg::ordbetareg(formula=vs ~ mpg, data=mtcars)
brms::stancode(m)
// generated with brms 2.21.1
functions {
real ord_beta_reg_lpdf(real y, real mu, real phi, real cutzero, real cutone) {
vector[2] thresh;
thresh[1] = cutzero;
thresh[2] = cutzero + exp(cutone);
if(y==0) {
return log1m_inv_logit(mu - thresh[1]);
} else if(y==1) {
return log_inv_logit(mu - thresh[2]);
} else {
return log_diff_exp(log_inv_logit(mu - thresh[1]), log_inv_logit(mu - thresh[2])) +
beta_lpdf(y|exp(log_inv_logit(mu) + log(phi)),exp(log1m_inv_logit(mu) + log(phi)));
}
}
real induced_dirichlet_lpdf(real nocut, vector alpha, real phi, int cutnum, real cut1, real cut2) {
int K = num_elements(alpha);
vector[K-1] c = [cut1, cut1 + exp(cut2)]';
vector[K - 1] sigma = inv_logit(phi - c);
vector[K] p;
matrix[K, K] J = rep_matrix(0, K, K);
if(cutnum==1) {
// Induced ordinal probabilities
p[1] = 1 - sigma[1];
for (k in 2:(K - 1))
p[k] = sigma[k - 1] - sigma[k];
p[K] = sigma[K - 1];
// Baseline column of Jacobian
for (k in 1:K) J[k, 1] = 1;
// Diagonal entries of Jacobian
for (k in 2:K) {
real rho = sigma[k - 1] * (1 - sigma[k - 1]);
J[k, k] = - rho;
J[k - 1, k] = rho;
}
// divide in half for the two cutpoints
// don't forget the ordered transformation
return dirichlet_lpdf(p | alpha)
+ log_determinant(J) + cut2;
} else {
return(0);
}
}
real induced_dirichlet_rng(vector alpha, real phi, int cutnum, real cut1, real cut2) {
int K = num_elements(alpha);
vector[K] p;
vector[K-1] cutpoints;
// need to reverse the steps
// first get the dirichlet probabilities conditional on alpha
p = dirichlet_rng(alpha);
// then do the *reverse* transformation to get cutpoints
for(k in 1:(K-1)) {
if(k==1) {
cutpoints[k] = phi - logit(1 - p[k]);
} else {
cutpoints[k] = phi - logit(inv_logit(phi - cutpoints[k-1]) - p[k]);
}
}
return cutpoints[cutnum];
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> Kc; // number of population-level effects after centering
int prior_only; // should the likelihood be ignored?
}
transformed data {
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
}
parameters {
vector[Kc] b; // regression coefficients
real Intercept; // temporary intercept for centered predictors
real<lower=0> phi; // precision parameter
real cutzero;
real cutone;
}
transformed parameters {
real lprior = 0; // prior contributions to the log posterior
lprior += normal_lpdf(b | 0,5);
lprior += student_t_lpdf(Intercept | 3, 0, 2.5);
lprior += exponential_lpdf(phi | 0.1);
lprior += induced_dirichlet_lpdf(cutzero | [1,1,1]', 0, 1,cutzero,cutone);
lprior += induced_dirichlet_lpdf(cutone | [1,1,1]', 0, 2,cutzero,cutone);
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = rep_vector(0.0, N);
mu += Intercept + Xc * b;
for (n in 1:N) {
target += ord_beta_reg_lpdf(Y[n] | mu[n], phi, cutzero, cutone);
}
}
// priors including constants
target += lprior;
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
}
Looks like the goal would be for something like this?
using RDatasets
using TuringGLM
mtcars = dataset("datasets", "mtcars")
fm = @formula(vs ~ mpg)
model = turing_model(fm, data; model= OrderedBeta);
Seems robustly supported across languages and has been used for a number of applications. What is the closest implementation in Distribution.jl? This might be the best place to start. After that, it would still mean porting over to TuringGLM if we wanted the nice syntax above. Either way, a custom distribution would be the starting place.
Here is what we could start with
using Distributions
using StatsFuns: logistic, logit, log1pexp
using SpecialFunctions: logbeta
using Random
import Distributions: logpdf, pdf, cdf, quantile
import Random: rand
"""
OrderedBeta(μ, ϕ, k1, k2)
Ordered Beta distribution with parameters:
- `μ`: location parameter on the logit scale
- `ϕ`: precision parameter (must be positive)
- `k1`: first cutpoint
- `k2`: log of the difference between the second and first cutpoints
The distribution is defined on the interval [0, 1] with additional point masses at 0 and 1.
"""
struct OrderedBeta{T<:Real} <: ContinuousUnivariateDistribution
μ::T
ϕ::T
k1::T
k2::T
beta_dist::Beta{T}
function OrderedBeta{T}(μ::T, ϕ::T, k1::T, k2::T) where T<:Real
@assert ϕ > 0 "ϕ must be positive"
@assert k1 < k2 "k1 must be less than k2"
new{T}(μ, ϕ, k1, k2, Beta(logistic(μ) * ϕ, (1 - logistic(μ)) * ϕ))
end
end
OrderedBeta(μ::T, ϕ::T, k1::T, k2::T) where T<:Real = OrderedBeta{T}(μ, ϕ, k1, k2)
function OrderedBeta(μ::Real, ϕ::Real, k1::Real, k2::Real)
T = promote_type(typeof(μ), typeof(ϕ), typeof(k1), typeof(k2))
OrderedBeta(T(μ), T(ϕ), T(k1), T(k2))
end
params(d::OrderedBeta) = (d.μ, d.ϕ, d.k1, d.k2)
minimum(::OrderedBeta) = 0
maximum(::OrderedBeta) = 1
insupport(::OrderedBeta, x::Real) = 0 ≤ x ≤ 1
function logpdf(d::OrderedBeta, x::Real)
μ, ϕ, k1, k2 = params(d)
thresh = [k1, k1 + exp(k2)]
if x == 0
return log1p(-logistic(μ - thresh[1]))
elseif x == 1
return log(logistic(μ - thresh[2]))
elseif 0 < x < 1
log_p_middle = log1pexp(μ - thresh[1]) - log1pexp(μ - thresh[2])
return log_p_middle + logpdf(d.beta_dist, x)
else
return -Inf
end
end
pdf(d::OrderedBeta, x::Real) = exp(logpdf(d, x))
loglikelihood(d::OrderedBeta, x::Real) = logpdf(d, x)
function rand(rng::Random.AbstractRNG, d::OrderedBeta)
μ, ϕ, k1, k2 = params(d)
thresh = [k1, k1 + exp(k2)]
u = rand(rng)
if u <= 1 - logistic(μ - thresh[1])
return zero(μ)
elseif u >= 1 - logistic(μ - thresh[2])
return one(μ)
else
return rand(rng, d.beta_dist)
end
end
rand(d::OrderedBeta) = rand(Random.GLOBAL_RNG, d)
rand(rng::Random.AbstractRNG, d::OrderedBeta, n::Int) = [rand(rng, d) for _ in 1:n]
rand(d::OrderedBeta, n::Int) = rand(Random.GLOBAL_RNG, d, n)
Distributions.sampler(d::OrderedBeta) = d
function cdf(d::OrderedBeta, x::Real)
μ, ϕ, k1, k2 = params(d)
thresh = [k1, k1 + exp(k2)]
if x <= 0
return zero(μ)
elseif x >= 1
return one(μ)
else
p_0 = 1 - logistic(μ - thresh[1])
p_middle = logistic(μ - thresh[1]) - logistic(μ - thresh[2])
return p_0 + p_middle * cdf(d.beta_dist, x)
end
end
function quantile(d::OrderedBeta, q::Real)
0 <= q <= 1 || throw(DomainError(q, "quantile must be in [0, 1]"))
μ, ϕ, k1, k2 = params(d)
thresh = [k1, k1 + exp(k2)]
p_0 = 1 - logistic(μ - thresh[1])
p_1 = logistic(μ - thresh[2])
if q <= p_0
return zero(μ)
elseif q >= 1 - p_1
return one(μ)
else
p_middle = logistic(μ - thresh[1]) - logistic(μ - thresh[2])
q_adjusted = (q - p_0) / p_middle
return quantile(d.beta_dist, q_adjusted)
end
end
mean(d::OrderedBeta) = logistic(d.μ)
var(d::OrderedBeta) = logistic(d.μ) * (1 - logistic(d.μ)) / (1 + d.ϕ)
#test (needs validation)
d = OrderedBeta(0.5, 1.0, 1.0, 5.0)
samples = rand(d, 1000)
x = 0.5
println("pdf at $x: ", pdf(d, x))
println("logpdf at $x: ", logpdf(d, x))
println("CDF at $x: ", cdf(d, x))
println("Quantile at 0.7: ", quantile(d, 0.7))
println("Mean: ", mean(d))
println("Variance: ", var(d))
I've been trying to make sense out of this for the past days, but thanks so much @kiante-fernandez that's tremendously helpful (and it convinced me we can have a far more complete and elegant solution in Julia than currently exists elsewhere, with the ability to draw random from the distribution etc. that is currently impossible to do in R).
As a context, I think this model has the potential of being widely used (bounded and possibly zero-one inflated data is probably the most common in psychology), so it's worth trying to get that right.
I'm trying to take it slowly, coz' as you know I'm not good with equations, so that's been hard. I am starting with the random sample generation method.
One thing I would potentially like to do is to remove the hard-coded link function that exists in the original code: i.e., parameters are on the logit or log scale, mostly because that's how they are used in regression models. However, since in Julia we can dissociate the "distribution" creation from the model itself, I'd like to see the parameters of the model expressed in their true values, so that user can specify the link functions they want in their model.
As I was trying to do that (meaning to reverse-transform some of the logistic()
calls), I realized that it would also be convenient, rather than having "k2" as a difference from "k1", to have both points expressed as independent departures from their extremes (i.e., zero and one, respectively).
Because of these changes, I for now renamed the distribution ExtremeBeta()
which aims to be a variant of OrderedBeta()
.
The whole idea is that there are 2 cutpoints, renamed k0 and k1, that indicate departures from 0 and 1 beyond which the probability of being an extreme value (zero or one) increases.
The reason for this reparametrization is that I think it allows for nice way of setting priors as: truncated(Normal(0, 0.1), 0, 0.5)
for both k0 and k1 (as ideally they would not be > 0.5 so that each threshold stays on its side).
For instance, if k0=0.1 and k1=0.1, that means that the 2 "thresholds" are placed at respectively 0.1
(0+k0) and 0.9
(1-k1).
The probabilities of a random point of being either 1
, 0
, or from the Beta distribution
change depending on how far they are from these thresholds.
I think I successfully managed to implement the random generation based on the equations from the paper (in eq. 5 they defined probabilities alpha, gamma and delta that are the probability of being 0, 1 and in-between respectively).
I'm sorry to have dragged you into this @kiante-fernandez but do you think that makes sense? And how would you go about defining the rest of the methods (in particular the pdf) from there so that it can be used in Turing?
(as a side note, I also dipped my feet in package creation in Julia, let me know if there's anything we can improve here :) )
Thanks a lot!
The new ordered beta works very well so there's no need for the "Extreme Beta" variant as it basically Ordered Beta
I need some help with implementing a very useful model that is a modified Beta model that can accommodate zeros and ones. @kiante-fernandez I was wondering if you could throw a quick look at it and let me know whether it looks feasible or not.