weight data contribution to likelihood

IosuParadinas commented 1 year ago

Hi!

I am trying to combine two datasets. We pressume that one data source has bigger uncertainty, so we would like to give it a larger impact in the model. In INLA, one could use the "weights" argument to do so. Is there something similar in inlabru??

Simulated example fitted in INLA using the "weights" argument:

library(INLA)
library(tidyverse)
set.seed(123)

n1 <- 200
n2 <- 30

x=round(runif(n1+n2),2)
x1 <- inla.group(x,n=10)[1:n1]
x2 <- inla.group(x,n=10)[(n1+1):(n1+n2)]
z = c(inla.group(round(runif(n1),2),10),rep(NA,n2))

beta_x = 2
beta_x2 = -2
beta_z = 2

I create two datasets, one of them is driven by two non-linear covariates (x and y) and has larger uncertainty, while the other one is only driven by one covariate (x)

y1 <- rnorm(n1, mean = beta_x * x1 + beta_x2 * x1 ^ 2 + 2 + beta_z * z[1:n1] ^ 4 , sd =1.5) ### two covariates and large observational error (noisy)
y2 <- rnorm(n2, mean = beta_x * x2 + beta_x2 * x2 ^ 2 + 2 , sd =.1) ### only one covariate and more precise data

I want to infer the effect of covariate "Z" in "Y1". In INLA I would use the "weight" argument to assign larger weight to the more precise data.


### in INLA
data1 = data.frame(Y=y1,x1=x1,z=z[1:n1])
data2 = data.frame(Y=y2,x2=x2)
data = list(Y = cbind(c(y1,rep(NA,n2)),
                      y2 = c(rep(NA,n1),y2)),
            x1 = c(x1,rep(NA,n2)),
            x2 = c(rep(NA,n1),x2),
            z=z,
            b0_1 = c(rep(1,n1),rep(NA,n2)),
            b0_2 = c(rep(NA,n1),rep(1,n2))
)

### formulas
formula1 = Y ~  f(x1,model="rw2") + f(z,model="rw1") 
formula2 = Y ~  f(x2 ,model="rw2")
formula_joint = Y ~ -1 + b0_1 + b0_2 + f(x2 ,model="rw2") + f(x1,copy="x2")+ 
  f(z,model="rw1") 

## separate models
mod1 = inla(formula=formula1,
            data=data1,
            family = "gaussian")
mod2 = inla(formula=formula2,
            data=data2,
            family = "gaussian")

## joint model
jmod = inla(formula=formula_joint,
            data=data,
            family = c("gaussian","gaussian"))

## Likelihood weighted joint model
weight_lik1 = 1 ; weight_lik2 = 5
jmod_weighted = inla(formula=formula_joint, 
                     data=data,
                     family = c("gaussian","gaussian"),
                     weights = c(rep(weight_lik1,n1),rep(weight_lik2,n2)))

When visualizing the data one can see that the more realistic covariate effects are obtained when fitting the weighted joint model

#######################
###### Visu Covar X ###
#######################
covar_X_effect = ggplot(data.frame(x=seq(0,1,.05),f_x=beta_x*seq(0,1,.05) + beta_x2*seq(0,1,.05)^2)) + geom_line(aes(x,f_x))+
  ggtitle("Real X covariate effect")

p_x1 = mod1$summary.random$x1%>%
  ggplot()+ geom_line(aes(x=ID, y=mean)) +
  geom_line(aes(x=ID, y=`0.025quant`), lty=2)+
  geom_line(aes(x=ID, y=`0.975quant`), lty=2) + ggtitle("Noisy data")

p_x2 = mod2$summary.random$x2%>%
  ggplot()+ geom_line(aes(x=ID, y=mean)) +
  geom_line(aes(x=ID, y=`0.025quant`), lty=2)+
  geom_line(aes(x=ID, y=`0.975quant`), lty=2)  + ggtitle("Less noisy data")

p_x2j = jmod$summary.random$x2%>%
  ggplot()+ geom_line(aes(x=ID, y=mean)) +
  geom_line(aes(x=ID, y=`0.025quant`), lty=2)+
  geom_line(aes(x=ID, y=`0.975quant`), lty=2) + ggtitle("Joint")

p_x2j_weight = jmod_weighted$summary.random$x2%>%
  ggplot()+ geom_line(aes(x=ID, y=mean)) +
  geom_line(aes(x=ID, y=`0.025quant`), lty=2)+
  geom_line(aes(x=ID, y=`0.975quant`), lty=2)  + ggtitle("Joint (downweight noisy data)")

gridExtra::grid.arrange(p_x1,p_x2,p_x2j,p_x2j_weight,covar_X_effect,ncol=2,top="Covar X")

#######################
###### Visu Covar Z ###
#######################
covar_Z_effect = ggplot(data.frame(x=seq(0,1,.05),f_x=beta_z*seq(0,1,.05)^4)) + geom_line(aes(x,f_x))+
  ggtitle("Real Z covariate effect")

p_z1 = mod1$summary.random$x1%>%
  ggplot()+ geom_line(aes(x=ID, y=mean)) +
  geom_line(aes(x=ID, y=`0.025quant`), lty=2)+
  geom_line(aes(x=ID, y=`0.975quant`), lty=2) + ggtitle("Noisy data")

p_zj = jmod$summary.random$z%>%
  ggplot()+ geom_line(aes(x=ID, y=mean)) +
  geom_line(aes(x=ID, y=`0.025quant`), lty=2)+
  geom_line(aes(x=ID, y=`0.975quant`), lty=2) + ggtitle("Joint")

p_zj_weight = jmod_weighted$summary.random$z%>%
  ggplot()+ geom_line(aes(x=ID, y=mean)) +
  geom_line(aes(x=ID, y=`0.025quant`), lty=2)+
  geom_line(aes(x=ID, y=`0.975quant`), lty=2)  + ggtitle("Joint (downweight noisy data)")

gridExtra::grid.arrange(p_z1,p_zj,p_zj_weight,covar_Z_effect,ncol=2,top="Covar Z")