lnccbrown / HSSM

Development of HSSM package
Other
70 stars 10 forks source link

Convergence issues when running model with categorical covariates in hierarchy #462

Open AlexanderFengler opened 2 weeks ago

AlexanderFengler commented 2 weeks ago

Synthetic data:

# Generated dataset
n_subjects = 25  # number of subjects
n_trials = 50  # number of trials per subject - vary from low to high values to check shrinkage
sd_v = 0.3  # sd for v-intercept (also used for a)
mean_v = 1.25  # mean for v-intercept
mean_vx = 0.8 # mean for slope of x onto v
mean_vy = 0.2 # mean for slope of x onto v
sd_tz=0.1
mean_a = 1.5
mean_t = 0.5
mean_z = 0.5
data_list = []
param_list =[]
for i in range(n_subjects):
    # Make parameters for subject i
    intercept = np.random.normal(mean_v, sd_v, size=1)
    x = np.random.uniform(-1, 1, size=n_trials)
    y = np.random.uniform(-1, 1, size=n_trials)
    v_x = np.random.normal(mean_vx, sd_v, size=1)
    v_y = np.random.normal(mean_vy, sd_v, size=1)
    v = intercept + (v_x * x) + (v_y * y)
    a = np.random.normal(mean_a, sd_v, size=1)
    z = np.random.normal(mean_z, sd_tz, size=1)
    t = np.random.normal(mean_t, sd_tz, size=1)
# v is a vector which differs over trials by x and y, so we have different v for every trial - other params are same for all trials
    true_values = np.column_stack(
     [v, np.repeat(a, axis=0, repeats=n_trials), np.repeat(z, axis=0, repeats=n_trials), np.repeat(t, axis=0, repeats=n_trials)]
)
    # Simulate data
    obs_ddm_reg_v = hssm.simulate_data(model="ddm", theta=true_values, size=1)
   # store ground truth params
    param_list.append(
       pd.DataFrame(
           {
               "intercept": intercept,
               "v_x": v_x,
               "v_y": v_y,
               "a": a,
               "z": z,
               "t": t,
            }
       )
       )
    # Append simulated data to list
    data_list.append(
        pd.DataFrame(
            {
                "rt": obs_ddm_reg_v["rt"],
                "response": obs_ddm_reg_v["response"],
                "x": x,
                "y": y,
                "subject": i,
            }
        )
    )
# Make single dataframe out of subject-wise datasets
dataset_reg_v_hier = pd.concat(data_list)
dataset_reg_v_hier

Model

model_reg_v_ddm_hier1A  = hssm.HSSM(
   data=dataset_reg_v_hier,
   # loglik_kind="approx_differentiable",
    prior_settings = "safe",
        include=[
        {
            "name": "v",
            "formula": "v ~ 1 + x + y + (1 + x + y| subject)",
            "prior": {
                "Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
                "x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
                "y": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
                "1|subject": {"name": "Normal",
                        "mu": 0, # using non-centered approach so mu's of indiv subject offsets should be 0
                              "sigma": {"name": "HalfNormal",
                                        "sigma": 1
                                        }, "initval": 0.5
                },
                "x|subject": {"name": "Normal",
                              "mu": 0,
                              "sigma": {"name": "HalfNormal",
                                        "sigma": 0.5,
                                        }, "initval": 0.5
                },
                  "y|subject": {"name": "Normal",
                              "mu": 0,
                              "sigma": {"name": "HalfNormal",
                                        "sigma": 0.5,
                                       }, "initval": 0.5
            },
            },
            "link": "identity",
        },
        {
            "name": "t",
            "formula": "t ~ 1 + (1 | subject)",
            "prior": {
           "Intercept": {"name": "Normal", "mu": 0.5, "sigma": 0.4, "initval": 0.3},
            "1|subject": {"name": "Normal",
                              "mu":  0,
                              "sigma": {"name": "HalfNormal",
                                        "sigma": 0.5, "initval": .1
                                        },
                },
            },
           "link": "identity",
        },
        {
            "name": "z",
           "formula": "z ~ 1 + (1 | subject)",
             "prior": {
            #    "Intercept": {"name": "HalfNormal", "sigma": 1, "initval": .5},
                "1|subject": {"name": "Normal",
                              "mu":  0,
                              "sigma": {"name": "HalfNormal",
                                        "sigma": 0.05, "initval": .01
                                        },
                },
            },
        },
        {
            "name": "a",
            "formula": "a ~ 1 + (1 | subject)",
            "prior": {
                "Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
                "1|subject": {"name": "Normal",
                              "mu":  0,
                              "sigma": {"name": "HalfNormal",
                                        "sigma": 1, "initval": 0.3
                                        },
                },
            },
       },
    ],
  noncentered = True,
 #  p_outlier=0
)
model_reg_v_ddm_hier1A

As reported by @frankmj , this samples easily for analytic, but somehow gets stuck for approx_differentiable.

Need to investigate this.