pymc-devs / pymc

Bayesian Modeling and Probabilistic Programming in Python
https://docs.pymc.io/
Other
8.62k stars 1.99k forks source link

Unable to define priors using pymc with adstock and saturation #7145

Closed shuvayan closed 7 months ago

shuvayan commented 7 months ago

Description

Hello,

I am using the below piece of code to define the adstock , saturation effects :

def adstock_geometric_pytensor(x, theta):
    x = torch.tensor(x,dtype=torch.float32)
    theta = torch.tensor(theta, dtype=torch.float32)

    def adstock_geometric_recurrence_pytensor(index, input_x, decay_x, theta):
        decay_x[index] = input_x + theta * decay_x[index-1]
        return decay_x

    len_observed = x.shape[0]

    x_decayed = torch.zeros_like(x)
    x_decayed[0] = x[0]

    for index in range(1,len_observed):
        x_decayed = adstock_geometric_recurrence_pytensor(index,x[index],x_decayed,theta)

    return x_decayed

def saturation_hill_pymc(x, alpha, gamma): 

    x_s_hill = x ** alpha / (x ** alpha + gamma ** alpha)

    return x_s_hill

Post this I define my model development as below:

START_ANALYSIS_INDEX = 1
END_ANALYSIS_INDEX = 292

transform_variables = ["trend", "season", "holiday", 'Audio_s','Cable_s']

delay_channels = ['Audio_s','Cable_s']
media_channels = ['Audio_s','Cable_s']
control_variables = ["trend", "season", "holiday"]

target = "outcome"

data_transformed = final_data.copy()

numerical_encoder_dict = {}

for feature in transform_variables:
    scaler = MinMaxScaler()
    original = final_data[feature].values.reshape(-1, 1)
    transformed = scaler.fit_transform(original)
    data_transformed[feature] = transformed
    numerical_encoder_dict[feature] = scaler

dependent_transformation = None
original = final_data[target].values
data_transformed[target] = original 

Model 1:

response_mean = []
with pm.Model() as model_2:
    for channel_name in delay_channels:

        print(f"Delay Channels: Adding {channel_name}")

        x = data_transformed[channel_name].values

        adstock_param = pm.Beta(f"{channel_name}_adstock",3,3)
        pidata = pm.sample(1000,random_seed=43)
        adstock_param = pidata.posterior[f"{channel_name}_adstock"].to_numpy()

        saturation_gamma = pm.Beta(f"{channel_name}_gamma",2,2)
        pidata = pm.sample(1000,random_seed=43)
        saturation_gamma = pidata.posterior[f"{channel_name}_gamma"].to_numpy()

        saturation_alpha = pm.Beta(f"{channel_name}_alpha",3,1)
        pidata = pm.sample(1000,random_seed=43)
        saturation_alpha = pidata.posterior[f"{channel_name}_alpha"].to_numpy()

        x_new = adstock_geometric_pytensor(x, adstock_param)
        x_new_sliced = x_new[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        saturation_tensor = saturation_hill_pymc(x_new_sliced, saturation_alpha, saturation_gamma)

        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma = 3)
        pidata = pm.sample(1000,random_seed=43)
        channel_b = pidata.posterior[f"{channel_name}_media_coef"].to_numpy()
        response_mean.append(saturation_tensor * channel_b)

    for control_var in control_variables:

        print(f"Control Variables: Adding {control_var}")

        x = data_transformed[control_var].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        control_beta = pm.Normal(f"{control_var}_control_coef", sigma = 3)
        pidata = pm.sample(1000,random_seed=43)
        control_beta = pidata.posterior[f"{control_var}_control_coef"].to_numpy()
        control_x = control_beta * x
        response_mean.append(control_x)

    print(f"Intercept Defining: Adding --intercept")
    intercept = pm.Normal("intercept", np.mean(data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]), sigma = 3)
    print(f"Intercept Defined: Added --{intercept}")
    pidata = pm.sample(50,random_seed=43)
    intercept = pidata.posterior["intercept"].to_numpy()

    print(f"Sigma Defining: Adding --Sigma")
    sigma = pm.HalfNormal("sigma", 4)
    print(f"Sigma Defined: Added --{sigma}")
    pidata = pm.sample(50,random_seed=43)
    sigma = pidata.posterior["sigma"].to_numpy()

    print(f"likelihood Defining: Adding --Likelihood")
    likelihood = pm.Normal("likelihood", mu = intercept , sigma = sigma)
    print(f"likelihood Defined: Added --{likelihood}")
    pidata = pm.sample(50,random_seed=43)
    print(f"pidata Defined: Added --PiData")
    likelihood = pidata.posterior["likelihood"].to_numpy()

What am I doing wrong here? Cannot figure it out for the life of me, can someone please help me with this? I am pretty new to pymc and messing it up.

Below is the error message that I see.

RuntimeError                              Traceback (most recent call last)
File <timed exec>:51

Cell In[21], line 47, in adstock_geometric_pytensor(x, theta)
     44 x_decayed[0] = x[0]
     46 for index in range(1,len_observed):
---> 47     x_decayed = adstock_geometric_recurrence_pytensor(index,x[index],x_decayed,theta)
     49 return x_decayed

Cell In[21], line 38, in adstock_geometric_pytensor.<locals>.adstock_geometric_recurrence_pytensor(index, input_x, decay_x, theta)
     37 def adstock_geometric_recurrence_pytensor(index, input_x, decay_x, theta):
---> 38     decay_x[index] = input_x + theta * decay_x[index-1]
     39     return decay_x

RuntimeError: expand(torch.FloatTensor{[4, 1000]}, size=[]): the number of sizes provided (0) must be greater or equal to the number of dimensions in the tensor (2)

Note : Most of the above code base is in pymc3 which I have upgraded to lates pymc version and redeveloping the code.

The earlier adstock function using theano was as below:

def adstock_geometric_theano_pymc3(x, theta):
    x = tt.as_tensor_variable(x)
    #x = tt.vector("x")
    #theta = tt.scalar("theta")

    def adstock_geometric_recurrence_theano(index, input_x, decay_x, theta):
        return tt.set_subtensor(decay_x[index], tt.sum(input_x + theta * decay_x[index - 1]))

    len_observed = x.shape[0]

    x_decayed = tt.zeros_like(x)
    x_decayed = tt.set_subtensor(x_decayed[0], x[0])

    output, _ = theano.scan(
        fn = adstock_geometric_recurrence_theano, 
        sequences = [tt.arange(1, len_observed), x[1:len_observed]], 
        outputs_info = x_decayed,
        non_sequences = theta, 
        n_steps = len_observed - 1
    )

    return output[-1]

And the model definitions were as below :

transform_variables = ["trend", "season", "holiday", 'Audio_s','Cable_s']

delay_channels = ['Audio_s','Cable_s']
media_channels = ['Audio_s','Cable_s']
control_variables = ["trend", "season", "holiday"]

target = "outcome"

data_transformed = data.copy()

numerical_encoder_dict = {}

for feature in transform_variables:
    scaler = MinMaxScaler()
    original = data[feature].values.reshape(-1, 1)
    transformed = scaler.fit_transform(original)
    data_transformed[feature] = transformed
    numerical_encoder_dict[feature] = scaler

dependent_transformation = None
original = data[target].values
data_transformed[target] = original / 100_000

response_mean = []
with pm.Model() as model_2:
    for channel_name in delay_channels:
        print(f"Delay Channels: Adding {channel_name}")

        x = data_transformed[channel_name].values

        adstock_param = pm.Beta(f"{channel_name}_adstock", 3, 3)
        saturation_gamma = pm.Beta(f"{channel_name}_gamma", 2, 2)
        saturation_alpha = pm.Gamma(f"{channel_name}_alpha", 3, 1)

        x_new = adstock_geometric_theano_pymc3(x, adstock_param)
        x_new_sliced = x_new[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        saturation_tensor = saturation_hill_pymc3(x_new_sliced, saturation_alpha, saturation_gamma)

        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sd = 3)
        response_mean.append(saturation_tensor * channel_b)

    for control_var in control_variables:
        print(f"Control Variables: Adding {control_var}")

        x = data_transformed[control_var].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]

        control_beta = pm.Normal(f"{control_var}_control_coef", sd = 3)
        control_x = control_beta * x
        response_mean.append(control_x)

    intercept = pm.Normal("intercept", np.mean(data_transformed[target].values), sd = 3)
    #intercept = pm.HalfNormal("intercept", 0, sd = 3)

    sigma = pm.HalfNormal("sigma", 4)

    likelihood = pm.Normal("outcome", mu = intercept + sum(response_mean), sd = sigma, observed = data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX])
welcome[bot] commented 7 months ago

Welcome Banner] :tada: Welcome to PyMC! :tada: We're really excited to have your input into the project! :sparkling_heart:
If you haven't done so already, please make sure you check out our Contributing Guidelines and Code of Conduct.

ricardoV94 commented 7 months ago

Hi @shuvayan

This seems like a user question which should be posted on our discourse: https://discourse.pymc.io/

We use GitHub for development discussions only.

Cheers