Hi @zijianwang2000,
thanks for notifying me about this problem. The error you were getting was due to a bug in the master branch. I have now added a FlattenNetwork, as per your suggestion, in the networks.py module and fixed the bug. The FlattenNetwork can be imported in any script and used accordingly.
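For example:

from bayesflow.networks import FlattenNetwork

summary_net = FlattenNetwork()  # flattens each dataset into a single feature vector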
Cheers, Stefan
Dear Stefan, thanks a lot for the quick response!
Best regards Zijian
Hi Stefan,
I tried to use FlattenNetwork in the parameter estimation workflow for the diffusion model. However, I am still getting an error concerning shapes/dimensions (see below). The same error arises when I use FlattenNetwork for a self-implemented MVN model for which I want to estimate the posterior mean. Could you please have a look and tell me what I am doing wrong? Thanks a lot in advance!
Kind regards Zijian
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from numba import njit
import tensorflow as tf
from bayesflow.networks import InvertibleNetwork, InvariantNetwork, FlattenNetwork
from bayesflow.amortizers import SingleModelAmortizer
from bayesflow.trainers import ParameterEstimationTrainer
from bayesflow.diagnostics import *
from bayesflow.models import GenerativeModel
def prior(batch_size):
    """
    Samples from the prior 'batch_size' times.
    ----------
    Arguments:
    batch_size : int -- the number of samples to draw from the prior
    ----------
    Output:
    theta : np.ndarray of shape (batch_size, theta_dim) -- the sampled batch of parameters
    """
    # Prior ranges for the simulator:
    # v1, v2 ~ U(0.1, 7.0)
    # a1, a2 ~ U(0.1, 4.0)
    # ndt    ~ U(0.1, 3.0)
    p_samples = np.random.uniform(low=(0.1, 0.1, 0.1, 0.1, 0.1),
                                  high=(7.0, 7.0, 4.0, 4.0, 3.0), size=(batch_size, 5))
    return p_samples.astype(np.float32)
@njit
def diffusion_trial(v, a, ndt, zr, dt, max_steps):
    """Simulates a trial from the diffusion model."""
    n_steps = 0.
    x = a * zr
    # Simulate a single DM path
    while (x > 0 and x < a and n_steps < max_steps):
        # DDM equation
        x += v*dt + np.sqrt(dt) * np.random.normal()
        # Increment step
        n_steps += 1.0
    rt = n_steps * dt
    return rt + ndt if x > 0. else -rt - ndt
@njit
def diffusion_condition(n_trials, v, a, ndt, zr=0.5, dt=0.005, max_steps=1e4):
    """Simulates a diffusion process over an entire condition."""
    x = np.empty(n_trials)
    for i in range(n_trials):
        x[i] = diffusion_trial(v, a, ndt, zr, dt, max_steps)
    return x
@njit
def diffusion_2_conds(params, n_trials, dt=0.005, max_steps=1e4):
    """
    Simulates a diffusion process for 2 conditions with 5 parameters (v1, v2, a1, a2, ndt).
    """
    n_trials_c1 = n_trials[0]
    n_trials_c2 = n_trials[1]
    v1, v2, a1, a2, ndt = params
    rt_c1 = diffusion_condition(n_trials_c1, v1, a1, ndt, dt=dt, max_steps=max_steps)
    rt_c2 = diffusion_condition(n_trials_c2, v2, a2, ndt, dt=dt, max_steps=max_steps)
    rts = np.concatenate((rt_c1, rt_c2))
    return rts
def batch_simulator(prior_samples, n_obs, dt=0.005, s=1.0, max_iter=1e4):
    """
    Simulates multiple diffusion-model datasets.
    """
    n_sim = prior_samples.shape[0]
    sim_data = np.empty((n_sim, n_obs), dtype=np.float32)
    n1 = n_obs//2
    n2 = n_obs - n1
    # Simulate diffusion data (pass dt and max_iter through to the simulator)
    for i in range(n_sim):
        sim_data[i] = diffusion_2_conds(prior_samples[i], (n1, n2), dt=dt, max_steps=max_iter)
    # Create condition labels
    cond_arr = np.stack(n_sim * [np.concatenate((np.zeros(n1), np.ones(n2)))])
    sim_data = np.stack((sim_data, cond_arr), axis=-1)
    return sim_data
summary_net = FlattenNetwork()
inference_net = InvertibleNetwork({'n_params': 5})
amortizer = SingleModelAmortizer(inference_net, summary_net)
generative_model = GenerativeModel(prior, batch_simulator)
trainer = ParameterEstimationTrainer(
    network=amortizer,
    generative_model=generative_model,
)
# Variable n_obs
def prior_N(n_min=60, n_max=300):
    """
    A prior for the number of observations (will be called internally at each backprop step).
    """
    return np.random.randint(n_min, n_max+1)
%%time
losses = trainer.train_online(epochs=2, iterations_per_epoch=100, batch_size=32, n_obs=prior_N)
Training epoch 1: 0%
0/100 [00:00<?, ?it/s]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<timed exec> in <module>
~\Bachelorarbeit - IMPORTANT\bayesflow\trainers.py in train_online(self, epochs, iterations_per_epoch, batch_size, n_obs, **kwargs)
129
130 # One step backprop
--> 131 loss = self._train_step(*args)
132
133 # Store loss into dictionary
~\Bachelorarbeit - IMPORTANT\bayesflow\trainers.py in _train_step(self, *args)
344 """
345 with tf.GradientTape() as tape:
--> 346 loss = self.loss(self.network, *args)
347
348 # One step backprop
~\Bachelorarbeit - IMPORTANT\bayesflow\losses.py in kl_latent_space_gaussian(network, *args)
63 """
64
---> 65 z, log_det_J = network(*args)
66 loss = tf.reduce_mean(0.5 * tf.square(tf.norm(z, axis=-1)) - log_det_J)
67 return loss
~\AppData\Roaming\Python\Python37\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~\Bachelorarbeit - IMPORTANT\bayesflow\amortizers.py in call(self, params, sim_data, return_summary)
212
213 # Compute output of inference net
--> 214 out = self.inference_net(params, sim_data)
215
216 if not return_summary:
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in call(self, target, condition, inverse)
625 if inverse:
626 return self.inverse(target, condition)
--> 627 return self.forward(target, condition)
628
629 def forward(self, target, condition):
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in forward(self, target, condition)
633 log_det_Js = []
634 for layer in self.coupling_layers:
--> 635 z, log_det_J = layer(z, condition)
636 log_det_Js.append(log_det_J)
637 # Sum Jacobian determinants for all layers (coupling blocks) to obtain total Jacobian.
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in call(self, target_or_z, condition, inverse)
529
530 if not inverse:
--> 531 return self.forward(target_or_z, condition)
532 return self.inverse(target_or_z, condition)
533
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in forward(self, target, condition)
548
549 # Pass through coupling layer
--> 550 z, log_det_J_c = self._forward(target, condition)
551 log_det_Js += log_det_J_c
552
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in _forward(self, target, condition)
445
446 # Pre-compute network outputs for v1
--> 447 s1 = self.s1(u2, condition)
448 # Clamp s1 if specified
449 if self.alpha is not None:
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in call(self, target, condition)
382 condition = tf.stack([condition] * N, axis=1)
383 inp = tf.concat((target, condition), axis=-1)
--> 384 out = self.dense(inp)
385 return out
386
ValueError: Exception encountered when calling layer "sequential_25" (type Sequential).
Input 0 of layer "dense_64" is incompatible with the layer: expected axis -1 of input shape to have value 103, but received input with shape (32, 287)
Call arguments received:
• inputs=tf.Tensor(shape=(32, 287), dtype=float32)
• training=None
• mask=None
Hey Zijian,
the FlattenNetwork cannot possibly deal with inputs of variable size, since each batch will have a different feature dimension. I suggest you always use the InvariantNetwork for variable-size inputs, or hard-coded statistics, if available and needed.
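To see why, consider a standalone sketch (plain Keras, not BayesFlow internals): a Dense layer fixes its expected input width on its first call, so flattened batches with a different n_obs can no longer pass through it. The 103 vs. 287 in the traceback above is plausibly exactly this, e.g. 2 features × 50 observations + 3 parameter dimensions from the coupling split versus 2 × 142 + 3.

import numpy as np
import tensorflow as tf

# A Dense layer builds its kernel on the first call, which fixes the
# expected feature dimension; flattening variable-size datasets then
# fails on the next batch.
dense = tf.keras.layers.Dense(64)
flatten = tf.keras.layers.Flatten()

batch_a = np.random.rand(32, 50, 2).astype(np.float32)   # n_obs = 50
batch_b = np.random.rand(32, 142, 2).astype(np.float32)  # n_obs = 142

out_a = dense(flatten(batch_a))  # builds the layer for 100 input features
out_b = dense(flatten(batch_b))  # ValueError: expected 100, received 284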
Best, Stefan
Sorry, I should have mentioned that I also receive the same error when using a fixed n_obs, e.g.
# Pre-simulated data (could be loaded from somewhere else)
n_sim = 5000
n_obs = 10
true_params, x = generative_model(n_sim, n_obs)
%%time
losses = trainer.train_offline(epochs=1, batch_size=64, params=true_params, sim_data=x)
Converting 5000 simulations to a TensorFlow data set...
Training epoch 1: 0%
0/79 [00:00<?, ?it/s]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<timed exec> in <module>
~\Bachelorarbeit - IMPORTANT\bayesflow\trainers.py in train_offline(self, epochs, batch_size, *args, **kwargs)
235
236 # One step backpropagation
--> 237 loss = self._train_step(*args_b)
238
239 # Store loss and update progress bar
~\Bachelorarbeit - IMPORTANT\bayesflow\trainers.py in _train_step(self, *args)
344 """
345 with tf.GradientTape() as tape:
--> 346 loss = self.loss(self.network, *args)
347
348 # One step backprop
~\Bachelorarbeit - IMPORTANT\bayesflow\losses.py in kl_latent_space_gaussian(network, *args)
63 """
64
---> 65 z, log_det_J = network(*args)
66 loss = tf.reduce_mean(0.5 * tf.square(tf.norm(z, axis=-1)) - log_det_J)
67 return loss
~\AppData\Roaming\Python\Python37\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~\Bachelorarbeit - IMPORTANT\bayesflow\amortizers.py in call(self, params, sim_data, return_summary)
212
213 # Compute output of inference net
--> 214 out = self.inference_net(params, sim_data)
215
216 if not return_summary:
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in call(self, target, condition, inverse)
625 if inverse:
626 return self.inverse(target, condition)
--> 627 return self.forward(target, condition)
628
629 def forward(self, target, condition):
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in forward(self, target, condition)
633 log_det_Js = []
634 for layer in self.coupling_layers:
--> 635 z, log_det_J = layer(z, condition)
636 log_det_Js.append(log_det_J)
637 # Sum Jacobian determinants for all layers (coupling blocks) to obtain total Jacobian.
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in call(self, target_or_z, condition, inverse)
529
530 if not inverse:
--> 531 return self.forward(target_or_z, condition)
532 return self.inverse(target_or_z, condition)
533
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in forward(self, target, condition)
548
549 # Pass through coupling layer
--> 550 z, log_det_J_c = self._forward(target, condition)
551 log_det_Js += log_det_J_c
552
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in _forward(self, target, condition)
445
446 # Pre-compute network outputs for v1
--> 447 s1 = self.s1(u2, condition)
448 # Clamp s1 if specified
449 if self.alpha is not None:
~\Bachelorarbeit - IMPORTANT\bayesflow\networks.py in call(self, target, condition)
382 condition = tf.stack([condition] * N, axis=1)
383 inp = tf.concat((target, condition), axis=-1)
--> 384 out = self.dense(inp)
385 return out
386
ValueError: Exception encountered when calling layer "sequential_73" (type Sequential).
Input 0 of layer "dense_208" is incompatible with the layer: expected axis -1 of input shape to have value 103, but received input with shape (64, 23)
Call arguments received:
• inputs=tf.Tensor(shape=(64, 23), dtype=float32)
• training=None
• mask=None
I see where the problem comes from. You should set skip_checks=True when initializing your Trainer instance, for instance:
trainer = ParameterEstimationTrainer(
    network=amortizer,
    generative_model=generative_model,
    skip_checks=True
)
The problem otherwise is that the internal check is performed with a different fixed n_obs, so the input shape is inadvertently fixed before training. I will think of a better solution, but for now the above should get you going!
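(Consistent with the shapes above: a check batch with, say, n_obs = 50 would give 2 × 50 + 3 = 103 expected input features, while the real batches with n_obs = 10 flatten to 2 × 10 + 3 = 23.)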
Ah, I see. Now it's working. Thanks a lot!
Best regards Zijian
In case the datasets are small, it seems reasonable not to use any summary network at all. However, if we set summary_net = None, some problems concerning dimensions arise: raw data are stored in an array of shape (batch_size, n_obs, D), where D is the dimension of a single observation, while summary networks produce outputs of shape (batch_size, summary_stats_dim). Thus, it would be helpful to have an IdentityNetwork which simply concatenates a dataset of multiple observations into one single vector. The outputs of the IdentityNetwork would then represent the case in which no summary network is used, while matching the required input dimensions of the inference network.
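A minimal sketch of such an IdentityNetwork, assuming it only has to map (batch_size, n_obs, D) to a 2D tensor as described above (a sketch, not the library's implementation):

import tensorflow as tf

class IdentityNetwork(tf.keras.Model):
    """Applies no learned summary: flattens each dataset of shape
    (n_obs, D) into a single vector, i.e. maps
    (batch_size, n_obs, D) -> (batch_size, n_obs * D)."""

    def call(self, x, **kwargs):
        # tf.shape keeps the batch dimension dynamic
        return tf.reshape(x, (tf.shape(x)[0], -1))

An instance could then be passed as the summary network to the amortizer, subject to the same fixed-n_obs caveat discussed above.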
My own attempt to implement such an IdentityNetwork resulted in the following error: