Calculating log_prob fails using `tf.vectorized_map`

Sayam753 commented 4 years ago

I have been trying to fit an autoregressive model with mean-field ADVI, but using `tf.vectorized_map` while calculating `log_prob` results in XLA and dtype errors.

Code Snippet

```python
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability.python.mcmc.transformed_kernel import (
    make_transformed_log_prob,
)

dtype = tf.float32
tfb = tfp.bijectors
tfd = tfp.distributions

# Generating data
np.random.seed(seed=42)
T = 100
y = np.zeros((T,))
for i in range(1,T):
    y[i] = 0.95 * y[i-1] + np.random.normal()
data = y.reshape(-1, 1)

model = tfd.JointDistributionSequential([
    tfd.Normal(loc=0, scale=1.),
    lambda e: tfp.sts.AutoregressiveStateSpaceModel(
        num_timesteps=100,
        coefficients=[e],
        level_scale=0.1,
        initial_state_prior=tfd.MultivariateNormalDiag(scale_diag=[1e-6]),
    )
])

def vectorize_function(function):
    def vectorizedfn(*q_samples):
        return tf.vectorized_map(lambda samples: function(*samples), q_samples)
    return vectorizedfn

joint_log_prob = vectorize_function(lambda *x: model.log_prob(x+(data, )))
# joint_log_prob = lambda *x: model.log_prob(x+(data, ))

# Transformations to bounded space
unconstraining_bijectors = [tfb.Identity()]
target_log_prob = make_transformed_log_prob(
    joint_log_prob,
    unconstraining_bijectors,
    direction="forward",
    enable_bijector_caching=False,
)

def build_mf_advi():
    parameters = model.sample()[:-1]
    dists = []
    for i, parameter in enumerate(parameters):
        shape = parameter.shape
        loc = tf.Variable(
            tf.random.normal(shape, dtype=dtype),
            name=f"meanfield_{i}_loc",
            dtype=dtype,
        )
        scale = tfp.util.TransformedVariable(
            tf.fill(shape, value=tf.constant(1, dtype=dtype)),
            tfb.Softplus(),  # For positive values of scale
            name=f"meanfield_{i}_scale",
        )
        approx_parameter = tfd.Normal(loc=loc, scale=scale)
        dists.append(approx_parameter)
    return tfd.JointDistributionSequential(dists)

posterior = build_mf_advi()
num_steps = 5_000

def trace_fn(trace):
    tf.cond(
        tf.math.mod(trace.step, 100) == 0,
        lambda: tf.print(trace.step, "/", num_steps, "Loss:", trace.loss, end="\r"),
        lambda: tf.print("", end="")
    )
    return trace.loss

opt = tf.optimizers.Adam(learning_rate=0.1)

@tf.function(autograph=False)
def run_approximation():
    elbo_loss = tfp.vi.fit_surrogate_posterior(
        target_log_prob,
        surrogate_posterior=posterior,
        optimizer=opt,
        num_steps=num_steps,
        trace_fn=trace_fn
    )
    return elbo_loss

elbo_loss = run_approximation()
```

Traceback

```python --------------------------------------------------------------------------- InvalidArgumentError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in get_attr(self, name) 2511 with c_api_util.tf_buffer() as buf: -> 2512 pywrap_tf_session.TF_OperationGetAttrValueProto(self._c_op, name, buf) 2513 data = pywrap_tf_session.TF_GetBuffer(buf) InvalidArgumentError: Operation 'while' has no attr named '_XlaCompile'. During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn) 330 try: --> 331 xla_compile = op.get_attr("_XlaCompile") 332 xla_separate_compiled_gradients = op.get_attr( /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in get_attr(self, name) 2515 # Convert to ValueError for backwards compatibility. -> 2516 raise ValueError(str(e)) 2517 x = attr_value_pb2.AttrValue() ValueError: Operation 'while' has no attr named '_XlaCompile'. During handling of the above exception, another exception occurred: InvalidArgumentError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in get_attr(self, name) 2511 with c_api_util.tf_buffer() as buf: -> 2512 pywrap_tf_session.TF_OperationGetAttrValueProto(self._c_op, name, buf) 2513 data = pywrap_tf_session.TF_GetBuffer(buf) InvalidArgumentError: Operation 'while/while_body/cond' has no attr named '_XlaCompile'. 
During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn) 330 try: --> 331 xla_compile = op.get_attr("_XlaCompile") 332 xla_separate_compiled_gradients = op.get_attr( /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in get_attr(self, name) 2515 # Convert to ValueError for backwards compatibility. -> 2516 raise ValueError(str(e)) 2517 x = attr_value_pb2.AttrValue() ValueError: Operation 'while/while_body/cond' has no attr named '_XlaCompile'. During handling of the above exception, another exception occurred: InvalidArgumentError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in get_attr(self, name) 2511 with c_api_util.tf_buffer() as buf: -> 2512 pywrap_tf_session.TF_OperationGetAttrValueProto(self._c_op, name, buf) 2513 data = pywrap_tf_session.TF_GetBuffer(buf) InvalidArgumentError: Operation 'while/while_body/cond/PartitionedCall' has no attr named '_XlaCompile'. During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn) 330 try: --> 331 xla_compile = op.get_attr("_XlaCompile") 332 xla_separate_compiled_gradients = op.get_attr( /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in get_attr(self, name) 2515 # Convert to ValueError for backwards compatibility. -> 2516 raise ValueError(str(e)) 2517 x = attr_value_pb2.AttrValue() ValueError: Operation 'while/while_body/cond/PartitionedCall' has no attr named '_XlaCompile'. 
During handling of the above exception, another exception occurred: InvalidArgumentError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in get_attr(self, name) 2511 with c_api_util.tf_buffer() as buf: -> 2512 pywrap_tf_session.TF_OperationGetAttrValueProto(self._c_op, name, buf) 2513 data = pywrap_tf_session.TF_GetBuffer(buf) InvalidArgumentError: Operation 'monte_carlo_variational_loss/expectation/loop_body/JointDistributionSequential/log_prob/monte_carlo_variational_loss_expectation_loop_body_JointDistributionSequential_log_prob_AutoregressiveStateSpaceModel/log_prob/scan/while/TensorArrayV2Write/TensorListSetItem/pfor/Tile' has no attr named '_XlaCompile'. During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn) 330 try: --> 331 xla_compile = op.get_attr("_XlaCompile") 332 xla_separate_compiled_gradients = op.get_attr( /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in get_attr(self, name) 2515 # Convert to ValueError for backwards compatibility. -> 2516 raise ValueError(str(e)) 2517 x = attr_value_pb2.AttrValue() ValueError: Operation 'monte_carlo_variational_loss/expectation/loop_body/JointDistributionSequential/log_prob/monte_carlo_variational_loss_expectation_loop_body_JointDistributionSequential_log_prob_AutoregressiveStateSpaceModel/log_prob/scan/while/TensorArrayV2Write/TensorListSetItem/pfor/Tile' has no attr named '_XlaCompile'. 
During handling of the above exception, another exception occurred: TypeError Traceback (most recent call last) in 24 25 ---> 26 elbo_loss = run_approximation() /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds) 794 else: 795 compiler = "nonXla" --> 796 result = self._call(*args, **kwds) 797 798 new_tracing_count = self._get_tracing_count() /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds) 837 # This is the first call of __call__, so we have to initialize. 838 initializers = [] --> 839 self._initialize(args, kwds, add_initializers_to=initializers) 840 finally: 841 # At this point we know that the initialization is complete (or less /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to) 710 self._concrete_stateful_fn = ( 711 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access --> 712 *args, **kwds)) 713 714 def invalid_creator_scope(*unused_args, **unused_kwds): /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs) 2946 args, kwargs = None, None 2947 with self._lock: -> 2948 graph_function, _, _ = self._maybe_define_function(args, kwargs) 2949 return graph_function 2950 /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs) 3317 3318 self._function_cache.missed.add(call_context_key) -> 3319 graph_function = self._create_graph_function(args, kwargs) 3320 self._function_cache.primary[cache_key] = graph_function 3321 /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes) 3179 arg_names=arg_names, 3180 override_flat_arg_shapes=override_flat_arg_shapes, -> 3181 
capture_by_value=self._capture_by_value), 3182 self._function_attributes, 3183 function_spec=self.function_spec, /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes) 984 _, original_func = tf_decorator.unwrap(python_func) 985 --> 986 func_outputs = python_func(*func_args, **func_kwargs) 987 988 # invariant: `func_outputs` contains only Tensors, CompositeTensors, /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds) 612 # __wrapped__ allows AutoGraph to swap in a converted function. We give 613 # the function a weak reference to itself to avoid a reference cycle. --> 614 return weak_wrapped_fn().__wrapped__(*args, **kwds) 615 weak_wrapped_fn = weakref.ref(wrapped_fn) 616 in run_approximation() 19 optimizer=opt, 20 num_steps=num_steps, ---> 21 trace_fn=trace_fn 22 ) 23 return elbo_loss /usr/local/lib/python3.7/site-packages/tensorflow_probability/python/vi/optimization.py in fit_surrogate_posterior(target_log_prob_fn, surrogate_posterior, optimizer, num_steps, convergence_criterion, trace_fn, variational_loss_fn, sample_size, trainable_variables, seed, name) 299 trace_fn=trace_fn, 300 trainable_variables=trainable_variables, --> 301 name=name) /usr/local/lib/python3.7/site-packages/tensorflow_probability/python/math/minimize.py in minimize(loss_fn, num_steps, optimizer, convergence_criterion, batch_convergence_reduce_fn, trainable_variables, trace_fn, return_full_length_trace, name) 335 loss_fn=loss_fn, optimizer=optimizer, 336 trainable_variables=trainable_variables) --> 337 initial_loss, initial_grads, initial_parameters = optimizer_step_fn() 338 has_converged = None 339 initial_convergence_criterion_state = None 
/usr/local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds) 794 else: 795 compiler = "nonXla" --> 796 result = self._call(*args, **kwds) 797 798 new_tracing_count = self._get_tracing_count() /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds) 837 # This is the first call of __call__, so we have to initialize. 838 initializers = [] --> 839 self._initialize(args, kwds, add_initializers_to=initializers) 840 finally: 841 # At this point we know that the initialization is complete (or less /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to) 710 self._concrete_stateful_fn = ( 711 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access --> 712 *args, **kwds)) 713 714 def invalid_creator_scope(*unused_args, **unused_kwds): /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs) 2946 args, kwargs = None, None 2947 with self._lock: -> 2948 graph_function, _, _ = self._maybe_define_function(args, kwargs) 2949 return graph_function 2950 /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs) 3317 3318 self._function_cache.missed.add(call_context_key) -> 3319 graph_function = self._create_graph_function(args, kwargs) 3320 self._function_cache.primary[cache_key] = graph_function 3321 /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes) 3179 arg_names=arg_names, 3180 override_flat_arg_shapes=override_flat_arg_shapes, -> 3181 capture_by_value=self._capture_by_value), 3182 self._function_attributes, 3183 function_spec=self.function_spec, 
/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes) 984 _, original_func = tf_decorator.unwrap(python_func) 985 --> 986 func_outputs = python_func(*func_args, **func_kwargs) 987 988 # invariant: `func_outputs` contains only Tensors, CompositeTensors, /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds) 612 # __wrapped__ allows AutoGraph to swap in a converted function. We give 613 # the function a weak reference to itself to avoid a reference cycle. --> 614 return weak_wrapped_fn().__wrapped__(*args, **kwds) 615 weak_wrapped_fn = weakref.ref(wrapped_fn) 616 /usr/local/lib/python3.7/site-packages/tensorflow_probability/python/math/minimize.py in optimizer_step() 102 loss = loss_fn() 103 watched_variables = tape.watched_variables() --> 104 grads = tape.gradient(loss, watched_variables) 105 train_op = optimizer.apply_gradients(zip(grads, watched_variables)) 106 with tf.control_dependencies([train_op]): /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/backprop.py in gradient(self, target, sources, output_gradients, unconnected_gradients) 1081 output_gradients=output_gradients, 1082 sources_raw=flat_sources_raw, -> 1083 unconnected_gradients=unconnected_gradients) 1084 1085 if not self._persistent: /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/imperative_grad.py in imperative_grad(tape, target, sources, output_gradients, sources_raw, unconnected_gradients) 75 output_gradients, 76 sources_raw, ---> 77 compat.as_str(unconnected_gradients.value)) /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _backward_function(*args) 802 def _backward_function(*args): 803 call_op = outputs[0].op --> 804 return 
self._rewrite_forward_and_call_backward(call_op, *args) 805 return _backward_function, outputs 806 /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _rewrite_forward_and_call_backward(self, op, *doutputs) 717 def _rewrite_forward_and_call_backward(self, op, *doutputs): 718 """Add outputs to the forward call and feed them to the grad function.""" --> 719 forward_function, backwards_function = self.forward_backward(len(doutputs)) 720 if not backwards_function.outputs: 721 return backwards_function.structured_outputs /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in forward_backward(self, num_doutputs) 626 if forward_backward is not None: 627 return forward_backward --> 628 forward, backward = self._construct_forward_backward(num_doutputs) 629 self._cached_function_pairs[num_doutputs] = (forward, backward) 630 return forward, backward /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _construct_forward_backward(self, num_doutputs) 674 args=[], kwargs={}, 675 signature=signature, --> 676 func_graph=backwards_graph) 677 backwards_graph_captures = backwards_graph.external_captures 678 captures_from_forward = [ /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes) 984 _, original_func = tf_decorator.unwrap(python_func) 985 --> 986 func_outputs = python_func(*func_args, **func_kwargs) 987 988 # invariant: `func_outputs` contains only Tensors, CompositeTensors, /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _backprop_function(*grad_ys) 664 self._func_graph.inputs, 665 grad_ys=grad_ys, --> 666 src_graph=self._func_graph) 667 668 with self._func_graph.as_default(): 
/usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph) 681 # functions. 682 in_grads = _MaybeCompile(grad_scope, op, func_call, --> 683 lambda: grad_fn(op, *out_grads)) 684 else: 685 # For function call ops, we add a 'SymbolicGradient' /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn) 334 xla_scope = op.get_attr("_XlaScope").decode() 335 except ValueError: --> 336 return grad_fn() # Exit early 337 338 if not xla_compile: /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in () 681 # functions. 682 in_grads = _MaybeCompile(grad_scope, op, func_call, --> 683 lambda: grad_fn(op, *out_grads)) 684 else: 685 # For function call ops, we add a 'SymbolicGradient' /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/while_v2.py in _WhileGrad(op, *grads) 352 body_grad_graph, args = _create_grad_func( 353 ys, xs, non_none_grads, cond_graph, body_graph, --> 354 util.unique_grad_fn_name(body_graph.name), op, maximum_iterations) 355 356 if body_grad_graph.while_op_needs_rewrite: /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/while_v2.py in _create_grad_func(ys, xs, grads, cond_graph, body_graph, name, while_op, maximum_iterations) 624 func_graph=_WhileBodyGradFuncGraph(name, cond_graph, body_graph, 625 maximum_iterations, while_op, --> 626 body_graph_inputs, body_graph_outputs)) 627 628 # Update the list of outputs with tensors corresponding to the captured /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes) 984 _, original_func = 
tf_decorator.unwrap(python_func) 985 --> 986 func_outputs = python_func(*func_args, **func_kwargs) 987 988 # invariant: `func_outputs` contains only Tensors, CompositeTensors, /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/while_v2.py in (*args) 620 grad_func_graph = func_graph_module.func_graph_from_py_func( 621 name, --> 622 lambda *args: _grad_fn(ys, xs, args, body_graph), 623 args, {}, 624 func_graph=_WhileBodyGradFuncGraph(name, cond_graph, body_graph, /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/while_v2.py in _grad_fn(ys, xs, args, func_graph) 680 grad_outs = gradients_util._GradientsHelper( 681 ys, xs, grad_ys=grad_ys, src_graph=func_graph, --> 682 unconnected_gradients="zero") 683 684 # TODO(b/118712257): Handle the case when grad_outs has None's e.g. when there /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph) 681 # functions. 682 in_grads = _MaybeCompile(grad_scope, op, func_call, --> 683 lambda: grad_fn(op, *out_grads)) 684 else: 685 # For function call ops, we add a 'SymbolicGradient' /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn) 334 xla_scope = op.get_attr("_XlaScope").decode() 335 except ValueError: --> 336 return grad_fn() # Exit early 337 338 if not xla_compile: /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in () 681 # functions. 682 in_grads = _MaybeCompile(grad_scope, op, func_call, --> 683 lambda: grad_fn(op, *out_grads)) 684 else: 685 # For function call ops, we add a 'SymbolicGradient' /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/cond_v2.py in _IfGrad(op, *grads) 119 # functions. 
120 true_grad_graph = _create_grad_func( --> 121 true_graph, grads, util.unique_grad_fn_name(true_graph.name)) 122 false_grad_graph = _create_grad_func( 123 false_graph, grads, util.unique_grad_fn_name(false_graph.name)) /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/cond_v2.py in _create_grad_func(func_graph, grads, name) 382 name, 383 lambda: _grad_fn(func_graph, grads), [], {}, --> 384 func_graph=_CondGradFuncGraph(name, func_graph)) 385 386 /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes) 984 _, original_func = tf_decorator.unwrap(python_func) 985 --> 986 func_outputs = python_func(*func_args, **func_kwargs) 987 988 # invariant: `func_outputs` contains only Tensors, CompositeTensors, /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/cond_v2.py in () 381 return func_graph_module.func_graph_from_py_func( 382 name, --> 383 lambda: _grad_fn(func_graph, grads), [], {}, 384 func_graph=_CondGradFuncGraph(name, func_graph)) 385 /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/cond_v2.py in _grad_fn(func_graph, grads) 372 result = gradients_util._GradientsHelper( 373 ys, func_graph.inputs, grad_ys=grad_ys, --> 374 src_graph=func_graph) 375 376 return result /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph) 681 # functions. 
682 in_grads = _MaybeCompile(grad_scope, op, func_call, --> 683 lambda: grad_fn(op, *out_grads)) 684 else: 685 # For function call ops, we add a 'SymbolicGradient' /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn) 334 xla_scope = op.get_attr("_XlaScope").decode() 335 except ValueError: --> 336 return grad_fn() # Exit early 337 338 if not xla_compile: /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in () 681 # functions. 682 in_grads = _MaybeCompile(grad_scope, op, func_call, --> 683 lambda: grad_fn(op, *out_grads)) 684 else: 685 # For function call ops, we add a 'SymbolicGradient' /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _rewrite_forward_and_call_backward(self, op, *doutputs) 717 def _rewrite_forward_and_call_backward(self, op, *doutputs): 718 """Add outputs to the forward call and feed them to the grad function.""" --> 719 forward_function, backwards_function = self.forward_backward(len(doutputs)) 720 if not backwards_function.outputs: 721 return backwards_function.structured_outputs /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in forward_backward(self, num_doutputs) 626 if forward_backward is not None: 627 return forward_backward --> 628 forward, backward = self._construct_forward_backward(num_doutputs) 629 self._cached_function_pairs[num_doutputs] = (forward, backward) 630 return forward, backward /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _construct_forward_backward(self, num_doutputs) 674 args=[], kwargs={}, 675 signature=signature, --> 676 func_graph=backwards_graph) 677 backwards_graph_captures = backwards_graph.external_captures 678 captures_from_forward = [ /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, 
add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes) 984 _, original_func = tf_decorator.unwrap(python_func) 985 --> 986 func_outputs = python_func(*func_args, **func_kwargs) 987 988 # invariant: `func_outputs` contains only Tensors, CompositeTensors, /usr/local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _backprop_function(*grad_ys) 664 self._func_graph.inputs, 665 grad_ys=grad_ys, --> 666 src_graph=self._func_graph) 667 668 with self._func_graph.as_default(): /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph) 681 # functions. 682 in_grads = _MaybeCompile(grad_scope, op, func_call, --> 683 lambda: grad_fn(op, *out_grads)) 684 else: 685 # For function call ops, we add a 'SymbolicGradient' /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn) 334 xla_scope = op.get_attr("_XlaScope").decode() 335 except ValueError: --> 336 return grad_fn() # Exit early 337 338 if not xla_compile: /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py in () 681 # functions. 
682 in_grads = _MaybeCompile(grad_scope, op, func_call, --> 683 lambda: grad_fn(op, *out_grads)) 684 else: 685 # For function call ops, we add a 'SymbolicGradient' /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/array_grad.py in _TileGrad(op, grad) 827 grad.values, math_ops.mod(grad.indices, input_shape_0), input_shape_0) 828 split_shape = array_ops.concat([[1], split_shape[1:]], axis=0) --> 829 input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes) 830 # Fix shape inference 831 if not context.executing_eagerly(): /usr/local/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs) 199 """Call target, and fall back on dispatchers if there is a TypeError.""" 200 try: --> 201 return target(*args, **kwargs) 202 except (TypeError, ValueError): 203 # Note: convert_to_eager_tensor currently raises a ValueError, not a /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py in reduce_sum(input_tensor, axis, keepdims, name) 1978 1979 return reduce_sum_with_dims(input_tensor, axis, keepdims, name, -> 1980 _ReductionDims(input_tensor, axis)) 1981 1982 /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py in reduce_sum_with_dims(input_tensor, axis, keepdims, name, dims) 1989 return _may_reduce_to_scalar( 1990 keepdims, axis, -> 1991 gen_math_ops._sum(input_tensor, dims, keepdims, name=name)) 1992 1993 /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gen_math_ops.py in _sum(input, axis, keep_dims, name) 10537 _, _, _op, _outputs = _op_def_library._apply_op_helper( 10538 "Sum", input=input, reduction_indices=axis, keep_dims=keep_dims, > 10539 name=name) 10540 _result = _outputs[:] 10541 if _execute.must_record_gradient(): /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(op_type_name, name, **keywords) 607 _SatisfiesTypeConstraint(base_type, 608 _Attr(op_def, input_arg.type_attr), --> 609 param_name=input_name) 
610 attrs[input_arg.type_attr] = attr_value 611 inferred_from[input_arg.type_attr] = input_name /usr/local/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _SatisfiesTypeConstraint(dtype, attr_def, param_name) 59 "allowed values: %s" % 60 (param_name, dtypes.as_dtype(dtype).name, ---> 61 ", ".join(dtypes.as_dtype(x).name for x in allowed_list))) 62 63 TypeError: Value passed to parameter 'input' has DataType variant not in list of allowed values: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64 ```

System Details

```bash numpy - 1.18.5 tensorflow_probability - 0.12.0-dev20200723 tensorflow - 2.4.0-dev20200723 pip - 20.1.1 CPython - 3.7.7 IPython - 7.16.1 System - Darwin Mac 19.6.0 Darwin Kernel Version 19.6.0: Sun Jul 5 00:43:10 PDT 2020; root:xnu-6153.141.1~9/RELEASE_X86_64 x86_64 ```

If I do not use `tf.vectorized_map`, everything works fine. I am not sure whether this issue should be opened on the TensorFlow side instead. Any help in this regard would be appreciated. Thanks!

davmre commented 4 years ago

Thanks for reporting this; that's a pretty nasty error. It looks like it's not specific to the autoregressive model; any instance of `tfd.LinearGaussianStateSpaceModel` has the same problem. I've simplified the failing example a bit; there's a runnable version in this colab: https://colab.research.google.com/drive/14ytDF-74jvDYtJXff0IktJLBin_BuKYJ. (EDIT: I further simplified the example and the linked colab; see my comment below)

# Generating data
np.random.seed(seed=42)
# `size=` must be passed by keyword: the first positional argument of
# `np.random.normal` is `loc`, so `np.random.normal([100, 1])` would draw just
# two samples with means 100 and 1 instead of a [100, 1] array matching the
# model's `num_timesteps=100` and 1-D observations.
data = np.random.normal(size=[100, 1]).astype(np.float32)

def log_prob(x):
  """Return the log-probability of `data` under a 1-D linear Gaussian SSM.

  `x` is used as the mean (`loc`) of the transition noise; every other model
  parameter is a fixed constant. `data` is read from the enclosing scope.
  """
  # Components are built in the same order the keyword arguments are listed.
  transition_matrix = tf.eye(1)
  transition_noise = tfd.MultivariateNormalDiag(loc=[x], scale_diag=[1])
  observation_matrix = tf.eye(1)
  observation_noise = tfd.MultivariateNormalDiag(loc=[0.], scale_diag=[1.])
  initial_state_prior = tfd.MultivariateNormalDiag(scale_diag=[1e-6])

  ssm = tfd.LinearGaussianStateSpaceModel(
      num_timesteps=100,
      transition_matrix=transition_matrix,
      transition_noise=transition_noise,
      observation_matrix=observation_matrix,
      observation_noise=observation_noise,
      initial_state_prior=initial_state_prior,
  )
  return ssm.log_prob(data)

def vectorize_function(function):
    """Wrap `function` so that it is evaluated through `tf.vectorized_map`.

    The returned callable accepts any number of positional arguments, hands
    them to `tf.vectorized_map` as one tuple, and calls `function` with that
    tuple unpacked into positional arguments.
    """
    def vectorizedfn(*q_samples):
        def call_unpacked(samples):
            # `samples` is the tuple `tf.vectorized_map` hands to its
            # callback; spread it back into positional arguments.
            return function(*samples)

        return tf.vectorized_map(call_unpacked, q_samples)

    return vectorizedfn
v_log_prob = vectorize_function(log_prob)
# `x` was never defined in this snippet; use the same batch of 10 trial
# values as the TF-only reproduction so the example runs on its own.
x = tf.Variable(tf.random.normal([10]))
print(v_log_prob(x))  # Works.

# Only the gradient of the vectorized call fails, not the forward pass.
with tf.GradientTape() as tape:
  lp = v_log_prob(x)
g = tape.gradient(lp, x)  # Raises exception.

Using vectorized_map to compute the log prob works, but the gradient raises an error very similar to the one you report: TypeError: Value passed to parameter 'input' has DataType variant not in list of allowed values: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64 (with a stack trace 67 frames deep).

davmre commented 4 years ago

Actually I managed to get this to an even simpler case that uses only TF (no TFP at all):

import numpy as np
import tensorflow as tf

# Generating data
# Seed NumPy's global RNG so the 100 float32 observations are reproducible.
np.random.seed(seed=42)
data = np.random.randn(100).astype(np.float32)

def log_prob(x):
  """Return the sum over `data` of the squared residuals `(yi - x)**2`.

  The per-element terms are produced with `tf.scan` (rather than a plain
  vectorized expression) because the scan loop is what the reproduction
  needs to exercise. `data` is read from the enclosing scope.
  """
  def squared_residual(_, yi):
    # The accumulator is ignored; each step depends only on the element.
    return (yi - x)**2

  per_step = tf.scan(
      squared_residual,
      elems=data,
      initializer=tf.convert_to_tensor(0.))
  return tf.reduce_sum(per_step)

def vectorize_function(function):
    """Return a version of `function` that runs via `tf.vectorized_map`.

    All positional arguments of the returned callable are bundled into a
    tuple and given to `tf.vectorized_map`; `function` is then invoked with
    that tuple unpacked.
    """
    def vectorizedfn(*q_samples):
        def apply_to_samples(samples):
            # Undo the tuple packing done by `*q_samples`.
            return function(*samples)

        mapped = tf.vectorized_map(apply_to_samples, q_samples)
        return mapped

    return vectorizedfn

v_log_prob = vectorize_function(log_prob)
# Ten trial values; each is fed to `log_prob` through `tf.vectorized_map`.
x = tf.Variable(tf.random.normal([10]))

# Record the vectorized forward pass, then ask for its gradient w.r.t. `x`.
with tf.GradientTape() as tape:
  lp = v_log_prob(x)
g = tape.gradient(lp, x)  # Raises exception.

The issue seems to be in the interaction of taking the gradient of a vectorized scan loop.

davmre commented 4 years ago

Since this looks like a TF bug, I went ahead and filed an issue with TF: https://github.com/tensorflow/tensorflow/issues/41789

I'm going to go ahead and close this issue for now, though feel free to reopen if something TFP-specific pops up.

Sayam753 commented 4 years ago

Hi @davmre, thanks a lot for debugging this issue. Let's see how it turns out on the TF side.

tensorflow / probability

Calculating log_prob fails using `tf.vectorized_map` #1018