InternalError Traceback (most recent call last)
<ipython-input-4-0e66376570e5> in <module>
15
16 # Train and evaluate using tf.keras.Model.fit()
---> 17 history = model.fit(train_dataset, epochs=2, steps_per_epoch=115,
18 validation_data=valid_dataset, validation_steps=7)
19
~/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1098 _r=1):
1099 callbacks.on_train_batch_begin(step)
-> 1100 tmp_logs = self.train_function(iterator)
1101 if data_handler.should_sync:
1102 context.async_wait()
~/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args, **kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
~/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
853 # In this case we have created variables on the first call, so we run the
854 # defunned version which is guaranteed to never create variables.
--> 855 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
856 elif self._stateful_fn is not None:
857 # Release the lock early so that multiple threads can perform the call
~/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
2940 (graph_function,
2941 filtered_flat_args) = self._maybe_define_function(args, kwargs)
-> 2942 return graph_function._call_flat(
2943 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
2944
~/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1916 and executing_eagerly):
1917 # No tape is watching; skip to running the function.
-> 1918 return self._build_call_outputs(self._inference_function.call(
1919 ctx, args, cancellation_manager=cancellation_manager))
1920 forward_backward = self._select_forward_and_backward_functions(
~/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
553 with _InterpolateFunctionError(self):
554 if cancellation_manager is None:
--> 555 outputs = execute.execute(
556 str(self.signature.name),
557 num_outputs=self._num_outputs,
~/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 try:
58 ctx.ensure_initialized()
---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
InternalError: TransposeMLCBytes: Input bytes is nullptr.
[[node gradient_tape/tf_bert_for_sequence_classification/bert/encoder/layer_._11/intermediate/dense/MLCMatMul_1 (defined at /Users/ronj/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/compiler/tf2mlcompute/ops/gen_mlc_ops.py:5535) ]]
[[MLCSubgraphOp_0_39]] [Op:__inference_train_function_36201]
Errors may have originated from an input operation.
Input Source operations connected to node gradient_tape/tf_bert_for_sequence_classification/bert/encoder/layer_._11/intermediate/dense/MLCMatMul_1:
tf_bert_for_sequence_classification/bert/encoder/layer_._11/attention/output/LayerNorm/MLCLayerNorm (defined at /Users/ronj/miniforge3/envs/tfm1/lib/python3.8/site-packages/tensorflow/compiler/tf2mlcompute/ops/gen_mlc_ops.py:4934)
Adam/gradients/AddN (defined at <ipython-input-4-0e66376570e5>:17)
This error occurs about two-thirds of the way through one epoch. I was trying to do a quick evaluation of TensorFlow for M1 by following the "Quick tour TF 2.0 training and PyTorch interoperability" section of this page:
https://github.com/lcskrishna/transformers
Since the error happened in the middle of an epoch, I'm not sure why there would suddenly be null values here. I'll try to do some more testing around this.
This error occurs about two-thirds of the way through one epoch. I was trying to do a quick evaluation of TensorFlow for M1 by following the "Quick tour TF 2.0 training and PyTorch interoperability" section of this page: https://github.com/lcskrishna/transformers
Since the error happened in the middle of an epoch, I'm not sure why there would suddenly be null values here. I'll try to do some more testing around this.