Training or predicting ...
Training
Traceback (most recent call last):
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1375, in _do_call
return fn(*args)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1360, in _run_fn
target_list, run_metadata)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1453, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.NotFoundError: Key bert/embeddings/LayerNorm/beta/accum not found in checkpoint
[[{{node save/RestoreV2}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1304, in restore
{self.saver_def.filename_tensor_name: save_path})
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 968, in run
run_metadata_ptr)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1191, in _run
feed_dict_tensor, options, run_metadata)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1369, in _do_run
run_metadata)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1394, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.NotFoundError: Key bert/embeddings/LayerNorm/beta/accum not found in checkpoint
[[node save/RestoreV2 (defined at /home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py:1510) ]]
Original stack trace for 'save/RestoreV2':
File "../tapas/run_task_main.py", line 908, in <module>
app.run(main)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "../tapas/run_task_main.py", line 893, in main
loop_predict=FLAGS.loop_predict,
File "../tapas/run_task_main.py", line 526, in _train_and_predict
max_steps=tapas_config.num_train_steps,
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3105, in train
saving_listeners=saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 349, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1175, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1208, in _train_model_default
saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1510, in _train_with_estimator_spec
save_graph_def=self._config.checkpoint_save_graph_def) as mon_sess:
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 605, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1039, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 750, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1232, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1237, in _create_session
return self._sess_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 903, in create_session
self.tf_sess = self._session_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 661, in create_session
self._scaffold.finalize()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 244, in finalize
self._saver.build()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 848, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 886, in _build
build_restore=build_restore)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 510, in _build_internal
restore_sequentially, reshape)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 389, in _AddShardedRestoreOps
name="restore_shard"))
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 336, in _AddRestoreOps
restore_sequentially)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 583, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1493, in restore_v2
name=name)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 750, in _apply_op_helper
attrs=attr_protos, op_def=op_def)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3565, in _create_op_internal
op_def=op_def)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2045, in __init__
self._traceback = tf_stack.extract_stack_for_node(self._c_op)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/py_checkpoint_reader.py", line 70, in get_tensor
self, compat.as_bytes(tensor_str))
RuntimeError: Key _CHECKPOINTABLE_OBJECT_GRAPH not found in checkpoint
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1314, in restore
names_to_keys = object_graph_key_mapping(save_path)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1632, in object_graph_key_mapping
object_graph_string = reader.get_tensor(trackable.OBJECT_GRAPH_PROTO_KEY)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/py_checkpoint_reader.py", line 74, in get_tensor
error_translator(e)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/py_checkpoint_reader.py", line 35, in error_translator
raise errors_impl.NotFoundError(None, None, error_message)
tensorflow.python.framework.errors_impl.NotFoundError: Key _CHECKPOINTABLE_OBJECT_GRAPH not found in checkpoint
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "../tapas/run_task_main.py", line 908, in <module>
app.run(main)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "../tapas/run_task_main.py", line 893, in main
loop_predict=FLAGS.loop_predict,
File "../tapas/run_task_main.py", line 526, in _train_and_predict
max_steps=tapas_config.num_train_steps,
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3110, in train
rendezvous.raise_errors()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/error_handling.py", line 150, in raise_errors
six.reraise(typ, value, traceback)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/six.py", line 703, in reraise
raise value
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3105, in train
saving_listeners=saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 349, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1175, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1208, in _train_model_default
saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1510, in _train_with_estimator_spec
save_graph_def=self._config.checkpoint_save_graph_def) as mon_sess:
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 605, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1039, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 750, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1232, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1237, in _create_session
return self._sess_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 903, in create_session
self.tf_sess = self._session_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 670, in create_session
init_fn=self._scaffold.init_fn)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/session_manager.py", line 321, in prepare_session
config=config)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/session_manager.py", line 251, in _restore_checkpoint
sess, saver, ckpt.model_checkpoint_path)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/session_manager.py", line 71, in _restore_checkpoint_and_maybe_run_saved_model_initializers
saver.restore(sess, path)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1320, in restore
err, "a Variable name or other graph key that is missing")
tensorflow.python.framework.errors_impl.NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:
Key bert/embeddings/LayerNorm/beta/accum not found in checkpoint
[[node save/RestoreV2 (defined at /home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py:1510) ]]
Original stack trace for 'save/RestoreV2':
File "../tapas/run_task_main.py", line 908, in <module>
app.run(main)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "../tapas/run_task_main.py", line 893, in main
loop_predict=FLAGS.loop_predict,
File "../tapas/run_task_main.py", line 526, in _train_and_predict
max_steps=tapas_config.num_train_steps,
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3105, in train
saving_listeners=saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 349, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1175, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1208, in _train_model_default
saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1510, in _train_with_estimator_spec
save_graph_def=self._config.checkpoint_save_graph_def) as mon_sess:
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 605, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1039, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 750, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1232, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1237, in _create_session
return self._sess_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 903, in create_session
self.tf_sess = self._session_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 661, in create_session
self._scaffold.finalize()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 244, in finalize
self._saver.build()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 848, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 886, in _build
build_restore=build_restore)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 510, in _build_internal
restore_sequentially, reshape)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 389, in _AddShardedRestoreOps
name="restore_shard"))
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 336, in _AddRestoreOps
restore_sequentially)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 583, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1493, in restore_v2
name=name)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 750, in _apply_op_helper
attrs=attr_protos, op_def=op_def)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3565, in _create_op_internal
op_def=op_def)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2045, in __init__
self._traceback = tf_stack.extract_stack_for_node(self._c_op)
The TensorFlow package versions installed on my machine are as follows:
tensorflow 2.2.3 tensorflow-addons 0.14.0 tensorflow-datasets 4.4.0 tensorflow-estimator 2.5.0 tensorflow-gpu 2.5.0 tensorflow-hub 0.12.0 tensorflow-metadata 1.2.0 tensorflow-model-optimization 0.7.0 tensorflow-probability 0.12.0
If anybody can help me with this it would be greatly appreciated. Thanks so much for the work and time!
Due to a lack of GPU memory, I set batch_size to 32 and gradient_accumulation_steps to 32 when fine-tuning on SQA. The scripts are as follows.
And I ran into the following error:
The TensorFlow package versions installed on my machine are as follows:
tensorflow 2.2.3 tensorflow-addons 0.14.0 tensorflow-datasets 4.4.0 tensorflow-estimator 2.5.0 tensorflow-gpu 2.5.0 tensorflow-hub 0.12.0 tensorflow-metadata 1.2.0 tensorflow-model-optimization 0.7.0 tensorflow-probability 0.12.0
If anybody can help me with this it would be greatly appreciated. Thanks so much for the work and time!