Even when I explicitly specify the model_dir, I get the following error:
Training for 420 steps
2020-11-24 20:34:50.920192: W tensorflow/core/distributed_runtime/rpc/grpc_session.cc:370] GrpcSession::ListDevices will initialize the session with an empty graph and other defaults because the session has not yet been created.
Building model...
ERROR:tensorflow:Error recorded from training_loop: Couldn't find 'checkpoint' file or checkpoints in given directory <model_dir>
Traceback (most recent call last):
File "electra_small/run_finetuning.py", line 323, in <module>
main()
File "electra_small/run_finetuning.py", line 319, in main
args.model_name, args.data_dir, **hparams))
File "electra_small/run_finetuning.py", line 270, in run_finetuning
model_runner.train()
File "electra_small/run_finetuning.py", line 183, in train
input_fn=self._train_input_fn, max_steps=self.train_steps)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3035, in train
rendezvous.raise_errors()
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/error_handling.py", line 136, in raise_errors
six.reraise(typ, value, traceback)
File "/usr/local/lib/python3.5/dist-packages/six.py", line 703, in reraise
raise value
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3030, in train
saving_listeners=saving_listeners)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1191, in _train_model_default
features, labels, ModeKeys.TRAIN, self.config)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 2857, in _call_model_fn
config)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1149, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3159, in _model_fn
_train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3604, in _train_on_tpu_system
device_assignment=ctx.device_assignment)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/tpu/tpu.py", line 1277, in split_compile_and_shard
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/tpu/tpu.py", line 992, in split_compile_and_replicate
outputs = computation(*computation_inputs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3589, in multi_tpu_train_steps_on_single_shard
inputs=[0, _INITIAL_LOSS])
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/tpu/training_loop.py", line 178, in while_loop
condition_wrapper, body_wrapper, inputs, name="", parallel_iterations=1)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/ops/control_flow_ops.py", line 2753, in while_loop
return_same_structure)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/ops/control_flow_ops.py", line 2245, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/ops/control_flow_ops.py", line 2170, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/tpu/training_loop.py", line 121, in body_wrapper
outputs = body(*(inputs + dequeue_ops))
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3588, in <lambda>
lambda i, loss: [i + 1, single_tpu_train_step(i)],
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 1715, in train_step
self._call_model_fn(features, labels))
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 1994, in _call_model_fn
estimator_spec = self._model_fn(features=features, **kwargs)
File "electra_small/run_finetuning.py", line 96, in model_fn
tvars, init_checkpoint)
File "/home/etetteh/electra_small/model/modeling.py", line 351, in get_assignment_map_from_checkpoint
for x in tf.train.list_variables(init_checkpoint):
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/training/checkpoint_utils.py", line 97, in list_variables
reader = load_checkpoint(ckpt_dir_or_file)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_core/python/training/checkpoint_utils.py", line 65, in load_checkpoint
"given directory %s" % ckpt_dir_or_file)
ValueError: Couldn't find 'checkpoint' file or checkpoints in given directory <model_dir>
I am fine-tuning my trained ELECTRA model on an NER task.
Even when I explicitly specify the model_dir, I get the following error