# Run predict.py once per model, appending each run's stdout/stderr to a
# per-model log file named "<start_dir>/<base>_predict_<model>.out".
for model in ("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"):
    # `timeout 125m` caps each prediction run at 125 minutes.
    cmd = f"timeout 125m predict.py -m {model} -g 2 -c ."
    log_path = f"{start_dir}/{base}_predict_{model}.out"
    with open(log_path, "a") as log:
        # The original interpolated the file *object* here, which wrote an
        # "<_io.TextIOWrapper ...>" repr into the log; write the path instead.
        log.write(f"Processing file: {log_path}\n")
        # Flush before handing the descriptor to the child process; otherwise
        # the buffered header is only written when the file is closed and ends
        # up after the child's output.
        log.flush()
        call(cmd, shell=True, stdout=log, stderr=log, executable='/bin/bash')
Models h and j fail with the following error; the other models run fine.
pciBusID: 0000:18:00.0 name: Quadro P2000 computeCapability: 6.1
coreClock: 1.4805GHz coreCount: 8 deviceMemorySize: 4.93GiB deviceMemoryBandwidth: 130.53GiB/s
2023-09-25 05:12:09.291609: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-09-25 05:12:09.291640: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2023-09-25 05:12:09.291658: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2023-09-25 05:12:09.291676: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2023-09-25 05:12:09.291693: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2023-09-25 05:12:09.291711: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10
2023-09-25 05:12:09.291728: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.10
2023-09-25 05:12:09.291746: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2023-09-25 05:12:09.292371: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2023-09-25 05:12:09.292433: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-09-25 05:12:10.011767: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-09-25 05:12:10.011830: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267] 0
2023-09-25 05:12:10.011842: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0: N
2023-09-25 05:12:10.013939: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1406] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 4582 MB memory) -> physical GPU (device: 0, name: Quadro P2000, pci bus id: 0000:18:00.0, compute capability: 6.1)
Traceback (most recent call last):
File "~/python/miniconda3/envs/kpe/bin/predict.py", line 79, in <module>
model = get_model(args.model)
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/fetch/utils.py", line 92, in get_model
model = model_from_yaml(y.read())
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/saving/model_config.py", line 105, in model_from_yaml
return deserialize(config, custom_objects=custom_objects)
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/layers/serialization.py", line 173, in deserialize
return generic_utils.deserialize_keras_object(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 354, in deserialize_keras_object
return cls.from_config(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py", line 2261, in from_config
return functional.Functional.from_config(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/engine/functional.py", line 668, in from_config
input_tensors, output_tensors, created_layers = reconstruct_from_config(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/engine/functional.py", line 1275, in reconstruct_from_config
process_layer(layer_data)
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/engine/functional.py", line 1257, in process_layer
layer = deserialize_layer(layer_data, custom_objects=custom_objects)
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/layers/serialization.py", line 173, in deserialize
return generic_utils.deserialize_keras_object(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 354, in deserialize_keras_object
return cls.from_config(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py", line 2261, in from_config
return functional.Functional.from_config(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/engine/functional.py", line 668, in from_config
input_tensors, output_tensors, created_layers = reconstruct_from_config(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/engine/functional.py", line 1275, in reconstruct_from_config
process_layer(layer_data)
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/engine/functional.py", line 1257, in process_layer
layer = deserialize_layer(layer_data, custom_objects=custom_objects)
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/layers/serialization.py", line 173, in deserialize
return generic_utils.deserialize_keras_object(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 354, in deserialize_keras_object
return cls.from_config(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/layers/core.py", line 1019, in from_config
function = cls._parse_function_from_config(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/layers/core.py", line 1071, in _parse_function_from_config
function = generic_utils.func_load(
File "~/python/miniconda3/envs/kpe/lib/python3.9/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 457, in func_load
code = marshal.loads(raw_code)
ValueError: bad marshal data (unknown type code)
Models h, j fail. When running all the models with
Models h and j fail with the following; the other models run fine.