It turns out the problem was that the iterator in the built graph has to handle the same output shapes and types for the training, validation, and test datasets. The augmentation was only applied to the training set, which is why it gave me this error, so I modified the `_get_data` function in `distillation.py` so that all the datasets produce the same shapes and types. Here is the updated function:

```python
def _get_data(self, paths, split_name, **config):
    is_training = split_name == 'training'

    def _read_image(path):
        image = tf.read_file(path)
        image = tf.image.decode_jpeg(image, channels=3)
        return image

    def _create_npz_reader(keys):
        def _read_npz(keys, path):
            npz = np.load(path.decode('utf-8'))
            return [npz[k].astype(np.float32) for k in keys]
        return lambda x: _read_npz(keys, x)

    def _preprocess(image):
        if config['preprocessing']['resize']:
            image = tf.image.resize_images(
                image, config['preprocessing']['resize'],
                method=tf.image.ResizeMethod.BILINEAR)
        if config['preprocessing']['grayscale']:
            image = tf.image.rgb_to_grayscale(image)
        return image

    def _delete_keys(data):
        keys = ['keypoints']
        for k in keys:
            data.pop(k, None)
        return data

    # Create datasets for names and images
    names = tf.data.Dataset.from_tensor_slices(paths['names'])
    images = tf.data.Dataset.from_tensor_slices(paths['images'])
    images = images.map_parallel(_read_image)
    images = images.map_parallel(_preprocess)
    dataset = tf.data.Dataset.zip({'image': images, 'name': names})

    # Load targets
    if config['load_targets']:
        for i, target in enumerate(config['targets']):
            t = tf.data.Dataset.from_tensor_slices(paths[i])
            reader = _create_npz_reader(target['keys'])
            types = [tf.float32] * len(target['keys'])
            t = t.map_parallel(lambda p: tf.py_func(reader, [p], types))
            dataset = tf.data.Dataset.zip((dataset, t)).map(
                lambda da, de: {**da, **{k: de[j]
                                         for j, k in enumerate(target['keys'])}})

    # Dict-structured datasets expose their keys through output_types
    if 'keypoints' in dataset.output_types:
        dataset = dataset.map(
            lambda d: {**d, 'keypoints': tf.reshape(
                d['keypoints'][:, ::-1], [-1, 2])})

    # Apply augmentations or additional preprocessing to every split, so that
    # all datasets end up with identical output types and shapes
    if split_name in ['training', 'validation', 'test']:
        if config['augmentation']['photometric']['enable']:
            dataset = dataset.map_parallel(
                lambda d: pipeline.photometric_augmentation(
                    d, **config['augmentation']['photometric']))
        if config['augmentation']['homographic']['enable']:
            dataset = dataset.map_parallel(
                lambda d: pipeline.homographic_augmentation(
                    d, **config['augmentation']['homographic']))

    print(f"Post-augmentation Dataset ({split_name}) output types:",
          dataset.output_types)
    print(f"Post-augmentation Dataset ({split_name}) output shapes:",
          dataset.output_shapes)

    # Ensure batching works for validation and test sets as well
    if split_name == 'validation':
        dataset = dataset.take(config['validation_size'])
    if split_name == 'training':
        dataset = dataset.skip(config['validation_size'])

    if config['cache_in_memory']:
        tf.logging.info('Caching dataset, first access will take some time')
        dataset = dataset.cache()

    if 'keypoints' in dataset.output_types:
        dataset = dataset.map_parallel(pipeline.add_keypoint_map)

    if config['for_batching']:
        dataset = dataset.map_parallel(_delete_keys)

    return dataset
```
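For context, here is a minimal sketch (plain TF 1.x with toy tensors made up for illustration, not the hfnet code) of why the shared string-handle iterator cares about this: the iterator is created from a single `output_types`/`output_shapes` signature, so feeding it the handle of a dataset that carries extra components fails at run time with exactly this kind of "Number of components does not match" error.

```python
import tensorflow as tf  # TF 1.x API

# Two splits with different structures: the "training" one carries an extra
# mask, the "validation" one does not (toy stand-ins for the augmented data).
train = tf.data.Dataset.from_tensors(
    {'image': tf.zeros([4, 4, 1]), 'valid_mask': tf.ones([4, 4])})
val = tf.data.Dataset.from_tensors({'image': tf.zeros([4, 4, 1])})

# The shared iterator is built from ONE signature only.
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, val.output_types, val.output_shapes)
next_element = iterator.get_next()

with tf.Session() as sess:
    train_handle = sess.run(train.make_one_shot_iterator().string_handle())
    # Fails with InvalidArgumentError:
    # "Number of components does not match: expected 1 types but got 2."
    sess.run(next_element, feed_dict={handle: train_handle})
```

Making every split go through the same augmentation and preprocessing, as in the updated function above, keeps all signatures identical, so any split's handle can be fed to the shared iterator.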
When I enable the homographic augmentation in the hfnet train config file, training always fails with an InvalidArgumentError saying it expected 5 types but got 8. I tried debugging and found out that the augmentation adds a valid_mask, a local descriptor map valid mask, and a dense scores valid mask. I have no idea why it isn't working. This is the error:

```
OP_REQUIRES failed at iterator_ops.cc:1181 : Invalid argument: Number of components does not match: expected 5 types but got 8.
Traceback (most recent call last):
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1334, in _do_call
    return fn(*args)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1319, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1407, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Number of components does not match: expected 5 types but got 8.
  [[{{node IteratorFromStringHandleV2}} = IteratorFromStringHandleV2output_shapes=[, , [?,480,640,1], , [?]], output_types=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_STRING], _device="/job:localhost/replica:0/task:0/device:CPU:0" ]]
  [[{{node MobilenetV2/expanded_conv_3/project/BatchNorm/gamma/read/_289}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_785_MobilenetV2/expanded_conv_3/project/BatchNorm/gamma/read", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "hfnet/train.py", line 82, in <module>
    _cli_train(config, output_dir)
  File "hfnet/train.py", line 66, in _cli_train
    train(config, config['train_iter'], output_dir)
  File "hfnet/train.py", line 32, in train
    keep_checkpoints=config.get('keep_checkpoints', 1))
  File "/home/amrmesi/hfnet/hfnet/models/base_model.py", line 310, in train
    feed_dict={self.handle: self.dataset_handles['training']})
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 929, in run
    run_metadata_ptr)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1152, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
    run_metadata)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Number of components does not match: expected 5 types but got 8.
  [[node IteratorFromStringHandleV2 (defined at /home/amrmesi/hfnet/hfnet/models/base_model.py:267) = IteratorFromStringHandleV2output_shapes=[, , [?,480,640,1], , [?]], output_types=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_STRING], _device="/job:localhost/replica:0/task:0/device:CPU:0" ]]
  [[{{node MobilenetV2/expanded_conv_3/project/BatchNorm/gamma/read/_289}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_785_MobilenetV2/expanded_conv_3/project/BatchNorm/gamma/read", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'IteratorFromStringHandleV2', defined at:
  File "hfnet/train.py", line 82, in <module>
    _cli_train(config, output_dir)
  File "hfnet/train.py", line 66, in _cli_train
    train(config, config['train_iter'], output_dir)
  File "hfnet/train.py", line 22, in train
    with _init_graph(config) as net:
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/contextlib.py", line 81, in __enter__
    return next(self.gen)
  File "hfnet/train.py", line 51, in _init_graph
    data=dataset.get_tf_datasets(), n_gpus=n_gpus, **config['model'])
  File "/home/amrmesi/hfnet/hfnet/models/base_model.py", line 125, in __init__
    self._build_graph()
  File "/home/amrmesi/hfnet/hfnet/models/base_model.py", line 267, in _build_graph
    self.handle, output_types, output_shapes)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 291, in from_string_handle
    sparse.as_dense_shapes(output_shapes, output_classes)))
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 1982, in iterator_from_string_handle_v2
    output_types=output_types, output_shapes=output_shapes, name=name)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/amrmesi/miniconda3/envs/hfnet/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Number of components does not match: expected 5 types but got 8.
  [[node IteratorFromStringHandleV2 (defined at /home/amrmesi/hfnet/hfnet/models/base_model.py:267) = IteratorFromStringHandleV2output_shapes=[, , [?,480,640,1], , [?]], output_types=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_STRING], _device="/job:localhost/replica:0/task:0/device:CPU:0" ]]
  [[{{node MobilenetV2/expanded_conv_3/project/BatchNorm/gamma/read/_289}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_785_MobilenetV2/expanded_conv_3/project/BatchNorm/gamma/read", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
```
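In case it helps anyone hitting the same thing: a small, hypothetical sanity check (the helper name is mine, it is not part of hfnet) that compares the splits' `output_types` up front, which is the quantity the shared iterator complains about, so a mismatch surfaces with a readable message instead of failing deep inside `session.run`:

```python
def assert_same_structure(datasets):
    """datasets: dict of tf.data.Dataset keyed by split name,
    e.g. the dict returned by dataset.get_tf_datasets()."""
    (ref_name, ref), *rest = list(datasets.items())
    for name, ds in rest:
        # For dict-structured datasets, output_types is a dict whose entries
        # become the iterator components.
        assert ds.output_types == ref.output_types, (
            f"'{name}' yields {len(ds.output_types)} components "
            f"({ds.output_types}) but '{ref_name}' yields "
            f"{len(ref.output_types)} ({ref.output_types})")
```

With the augmentation applied to every split as in the updated `_get_data`, this check passes; with training-only augmentation it would have flagged the 8-vs-5 component mismatch already at dataset construction time.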