Open FabianSchuetze opened 1 year ago
We do support tf.tensor_scatter_nd_update() function now. Could you please update the example code including data preparation and caller so we can have a local debug for further information? Thanks.
Thanks a lot for your kind and informative reply, @fatcat-z . Your answer provide me with hope that we can find a solution to the problem. I have create a one-layer network that provokes the same error as reported above. The tensorflow frozen graph is generated with the following function:
import os
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.convert_to_constants import (
convert_variables_to_constants_v2,
)
def ArIoU(bx1, bx2, angle_modulo):
cos_ph = tf.math.cos(- 1 * tf.gather(bx1, 4, axis=1))
sin_ph = tf.math.sin(- 1 * tf.gather(bx1, 4, axis=1))
rx = cos_ph * (tf.gather(bx2, 0, axis=1) - tf.gather(bx1, 0, axis=1))\
- sin_ph * (tf.gather(bx2, 1, axis=1) - tf.gather(bx1, 1, axis=1))
ry = sin_ph * (tf.gather(bx2, 0, axis=1) - tf.gather(bx1, 0, axis=1))\
+ cos_ph * (tf.gather(bx2, 1, axis=1) - tf.gather(bx1, 1, axis=1))
iw = tf.minimum(rx + tf.gather(bx2, 2, axis=1), tf.gather(bx1, 2, axis=1))\
- tf.maximum(rx - tf.gather(bx2, 2, axis=1), -
1 * tf.gather(bx1, 2, axis=1))
iw = tf.maximum(iw, 0)
ih = tf.minimum(ry + tf.gather(bx2, 3, axis=1), tf.gather(bx1, 3, axis=1))\
- tf.maximum(rx - tf.gather(bx2, 3, axis=1), -
1 * tf.gather(bx1, 3, axis=1))
# for all ih < 0 values, the results should be 0
area_i = 4 * tf.gather(bx1, 2, axis=1) * tf.gather(bx1, 3, axis=1)
area_j = 4 * tf.gather(bx2, 2, axis=1) * tf.gather(bx2, 3, axis=1)
ua = area_i + area_j - iw * ih
angle = tf.abs(tf.gather(bx1, 4, axis=1) - tf.gather(bx2, 4, axis=1))
zero = tf.constant(0.0, tf.float32)
ar = tf.maximum(zero, tf.cos(tf.minimum(angle, angle_modulo - angle)))
res = tf.where(ih > 0, ar * iw * ih / ua, iw)
return res
def nms_ar_tf(boxes, scores, nms_threshold, score_threhold=0.05):
"""Sort confidence scores of predicted bounding boxes."""
threshold = tf.constant(nms_threshold, tf.float32)
bs = len(boxes)
out_boxes = tf.zeros((bs, 300, 5), dtype=tf.float32)
out_scores = tf.zeros((bs, 300), dtype=tf.float32)
for step in range(bs):
idx = 0
scores_sorted = tf.sort(
tf.gather(scores, step, axis=0),
axis=-1, direction='DESCENDING')
B = tf.argsort(
tf.gather(scores, step, axis=0),
axis=-1, direction='DESCENDING')
B = tf.boolean_mask(B, tf.greater(scores_sorted, score_threhold))
def func(B, out_boxes, out_scores, idx, step):
i = B[0]
out_boxes = tf.tensor_scatter_nd_update(
out_boxes,
[[step, idx, 0], [step, idx, 1], [step, idx, 2],
[step, idx, 3], [step, idx, 4]],
[boxes[step, i, 0], boxes[step, i, 1], boxes[step, i, 2],
boxes[step, i, 3], boxes[step, i, 4]])
out_scores = tf.tensor_scatter_nd_update(
out_scores, [[step, idx]], [scores[step, i]])
idx += 1
if tf.size(B) == 1:
B = tf.zeros([0], dtype='int32')
else:
iou = ArIoU(tf.reshape(boxes[step, i, :], (-1, 5)),
tf.gather(boxes[step], B[1:]), 3.14)
iou = tf.reshape(iou, (-1,))
inds = tf.reshape(tf.where(tf.less(iou, threshold)), (-1,))
B = tf.gather(B, inds + 1)
return B, out_boxes, out_scores, idx, step
def cond(a, b, c, d, e): return tf.size(a) > 0
B, out_boxes, out_scores, idx, step =\
tf.while_loop(cond, func,
(B, out_boxes, out_scores, idx, step),
parallel_iterations=1,
shape_invariants=(tf.TensorShape([None]),
out_boxes.shape,
out_scores.shape, None, None))
return out_boxes, out_scores
class SmallNetwork(tf.keras.layers.Layer):
"""Keras layer for filtering detections using score threshold and NMS"""
def __init__(
self,
nms_threshold=0.5,
score_threshold=0.05,
**kwargs
):
self.nms_threshold = nms_threshold
self.score_threshold = score_threshold
self.max_detections = 300
super().__init__(**kwargs)
def call(self, inputs, **kwargs):
boxes = inputs[0]
classes = inputs[1]
boxes2 = tf.reshape(boxes, (tf.shape(boxes)[0], -1, 5))
scores2 = tf.reshape(classes, (tf.shape(classes)[0], -1))
outputs = nms_ar_tf(
boxes=boxes2,
scores=scores2,
nms_threshold=self.nms_threshold,
score_threhold=self.score_threshold)
new_outputs = (tf.cast(outputs[0], tf.float32),
tf.cast(outputs[1], tf.float32))
return new_outputs
def compute_output_shape(self, input_shape):
return [
(input_shape[0][0], self.max_detections, 5),
(input_shape[1][0], self.max_detections),
]
def compute_mask(self, inputs, mask=None):
"""Required in Keras when there is more than 1 output"""
return (len(inputs) + 1) * [None]
def get_config(self):
config = super(SmallNetwork, self).get_config()
config.update(
{
"nms_threshold": self.nms_threshold,
"score_threshold": self.score_threshold,
"max_detections": self.max_detections,
}
)
return config
def freeze_graph(model):
# Convert Keras model to ConcreteFunction
full_model = tf.function(lambda input_1: model(input_1))
full_model = full_model.get_concrete_function(
(tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype),
tf.TensorSpec(model.inputs[1].shape, model.inputs[1].dtype)))
out_path = os.path.join('/tmp')
frozen_func = convert_variables_to_constants_v2(full_model)
frozen_func.graph.as_graph_def()
tf.io.write_graph(
graph_or_graph_def=frozen_func.graph,
logdir=out_path,
name="frozen_inference_graph.pb",
as_text=False,
)
def test_filter_detections(network):
# two boxes with skew IoU = 0.41 and ar IoU 0.24
boxes = tf.constant(
np.array([[0, 0, 20, 20, 0],
[0, 0, 40, 10, 0.25 * np.pi],
[0, 0, 38, 10.5, 0.25 * np.pi]
]),
dtype=tf.keras.backend.floatx(),
)
boxes = tf.reshape(boxes, (1, 3, 5))
scores = tf.constant(
np.array([[0.9], [1], [0.5]]), dtype=tf.keras.backend.floatx()
)
scores = tf.reshape(scores, (1, 3))
res = network((boxes, scores))
return res
if __name__ == "__main__":
i1 = [tf.keras.Input(shape=(None, 5)), tf.keras.Input(shape=(None,))]
small = SmallNetwork()
outputs = small(i1)
model = tf.keras.Model(inputs=i1, outputs=outputs)
test_filter_detections(model)
freeze_graph(model)
Running this script generates a frozen_graph at /tmp/frozen_inference_graph.pb
. Then, I can attempt to convert the network to onnx with:
python3 -m tf2onnx.convert --input /tmp/frozen_inference_graph.pb --output /tmp/model.onnx --outputs Identity:0,Identity_1:0 --inputs input_1:0,input_1_1:0 --opset 13
The conversion fails with the following output (similar to the output I wrote above):
2022-11-15 08:58:38.973764: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-15 08:58:39.127360: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-15 08:58:39.127389: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-15 08:58:39.164798: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-15 08:58:40.204007: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-11-15 08:58:40.204163: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory
2022-11-15 08:58:40.204184: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
/usr/lib/python3.8/runpy.py:127: RuntimeWarning: 'tf2onnx.convert' found in sys.modules after import of package 'tf2onnx', but prior to execution of 'tf2onnx.convert'; this may result in unpredictable behaviour
warn(RuntimeWarning(msg))
2022-11-15 08:58:41.671642: E tensorflow/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-11-15 08:58:41.671701: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (rc-016): /proc/driver/nvidia/version does not exist
2022-11-15 08:58:41.672288: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
WARNING:tensorflow:From /home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/tf_loader.py:302: convert_variables_to_constants (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
2022-11-15 08:58:41,798 - WARNING - From /home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/tf_loader.py:302: convert_variables_to_constants (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
WARNING:tensorflow:From /home/schuetze/.local/lib/python3.8/site-packages/tensorflow/python/framework/convert_to_constants.py:936: extract_sub_graph (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
2022-11-15 08:58:41,799 - WARNING - From /home/schuetze/.local/lib/python3.8/site-packages/tensorflow/python/framework/convert_to_constants.py:936: extract_sub_graph (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
2022-11-15 08:58:41.835066: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2022-11-15 08:58:41.835280: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2022-11-15 08:58:41,972 - INFO - Using tensorflow=2.10.0, onnx=1.12.0, tf2onnx=1.13.0/2c1db5
2022-11-15 08:58:41,972 - INFO - Using opset <onnx, 13>
Traceback (most recent call last):
File "/home/schuetze/.local/lib/python3.8/site-packages/tensorflow/python/framework/importer.py", line 500, in _import_graph_def_internal
results = c_api.TF_GraphImportGraphDefWithResults(
tensorflow.python.framework.errors_impl.InvalidArgumentError: slice index 0 of dimension 0 out of bounds. for '{{node model/small_network/while/body/_1/model/small_network/while/while/body/_106/model/small_network/while/while/strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_INT32, _output_shapes=[<unknown>], begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0, shrink_axis_mask=1](Func/model/small_network/while/body/_1/model/small_network/while/while/body/_106/input/_187, model/small_network/while/body/_1/model/small_network/while/while/body/_106/model/small_network/while/while/strided_slice/stack, model/small_network/while/body/_1/model/small_network/while/while/body/_106/model/small_network/while/while/strided_slice/stack_1, model/small_network/while/body/_1/model/small_network/while/while/body/_106/model/small_network/while/while/strided_slice/stack_2)' with input shapes: [0], [1], [1], [1] and with computed input tensors: input[1] = <0>, input[2] = <1>, input[3] = <1>.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/convert.py", line 706, in <module>
main()
File "/home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/convert.py", line 269, in main
model_proto, _ = _convert_common(
File "/home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/convert.py", line 164, in _convert_common
g = process_tf_graph(tf_graph, const_node_values=const_node_values,
File "/home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/tfonnx.py", line 459, in process_tf_graph
main_g, subgraphs = graphs_from_tf(tf_graph, input_names, output_names, shape_override, const_node_values,
File "/home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/tfonnx.py", line 492, in graphs_from_tf
tf_graph = infer_shape(tf_graph, shape_override)
File "/home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/shape_inference.py", line 31, in infer_shape
tf_graph = infer_shape_for_graph(tf_graph)
File "/home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/shape_inference.py", line 82, in infer_shape_for_graph
tf_graph = tf_reload_graph(tf_graph)
File "/home/schuetze/.local/lib/python3.8/site-packages/tf2onnx/tf_loader.py", line 738, in tf_reload_graph
tf.import_graph_def(graph_def, name="")
File "/home/schuetze/.local/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py", line 561, in new_func
return func(*args, **kwargs)
File "/home/schuetze/.local/lib/python3.8/site-packages/tensorflow/python/framework/importer.py", line 403, in import_graph_def
return _import_graph_def_internal(
File "/home/schuetze/.local/lib/python3.8/site-packages/tensorflow/python/framework/importer.py", line 505, in _import_graph_def_internal
raise ValueError(str(e))
ValueError: slice index 0 of dimension 0 out of bounds. for '{{node model/small_network/while/body/_1/model/small_network/while/while/body/_106/model/small_network/while/while/strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_INT32, _output_shapes=[<unknown>], begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0, shrink_axis_mask=1](Func/model/small_network/while/body/_1/model/small_network/while/while/body/_106/input/_187, model/small_network/while/body/_1/model/small_network/while/while/body/_106/model/small_network/while/while/strided_slice/stack, model/small_network/while/body/_1/model/small_network/while/while/body/_106/model/small_network/while/while/strided_slice/stack_1, model/small_network/while/body/_1/model/small_network/while/while/body/_106/model/small_network/while/while/strided_slice/stack_2)' with input shapes: [0], [1], [1], [1] and with computed input tensors: input[1] = <0>, input[2] = <1>, input[3] = <1>.
Do you have idea what might causes this problem?
Update 1 Sorry, there was a small bug in the implementation of the network that came to light when I run the network on test data. I corrected this bug but the conversion error still remains the same. You can see the update code snipped and a test case above.
I run the following function in a
tf.while_loop
and including on line relating totf.tensor_scatter_nd_update
leads to a tf2onnx conversion failure. The function is (excerpts of it):Commenting out this line
makes all the difference whether I can convert the tf frozen graph to onnx. The stacktrace is:
What I am trying to do here is to do nms with oriented bounding boxes and fill the tensor
out_boxes
at position(step, idx, : )
with elements from the incoming predicted bounding boxes at position(step, i, : )
withi
being the box that is a box with the highest score not suppressed by other boxes.The code runs well through a evaluation session in tensorflow and can generate output without problems. Unfortunately, the error (coming eventually from tensorflow) is cryptic to me and I do not understand there errors pertaining to the invalid shapes.
Does anybody have an idea how I could fix these this issue? Judging from this issue I am not sure if tf.tensor_scatter_nd_update` is supported. If not, can you recommend a ideom to achieve the same effect (Insert elements into a fixed size tensor) ?