gmalivenko / onnx2keras

Convert ONNX model graph to Keras model format.

dynamic_axes not supported #128

Open londumas opened 3 years ago

londumas commented 3 years ago

It seems that dynamic_axes are not supported for axes other than the batch axis. See the minimal example below:

import numpy as np
import torch
import onnxruntime
import onnx
import onnx2keras

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

class Net(torch.nn.Module):
    def __init__(self, input_size, output_size):
        super(Net, self).__init__()
        self.conv = torch.nn.Conv2d(input_size, output_size, kernel_size=1)

    def forward(self, x):
        x = self.conv(x)
        return x

batch_size = 1
seq_length = 2

input_size = 5
output_size = 10

path_to_save = 'model'

np.random.seed(42)
_ = torch.manual_seed(42)

x = np.random.randn(batch_size, input_size, seq_length, seq_length).astype(np.float32)  # NCHW: (batch, channels, H, W)

### Torch
model = Net(input_size=input_size, output_size=output_size)

y = to_numpy(model(torch.FloatTensor(x)))
y_torch = y.copy()

torch.save(model, "{}.pth".format(path_to_save))

### Torch -> ONNX

model = torch.load("{}.pth".format(path_to_save))

torch.onnx.export(model,
    torch.FloatTensor(x),
    "{}.onnx".format(path_to_save),
    input_names = ['input'],
    output_names = ['output'],
    dynamic_axes={
        'input' : {0: 'batch_size', 2: 'seq_lengths1', 3: 'seq_lengths2'},    # variable length axes
        'output' : {0: 'batch_size', 2: 'seq_lengths1', 3: 'seq_lengths2'},
    },
)

### Run ONNX
ort_session = onnxruntime.InferenceSession("{}.onnx".format(path_to_save))

ort_inputs = {ort_session.get_inputs()[0].name: x}
y = ort_session.run(None, ort_inputs)[0]

print("Maximum difference: ", np.absolute(y-y_torch).max(), ( (y-y_torch)**2 ).max() )

### ONNX -> KERAS

model = onnx.load('{}.onnx'.format(path_to_save))
model = onnx2keras.onnx_to_keras(model, ['input'], change_ordering=True, verbose=True)
INFO:onnx2keras:Converter is called.
DEBUG:onnx2keras:List input shapes:
DEBUG:onnx2keras:None
DEBUG:onnx2keras:List inputs:
DEBUG:onnx2keras:Input 0 -> input.
DEBUG:onnx2keras:List outputs:
DEBUG:onnx2keras:Output 0 -> output.
DEBUG:onnx2keras:Gathering weights to dictionary.
DEBUG:onnx2keras:Found weight conv.weight with shape (10, 5, 1, 1).
DEBUG:onnx2keras:Found weight conv.bias with shape (10,).
DEBUG:onnx2keras:Found input input with shape [5, 0, 0]
DEBUG:onnx2keras:######
DEBUG:onnx2keras:...
DEBUG:onnx2keras:Converting ONNX operation
DEBUG:onnx2keras:type: Conv
DEBUG:onnx2keras:node_name: output
DEBUG:onnx2keras:node_params: {'dilations': [1, 1], 'group': 1, 'kernel_shape': [1, 1], 'pads': [0, 0, 0, 0], 'strides': [1, 1], 'change_ordering': True, 'name_policy': None}
DEBUG:onnx2keras:...
DEBUG:onnx2keras:Check if all inputs are available:
DEBUG:onnx2keras:Check input 0 (name input).
DEBUG:onnx2keras:Check input 1 (name conv.weight).
DEBUG:onnx2keras:The input not found in layers / model inputs.
DEBUG:onnx2keras:Found in weights, add as a numpy constant.
DEBUG:onnx2keras:Check input 2 (name conv.bias).
DEBUG:onnx2keras:The input not found in layers / model inputs.
DEBUG:onnx2keras:Found in weights, add as a numpy constant.
DEBUG:onnx2keras:... found all, continue
DEBUG:onnx2keras:conv:Conv with bias
DEBUG:onnx2keras:conv:2D convolution
Maximum difference:  1.1920929e-07 1.4210855e-14

InvalidArgumentErrorTraceback (most recent call last)
~/.local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs, op_def)
   1811   try:
-> 1812     c_op = pywrap_tf_session.TF_FinishOperation(op_desc)
   1813   except errors.InvalidArgumentError as e:

InvalidArgumentError: Negative dimension size caused by subtracting 1 from 0 for '{{node output/Conv2D_9}} = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](input_9, output/Conv2D_9/ReadVariableOp)' with input shapes: [?,5,0,0], [1,1,5,10].

During handling of the above exception, another exception occurred:

ValueErrorTraceback (most recent call last)
<ipython-input-10-b67f96df5cc4> in <module>
     67 
     68 model = onnx.load('{}.onnx'.format(path_to_save))
---> 69 model = onnx2keras.onnx_to_keras(model, ['input'], change_ordering=True, verbose=True)

~/.local/lib/python3.7/site-packages/onnx2keras/converter.py in onnx_to_keras(onnx_model, input_names, input_shapes, name_policy, verbose, change_ordering)
    179             lambda_funcs,
    180             node_name,
--> 181             keras_names
    182         )
    183         if isinstance(keras_names, list):

~/.local/lib/python3.7/site-packages/onnx2keras/convolution_layers.py in convert_conv(node, params, layers, lambda_func, node_name, keras_name)
    175             )
    176 
--> 177             layers[node_name] = conv(input_0)
    178     else:
    179         # 1D conv

~/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    924     if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
    925       return self._functional_construction_call(inputs, args, kwargs,
--> 926                                                 input_list)
    927 
    928     # Maintains info about the `Layer.call` stack.

~/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
   1115           try:
   1116             with ops.enable_auto_cast_variables(self._compute_dtype_object):
-> 1117               outputs = call_fn(cast_inputs, *args, **kwargs)
   1118 
   1119           except errors.OperatorNotAllowedInGraphError as e:

~/.local/lib/python3.7/site-packages/tensorflow/python/keras/layers/convolutional.py in call(self, inputs)
    245       inputs = array_ops.pad(inputs, self._compute_causal_padding(inputs))
    246 
--> 247     outputs = self._convolution_op(inputs, self.kernel)
    248 
    249     if self.use_bias:

~/.local/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
    199     """Call target, and fall back on dispatchers if there is a TypeError."""
    200     try:
--> 201       return target(*args, **kwargs)
    202     except (TypeError, ValueError):
    203       # Note: convert_to_eager_tensor currently raises a ValueError, not a

~/.local/lib/python3.7/site-packages/tensorflow/python/ops/nn_ops.py in convolution_v2(input, filters, strides, padding, data_format, dilations, name)
   1016       data_format=data_format,
   1017       dilations=dilations,
-> 1018       name=name)
   1019 
   1020 

~/.local/lib/python3.7/site-packages/tensorflow/python/ops/nn_ops.py in convolution_internal(input, filters, strides, padding, data_format, dilations, name, call_from_convolution, num_spatial_dims)
   1146           data_format=data_format,
   1147           dilations=dilations,
-> 1148           name=name)
   1149     else:
   1150       if channel_index == 1:

~/.local/lib/python3.7/site-packages/tensorflow/python/ops/nn_ops.py in _conv2d_expanded_batch(input, filters, strides, padding, data_format, dilations, name)
   2590         data_format=data_format,
   2591         dilations=dilations,
-> 2592         name=name)
   2593   return squeeze_batch_dims(
   2594       input,

~/.local/lib/python3.7/site-packages/tensorflow/python/ops/gen_nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name)
    977                   padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu,
    978                   explicit_paddings=explicit_paddings,
--> 979                   data_format=data_format, dilations=dilations, name=name)
    980   _result = _outputs[:]
    981   if _execute.must_record_gradient():

~/.local/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
    742       op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    743                                  name=scope, input_types=input_types,
--> 744                                  attrs=attr_protos, op_def=op_def)
    745 
    746     # `outputs` is returned as a separate return value so that the output

~/.local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
    591     return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    592         op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 593         compute_device)
    594 
    595   def capture(self, tensor, name=None, shape=None):

~/.local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
   3483           input_types=input_types,
   3484           original_op=self._default_original_op,
-> 3485           op_def=op_def)
   3486       self._create_op_helper(ret, compute_device=compute_device)
   3487     return ret

~/.local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
   1973         op_def = self._graph._get_op_def(node_def.op)
   1974       self._c_op = _create_c_op(self._graph, node_def, inputs,
-> 1975                                 control_input_ops, op_def)
   1976       name = compat.as_str(node_def.name)
   1977     # pylint: enable=protected-access

~/.local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs, op_def)
   1813   except errors.InvalidArgumentError as e:
   1814     # Convert to ValueError for backwards compatibility.
-> 1815     raise ValueError(str(e))
   1816 
   1817   return c_op

ValueError: Negative dimension size caused by subtracting 1 from 0 for '{{node output/Conv2D_9}} = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](input_9, output/Conv2D_9/ReadVariableOp)' with input shapes: [?,5,0,0], [1,1,5,10].
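The [5, 0, 0] input shape in the converter log above appears to be the root cause: when torch.onnx.export marks an axis as dynamic, the exported graph stores a symbolic dim_param for that axis and leaves the numeric dim_value at its protobuf default of 0, and onnx2keras seems to read that literal 0 as the dimension. A minimal sketch to confirm this on the exported file (assuming model.onnx from the script above):

import onnx

model = onnx.load('model.onnx')
# Dynamic axes carry a symbolic dim_param (e.g. 'seq_lengths1') while
# dim_value stays 0, which is what shows up as [5, 0, 0] in the log.
for dim in model.graph.input[0].type.tensor_type.shape.dim:
    print(dim.dim_value, dim.dim_param)

From the onnx_to_keras signature visible in the traceback, pinning concrete shapes via the input_shapes argument may sidestep the crash, at the cost of a fixed-size Keras model rather than a dynamic one (hypothetical usage; the exact format expected by input_shapes is an assumption):

k_model = onnx2keras.onnx_to_keras(
    model, ['input'],
    input_shapes=[(input_size, seq_length, seq_length)],  # assumed: per-input shape without batch dim
    change_ordering=True, verbose=True,
)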
tea1528 commented 3 years ago

@londumas Did you find a solution to this problem?

londumas commented 3 years ago

@tea1528, yes, one way is to convert the model from torch to tensorflow by hand. This is sad, but it works very well.
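For reference, a minimal sketch of what such a hand port can look like for the toy model above (the layer name 'conv' and the weight transposition are assumptions, based on the (10, 5, 1, 1) torch weight and the [1, 1, 5, 10] Keras kernel visible in the logs; assumes the Net class is importable so torch.load works):

import numpy as np
import tensorflow as tf
import torch

# Rebuild the 1x1 convolution as a channels-last Keras model with
# fully dynamic spatial dimensions.
torch_model = torch.load('model.pth')
inputs = tf.keras.Input(shape=(None, None, 5))   # H and W stay dynamic, 5 channels
outputs = tf.keras.layers.Conv2D(10, kernel_size=1, name='conv')(inputs)
keras_model = tf.keras.Model(inputs, outputs)

# torch stores conv weights as (out, in, kH, kW); Keras expects (kH, kW, in, out).
w = torch_model.conv.weight.detach().numpy().transpose(2, 3, 1, 0)
b = torch_model.conv.bias.detach().numpy()
keras_model.get_layer('conv').set_weights([w, b])

# Any spatial size now works, e.g. a 7x3 input in NHWC layout:
x = np.random.randn(1, 7, 3, 5).astype(np.float32)
y = keras_model(x)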