onnx / keras-onnx

Convert tf.keras/Keras models to ONNX
Apache License 2.0

Error converting Keras model BILSTM with Attention custom layer to ONNX #293

Open yuvalshachaf opened 4 years ago

yuvalshachaf commented 4 years ago

Hi there, I have been trying to convert a simple Keras BiLSTM (or LSTM) with Attention model to ONNX.
It keeps failing during onnx model save.

The error message I am getting is TypeError: object of type 'NoneType' has no len()

Drilling down into the error leads to a specific line in the Attention Class: weighted_input = x * K.expand_dims(a)
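For what it's worth, that bare error is simply what Python raises when len() is applied to None, so my guess (not confirmed from the traceback) is that the converter ends up calling len() on a shape it could not infer:

# Hypothetical illustration only, independent of the converter:
shape = None   # stands in for a shape the converter could not infer
len(shape)     # TypeError: object of type 'NoneType' has no len()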

I am using the latest onnx, but with TF 1.4.0 and Keras 2.1.5.

The custom layer code is:

Attention layer code taken from: https://gist.github.com/cbaziotis/6428df359af27d58078ca5ed9792bd6d

AttentionWithContext layer code taken from https://gist.github.com/cbaziotis/7ef97ccf71cbc14366835198c09809d2

import numpy as np
from keras import backend as K, initializers, regularizers, constraints
from keras.layers import Layer
from sklearn import metrics
import tensorflow as tf
from sentiment_parallel.configuration_manager.config_manager import config


def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with
    both Theano and Tensorflow.
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        # todo: check that this is correct
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)

class Attention(Layer):
    def __init__(self, W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, return_attention=False, **kwargs):
        self.supports_masking = True
        self.return_attention = return_attention
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        print("in build")
        assert len(input_shape) == 3

        self.W = self.add_weight('{}_W'.format(self.name),
                                 (input_shape[-1],),
                                 initializer=self.init,
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight('{}_b'.format(self.name),
                                     (input_shape[1],),
                                     initializer='zero',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        super(Attention, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number e to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)
        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result

    def compute_output_shape(self, input_shape):
        if self.return_attention:
            return [(input_shape[0], input_shape[-1]),
                    (input_shape[0], input_shape[1])]
        else:
            return input_shape[0], input_shape[-1]

def create_dummy_bilstm_model():
    import tensorflow as tf
    from keras.optimizers import Adam
    from keras.activations import relu, sigmoid
    from keras.layers import Embedding, Dense, Input, Dropout, concatenate, Bidirectional, LSTM
    from keras.models import Model

    inputs = []
    words_input = Input(shape=(200,))
    inputs.append(words_input)
    embedding = Embedding(5000, 200, input_length=200, trainable=config.retrain_embedding)
    x_emb = embedding(words_input)
    features_input = Input(shape=(200, 2))  # 2 for 2 types of speakers
    x_emb = concatenate([x_emb, features_input], axis=-1)
    inputs.append(features_input)
    x_drop = Dropout(0.5)(x_emb)
    hiddenBidirectional = Bidirectional(LSTM(64, return_sequences=True))(x_drop)
    hidden_drop = Dropout(0.5)(hiddenBidirectional)
    hidden = Attention()(hidden_drop)
    predictions = Dense(config.model_out_size, activation=sigmoid)(hidden)
    model = Model(inputs=[words_input, features_input], outputs=[predictions])
    print(model.summary())
    run_opts = tf.RunOptions()
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=config.learning_rate),
                  metrics=['accuracy'], options=run_opts)
    return model

def save_keras_model_as_onnx(keras_model):
    import onnxmltools
    onnx_model = onnxmltools.convert_keras(keras_model)
    onnxmltools.utils.save_model(onnx_model, 'yuval.onnx')
    print("model saved as onnx")


if __name__ == '__main__':
    model = create_dummy_bilstm_model()
    save_keras_model_as_onnx(model)
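For completeness, the same conversion could also be attempted through keras2onnx directly (this repo); a minimal sketch, assuming keras2onnx is installed (I have not verified this path here):

import onnx
import keras2onnx

# Hypothetical alternative to the onnxmltools call above; untested sketch.
model = create_dummy_bilstm_model()
onnx_model = keras2onnx.convert_keras(model, model.name)
onnx.save_model(onnx_model, 'yuval_keras2onnx.onnx')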

wenbingl commented 4 years ago

_" Drilling down into the error leads to a specific line in the Attention Class: weighted_input = x * K.expanddims(a) "

I don't see why it is a converter issue here?

yuvalshachaf commented 4 years ago

Keras itself has no issues whatsoever with this attention class; the problem only shows up in the ONNX conversion. When we bypassed this line of code, the model converted successfully. We think the error has to do with the unknown (question mark) second dimension, i.e. axis 1, which makes the expand_dims fail. In that line the shape of "a" is (None, 200) and the shape of "x" is (None, None, 128). Thanks
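One thing that might work around this (only a sketch, not verified): since the sequence length is fixed at 200 in this model, the expand_dims inside Attention.call() could be replaced with an explicit static reshape so the converter sees a concrete time dimension:

# Hypothetical change inside Attention.call(); 200 is the fixed sequence length of this model.
# Instead of:
#     weighted_input = x * K.expand_dims(a)
a_expanded = K.reshape(a, (-1, 200, 1))   # static (batch, 200, 1) instead of (None, ?, 1)
weighted_input = x * a_expanded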