transcranial / keras-js

Run Keras models in the browser, with GPU support using WebGL
https://transcranial.github.io/keras-js
MIT License
4.96k stars · 501 forks

Predict() is not working when using BatchNormalization() #75

Open KentoW opened 7 years ago

KentoW commented 7 years ago

I am trying to use the following, somewhat complicated LSTM language model for text generation.

# -*- coding: utf-8 -*-
import json
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import keras
from keras.models import Model
from keras.layers import Input, Dense, Activation, Embedding, LSTM, TimeDistributed, concatenate
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.regularizers import l2

class MyLSTM:
    def __init__(self):
        self.FEATURE_SIZE = 1000
        self.VOCA_SIZE = 20000
        self.MAX_LEN = 20

    def build(self):
        print('Build model...')
        # INPUT LAYER 1
        word_input = Input(shape=(self.MAX_LEN,), dtype='int32', name='word_input')
        word_emb = Embedding(output_dim=512, input_length=self.MAX_LEN, input_dim=self.VOCA_SIZE, name='word_emb')(word_input)
        # INPUT LAYER 2
        feature_inputs = Input(shape=(self.MAX_LEN, self.FEATURE_SIZE), dtype='float32', name='f_inputs')
        feature_denses = TimeDistributed(Dense(512, activation='relu', kernel_regularizer=l2(0.01), name='f_dense'), name='f_denses')(feature_inputs)
        # CONCAT LAYER
        merge_layer = concatenate([word_emb, feature_denses], name='concat_layer')
        # LSTM
        word_lstm = LSTM(1024, implementation=1, kernel_regularizer=l2(0.01), name='word_lstm')(merge_layer)  # feed the concatenated tensor so feature_inputs is actually connected to the graph
        word_dense = Dense(self.VOCA_SIZE, kernel_regularizer=l2(0.01), name='word_dense')(word_lstm)
        word_norm = BatchNormalization(name="word_norm")(word_dense)
        word_output = Activation('softmax', name='word_output')(word_norm)
        self.model = Model(inputs=[word_input, feature_inputs], outputs=[word_output])
        # COMPILE
        optimizer = Adam(lr=0.001)
        self.model.compile(optimizer=optimizer, loss={'word_output': 'categorical_crossentropy'})

    def save(self):
        self.model.save_weights('model.hdf5')
        with open('model.json', 'w') as f:
            f.write(self.model.to_json())

def main():
    mylstm = MyLSTM()
    mylstm.build()
    mylstm.save()

if __name__ == "__main__":
    main()

Using Keras.js and encoder.py, I was able to load this model. However, when I call Model.predict() in Keras.js I get the same error as in #69. Model.predict() works fine when the model does not use BatchNormalization().

What is the solution to this error? Or could you please fix BatchNormalization() so that predict() works?

I'm using Keras 2.0.4 and Python 3.5.3.
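
For reference, at inference time BatchNormalization reduces to a per-feature affine transform built from the learned gamma/beta and the moving mean/variance — this is the computation Keras.js has to reproduce from the four exported weight arrays. A minimal NumPy sketch (variable names are illustrative, not taken from the keras-js source; 1e-3 is Keras' default epsilon):

```python
import numpy as np

def batchnorm_inference(x, gamma, beta, moving_mean, moving_var, eps=1e-3):
    """Inference-mode BatchNormalization: a per-feature affine transform."""
    return gamma * (x - moving_mean) / np.sqrt(moving_var + eps) + beta

# Example: normalize a batch of 4 vectors with 3 features each,
# using identity parameters (gamma=1, beta=0, mean=0, var=1).
x = np.random.randn(4, 3).astype(np.float32)
gamma = np.ones(3, dtype=np.float32)
beta = np.zeros(3, dtype=np.float32)
mean = np.zeros(3, dtype=np.float32)
var = np.ones(3, dtype=np.float32)
y = batchnorm_inference(x, gamma, beta, mean, var)
```

With identity parameters the output is just `x / sqrt(1 + eps)`, which makes it easy to check that an exporter or runtime is wiring the four arrays to the right roles.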

ghost commented 7 years ago

I can confirm this. I am getting the same error with a different neural network structure that also uses BatchNormalization.

mirukuma commented 7 years ago

I have this problem too. I would like to know whether this is a problem in keras-js itself.
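
Until this is fixed in keras-js, one possible workaround (sketched here in plain NumPy — this is not an official keras-js recipe) is to fold the inference-mode BatchNormalization into the preceding Dense layer's weights before export, so the saved model contains no BatchNormalization layer at all. This assumes the BN directly follows a Dense layer, as `word_norm` follows `word_dense` in the model above; the softmax can then be applied to the folded output unchanged. The function name is hypothetical:

```python
import numpy as np

def fold_batchnorm_into_dense(W, b, gamma, beta, mean, var, eps=1e-3):
    """Fold an inference-mode BatchNormalization into the preceding Dense layer.

    Returns (W_folded, b_folded) such that
        x @ W_folded + b_folded == batchnorm(x @ W + b)
    Shapes: W is (in, out); b, gamma, beta, mean, var are (out,).
    """
    scale = gamma / np.sqrt(var + eps)
    return W * scale, (b - mean) * scale + beta

# Sanity check against the explicit BatchNormalization formula.
rng = np.random.RandomState(0)
W, b = rng.randn(5, 3), rng.randn(3)
gamma, beta = rng.rand(3) + 0.5, rng.randn(3)
mean, var = rng.randn(3), rng.rand(3) + 0.1
x = rng.randn(8, 5)
eps = 1e-3

W_f, b_f = fold_batchnorm_into_dense(W, b, gamma, beta, mean, var, eps)
bn_out = gamma * ((x @ W + b) - mean) / np.sqrt(var + eps) + beta
assert np.allclose(x @ W_f + b_f, bn_out)
```

In Keras terms, you would read the four BN arrays with `get_weights()`, fold them into the Dense layer's kernel and bias, and save a copy of the model defined without the BatchNormalization layer.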