IBM / tensorflow-large-model-support

Large Model Support in Tensorflow
Apache License 2.0

model support LMSKerasCallback() on LSTM #17

Closed GRuuuuu closed 5 years ago

GRuuuuu commented 5 years ago

PowerAI: 1.5.3
TensorFlow: 1.10

Hi, I tried to apply LMS to a simple LSTM (Keras). I followed the LMS documentation.

The original keras code : https://github.com/keras-team/keras/blob/master/examples/imdb_bidirectional_lstm.py

Here is the code with LMS added:

'''
#Trains a Bidirectional LSTM on the IMDB sentiment classification task.

Output after 4 epochs on CPU: ~0.8146
Time per epoch on CPU (Core i7): ~150s.
'''

from __future__ import print_function
import numpy as np
import tensorflow as tf
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb

from keras.utils.training_utils import multi_gpu_model

tf.logging.set_verbosity(tf.logging.INFO)

max_features = 20000
# cut texts after this number of words
# (among top max_features most common words)
maxlen = 100
batch_size = 1024 #32

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
y_train = np.array(y_train)
y_test = np.array(y_test)

from tensorflow.core.protobuf import rewriter_config_pb2
from keras.backend import tensorflow_backend as K
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement=True
config.log_device_placement=False
config.graph_options.rewrite_options.dependency_optimization = rewriter_config_pb2.RewriterConfig.OFF
config.graph_options.rewrite_options.memory_optimization = rewriter_config_pb2.RewriterConfig.SCHEDULING_HEURISTICS
K.set_session(tf.Session(config=config))

with tf.device('/cpu:0'):
  model = Sequential()
  model.add(Embedding(max_features, 128, input_length=maxlen))
  model.add(Bidirectional(LSTM(64)))
  model.add(Dropout(0.5))
  model.add(Dense(1, activation='sigmoid'))

from tensorflow.contrib.lms import LMSKerasCallback
# LMSKerasCallback and LMS share a set of keyword arguments. Here we just
# use the default options.
lms_callback = LMSKerasCallback()

# try using different optimizers and different optimizer configs
model = multi_gpu_model(model, gpus=2)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print('Train...')
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=4,
          callbacks=[lms_callback],
          validation_data=[x_test, y_test])

and I get the same error as #9:

Train on 25000 samples, validate on 25000 samples
INFO:tensorflow:[LMS][0] Editing model for LMS
INFO:tensorflow:[LMS][0] n_tensors: all tensors
INFO:tensorflow:[LMS][0] lb: 1
Traceback (most recent call last):
  File "lstmKeras_lms.py", line 73, in <module>
    validation_data=[x_test, y_test])
  File "/root/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1705, in fit
    validation_steps=validation_steps)
  File "/root/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1153, in _fit_loop
    callbacks.set_model(callback_model)
  File "/root/anaconda3/lib/python3.6/site-packages/keras/callbacks.py", line 52, in set_model
    callback.set_model(model)
  File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/lms/python/keras_callback.py", line 55, in set_model
    lmsMod.run()
  File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/lms/python/lms.py", line 330, in run
    self._topo_sort.build()
  File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/lms/python/topos.py", line 45, in build
    topo_sort = list(toposort.toposort(self._build_dependency_dict()))
  File "/root/anaconda3/lib/python3.6/site-packages/toposort.py", line 81, in toposort
    raise CircularDependencyError(data)
  File "/root/anaconda3/lib/python3.6/site-packages/toposort.py", line 45, in __init__
    s = 'Circular dependencies exist among these items: {{{}}}'.format(', '.join('{!r}:{!r}'.format(key, value) for key, value in sorted(data.items())))
TypeError: '<' not supported between instances of 'Operation' and 'Operation'

I also read #9, but I don't know how to solve this problem. Is that code wrong? Or can LMS not be applied to LSTMs?

Thanks.

smatzek commented 5 years ago

The version of TFLMS in PowerAI 1.5.3 and 1.5.4 is very similar to the implementation in this repository. As shown in #9, this version of TFLMS cannot handle loops in the graph, including the loops created by RNNs and LSTMs.

However, IBM PowerAI 1.6.0 and IBM Watson Machine Learning Community Edition 1.6.1 have a new implementation of TensorFlow Large Model Support, and in Watson Machine Learning Community Edition 1.6.1 it was updated to tolerate loops: https://www.ibm.com/support/knowledgecenter/SS5SF7_1.6.1/navigation/wmlce_getstarted_tflmsv2.html

"While loops: TFLMS does not swap tensors for operations inside while loops. TensorFlow while loops have built-in GPU-CPU memory swapping that can be enabled. If the model is using while loops it is recommended to set swap_memory=True on the while loop. See the TensorFlow documentation for more information."
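For a hand-written TensorFlow while loop, that recommendation amounts to passing swap_memory=True to tf.while_loop. A minimal sketch of my own (not from the docs; step_fn, the loop bound, and the shapes are placeholder choices) in the same TF 1.x API used above:

import tensorflow as tf

def step_fn(i, state):
    # placeholder loop body: one "time step" of work on the state
    return i + 1, state * 2.0

i0 = tf.constant(0)
state0 = tf.zeros([1024, 64])
_, final_state = tf.while_loop(
    cond=lambda i, state: i < 100,
    body=step_fn,
    loop_vars=[i0, state0],
    swap_memory=True)  # enables TensorFlow's built-in GPU-CPU tensor swapping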

In TensorFlow (and the TensorFlow backend to Keras), there is built-in swapping for tensors in LSTMs and RNNs, even without TensorFlow Large Model Support.
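As an illustration (mine, not from this issue), in core TF 1.x that built-in swapping is exposed through the swap_memory argument of the RNN APIs, for example tf.nn.dynamic_rnn:

import tensorflow as tf

cell = tf.nn.rnn_cell.LSTMCell(64)
# batch x time x features placeholder; the shapes are only illustrative
inputs = tf.placeholder(tf.float32, shape=[None, 100, 128])
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32,
                                   swap_memory=True)  # built-in GPU-CPU swapping

Whether and how the Keras LSTM layer itself surfaces this option depends on the Keras and backend versions; the article below covers the details and the performance trade-offs.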

I wrote an article about the performance characteristics of that here: https://developer.ibm.com/linuxonpower/2019/07/09/tensor-swapping-with-recurrent-neural-networks-in-tensorflow/

In your particular case I would suggest investigating that built-in support, or upgrading to IBM Watson Machine Learning Community Edition 1.6.1 if you wish to use TFLMS with LSTMs.