mpenkov opened this issue 4 years ago
Keras-related tests across multiple PRs have again started failing due to upstream version changes unrelated to the PRs themselves. Here's an example of two tests currently failing on Python 3.7 & Python 3.8:
=================================== FAILURES ===================================
____________ TestKerasWord2VecWrapper.testEmbeddingLayer20NewsGroup ____________

self = <gensim.test.test_keras_integration.TestKerasWord2VecWrapper testMethod=testEmbeddingLayer20NewsGroup>

    def testEmbeddingLayer20NewsGroup(self):
        """
        Test Keras 'Embedding' layer returned by 'get_embedding_layer' function
        for a smaller version of the 20NewsGroup classification problem.
        """
        MAX_SEQUENCE_LENGTH = 1000
        # Prepare text samples and their labels
        # Processing text dataset
        texts = []  # list of text samples
        texts_w2v = []  # used to train the word embeddings
        labels = []  # list of label ids
        data = fetch_20newsgroups(subset='train', categories=['alt.atheism', 'comp.graphics', 'sci.space'])
        for index in range(len(data)):
            label_id = data.target[index]
            file_data = data.data[index]
            i = file_data.find('\n\n')  # skip header
            if i > 0:
                file_data = file_data[i:]
            try:
                curr_str = str(file_data)
                sentence_list = curr_str.split('\n')
                for sentence in sentence_list:
                    sentence = (sentence.strip()).lower()
                    texts.append(sentence)
                    texts_w2v.append(sentence.split(' '))
                    labels.append(label_id)
            except Exception:
                pass
        # Vectorize the text samples into a 2D integer tensor
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(texts)
        sequences = tokenizer.texts_to_sequences(texts)
        # word_index = tokenizer.word_index
        data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
        labels = to_categorical(np.asarray(labels))
        x_train = data
        y_train = labels
        # prepare the embedding layer using the wrapper
        keras_w2v = self.model_twenty_ng
        keras_w2v.build_vocab(texts_w2v)
        keras_w2v.train(texts, total_examples=keras_w2v.corpus_count, epochs=keras_w2v.epochs)
        keras_w2v_wv = keras_w2v.wv
        embedding_layer = keras_w2v_wv.get_keras_embedding()
        # create a 1D convnet to solve our classification task
        sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
>       embedded_sequences = embedding_layer(sequence_input)

MAX_SEQUENCE_LENGTH = 1000
curr_str = '\n\nIn article <bill.047m@xpresso.UUCP> bill@xpresso.UUCP (Bill Vance) writes:\n>It has been known for quite a while ... Henry Spencer @ U of Toronto Zoology\nbetween SVR3 and SunOS. - Dick Dunn | henry@zoo.toronto.edu utzoo!henry\n'
data = array([[ 0, 0, 0, ..., 0, 0, 0],
              [ 0, 0, 0, ..., 0, 0, 0],
              ...,
              [ 0, 0, 0, ..., 5, 89, 360],
              [ 0, 0, 0, ..., 366, 367, 42],
              [ 0, 0, 0, ..., 0, 0, 0]], dtype=int32)
embedding_layer = <keras.layers.embeddings.Embedding object at 0x7fab732f1438>
file_data = '\n\nIn article <bill.047m@xpresso.UUCP> bill@xpresso.UUCP (Bill Vance) writes:\n>It has been known for quite a while ... Henry Spencer @ U of Toronto Zoology\nbetween SVR3 and SunOS. - Dick Dunn | henry@zoo.toronto.edu utzoo!henry\n'
i = 129
index = 4
keras_w2v = <gensim.models.word2vec.Word2Vec object at 0x7fab7329d978>
keras_w2v_wv = <gensim.models.keyedvectors.KeyedVectors object at 0x7fab7329d710>
label_id = 2
labels = array([[0., 0., 1.],
                [0., 0., 1.],
                [0., 0., 1.],
                [0., 0., 1.],
                [0., 0., 1.],
                [0., 0...
                [0., 0., 1.],
                [0., 0., 1.],
                [0., 0., 1.],
                [0., 0., 1.],
                [0., 0., 1.]], dtype=float32)
self = <gensim.test.test_keras_integration.TestKerasWord2VecWrapper testMethod=testEmbeddingLayer20NewsGroup>
sentence = ''
sentence_list = ['', '', 'In article <bill.047m@xpresso.UUCP> bill@xpresso.UUCP (Bill Vance) writes:', '>It has been known for quite a... Does anyone make a "globe" that is accurate', '>as to actual shape, landmass configuration/Long/Lat lines etc.?', ...]
sequence_input = <tf.Tensor 'input_1:0' shape=(None, 1000) dtype=int32>
sequences = [[], [], [], [10, 90, 11, 12, 43, 91, ...], [13, 95, 96, 44, 97, 98, ...], [100, 5, 101, 102, 103, 4, ...], ...]
texts = ['', '', '', 'i doubt there are good prospects for a self armoring system', 'for venus surface conditions (several hundred degrees, very high', 'pressure of co2, possibly sulfuric and nitric acids or oxides', ...]
texts_w2v = [[''], [''], [''], ['i', 'doubt', 'there', 'are', 'good', 'prospects', ...], ['for', 'venus', 'surface', 'conditions', '(several', 'hundred', ...], ['pressure', 'of', 'co2,', 'possibly', 'sulfuric', 'and', ...], ...]
tokenizer = <keras_preprocessing.text.Tokenizer object at 0x7fabc4b36748>
x_train = array([[ 0, 0, 0, ..., 0, 0, 0],
                 [ 0, 0, 0, ..., 0, 0, 0],
                 ...,
                 [ 0, 0, 0, ..., 5, 89, 360],
                 [ 0, 0, 0, ..., 366, 367, 42],
                 [ 0, 0, 0, ..., 0, 0, 0]], dtype=int32)
y_train = array([[0., 0., 1.],
                 [0., 0., 1.],
                 [0., 0., 1.],
                 [0., 0., 1.],
                 [0., 0., 1.],
                 [0., 0...
                 [0., 0., 1.],
                 [0., 0., 1.],
                 [0., 0., 1.],
                 [0., 0., 1.],
                 [0., 0., 1.]], dtype=float32)

gensim/test/test_keras_integration.py:128:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/py37-linux/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:75: in symbolic_fn_wrapper
    return func(*args, **kwargs)

args = (<keras.layers.embeddings.Embedding object at 0x7fab732f1438>, <tf.Tensor 'input_1:0' shape=(None, 1000) dtype=int32>)
func = <function Layer.__call__ at 0x7fab7dbb5f28>
kwargs = {}

.tox/py37-linux/lib/python3.7/site-packages/keras/engine/base_layer.py:446: in __call__
    self.assert_input_compatibility(inputs)

inputs = <tf.Tensor 'input_1:0' shape=(None, 1000) dtype=int32>
kwargs = {}
self = <keras.layers.embeddings.Embedding object at 0x7fab732f1438>

.tox/py37-linux/lib/python3.7/site-packages/keras/engine/base_layer.py:310: in assert_input_compatibility
    K.is_keras_tensor(x)

inputs = [<tf.Tensor 'input_1:0' shape=(None, 1000) dtype=int32>]
self = <keras.layers.embeddings.Embedding object at 0x7fab732f1438>
x = <tf.Tensor 'input_1:0' shape=(None, 1000) dtype=int32>

.tox/py37-linux/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:695: in is_keras_tensor
    if not is_tensor(x):

x = <tf.Tensor 'input_1:0' shape=(None, 1000) dtype=int32>
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

x = <tf.Tensor 'input_1:0' shape=(None, 1000) dtype=int32>

    def is_tensor(x):
>       return isinstance(x, tf_ops._TensorLike) or tf_ops.is_dense_tensor_like(x)
E       AttributeError: module 'tensorflow.python.framework.ops' has no attribute '_TensorLike'

x = <tf.Tensor 'input_1:0' shape=(None, 1000) dtype=int32>

.tox/py37-linux/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:703: AttributeError
_____________ TestKerasWord2VecWrapper.testEmbeddingLayerCosineSim _____________

self = <gensim.test.test_keras_integration.TestKerasWord2VecWrapper testMethod=testEmbeddingLayerCosineSim>

    def testEmbeddingLayerCosineSim(self):
        """
        Test Keras 'Embedding' layer returned by 'get_embedding_layer' function for a simple word similarity task.
        """
        keras_w2v_model = self.model_cos_sim
        keras_w2v_model_wv = keras_w2v_model.wv
        embedding_layer = keras_w2v_model_wv.get_keras_embedding()
        input_a = Input(shape=(1,), dtype='int32', name='input_a')
        input_b = Input(shape=(1,), dtype='int32', name='input_b')
>       embedding_a = embedding_layer(input_a)

embedding_layer = <keras.layers.embeddings.Embedding object at 0x7fab732c3710>
input_a = <tf.Tensor 'input_a:0' shape=(None, 1) dtype=int32>
input_b = <tf.Tensor 'input_b:0' shape=(None, 1) dtype=int32>
keras_w2v_model = <gensim.models.word2vec.Word2Vec object at 0x7fab732c3f98>
keras_w2v_model_wv = <gensim.models.keyedvectors.KeyedVectors object at 0x7fab732c3978>
self = <gensim.test.test_keras_integration.TestKerasWord2VecWrapper testMethod=testEmbeddingLayerCosineSim>

gensim/test/test_keras_integration.py:58:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/py37-linux/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:75: in symbolic_fn_wrapper
    return func(*args, **kwargs)

args = (<keras.layers.embeddings.Embedding object at 0x7fab732c3710>, <tf.Tensor 'input_a:0' shape=(None, 1) dtype=int32>)
func = <function Layer.__call__ at 0x7fab7dbb5f28>
kwargs = {}

.tox/py37-linux/lib/python3.7/site-packages/keras/engine/base_layer.py:446: in __call__
    self.assert_input_compatibility(inputs)

inputs = <tf.Tensor 'input_a:0' shape=(None, 1) dtype=int32>
kwargs = {}
self = <keras.layers.embeddings.Embedding object at 0x7fab732c3710>

.tox/py37-linux/lib/python3.7/site-packages/keras/engine/base_layer.py:310: in assert_input_compatibility
    K.is_keras_tensor(x)

inputs = [<tf.Tensor 'input_a:0' shape=(None, 1) dtype=int32>]
self = <keras.layers.embeddings.Embedding object at 0x7fab732c3710>
x = <tf.Tensor 'input_a:0' shape=(None, 1) dtype=int32>

.tox/py37-linux/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:695: in is_keras_tensor
    if not is_tensor(x):

x = <tf.Tensor 'input_a:0' shape=(None, 1) dtype=int32>
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

x = <tf.Tensor 'input_a:0' shape=(None, 1) dtype=int32>

    def is_tensor(x):
>       return isinstance(x, tf_ops._TensorLike) or tf_ops.is_dense_tensor_like(x)
E       AttributeError: module 'tensorflow.python.framework.ops' has no attribute '_TensorLike'

x = <tf.Tensor 'input_a:0' shape=(None, 1) dtype=int32>

.tox/py37-linux/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:703: AttributeError
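Both failures have the same root cause: the standalone keras 2.3.x TensorFlow backend checks tensors via the private tf_ops._TensorLike alias, which newer TensorFlow releases no longer expose, so these tests break whenever CI picks up a fresh TensorFlow, with no change on the gensim side. As an illustrative sketch (not gensim or keras code), the same check done through TensorFlow's public API is version-tolerant:

```python
import tensorflow as tf

def is_tensor_compat(x):
    # keras 2.3.x effectively does:
    #     isinstance(x, tf_ops._TensorLike) or tf_ops.is_dense_tensor_like(x)
    # but newer TensorFlow removed the private `_TensorLike` alias, which is
    # what raises the AttributeError above. `tf.is_tensor()` is the public,
    # stable way to ask the same question.
    return tf.is_tensor(x)
```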
There's essentially just one line of substantive keras code in gensim:
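(Roughly, from KeyedVectors.get_keras_embedding() in gensim 3.8; the one substantive line is the Embedding(...) construction.)

```python
def get_keras_embedding(self, train_embeddings=False):
    """Return a Keras 'Embedding' layer initialized with this model's word vectors."""
    try:
        from keras.layers import Embedding
    except ImportError:
        raise ImportError("Please install Keras to use this function")
    weights = self.vectors  # (vocab_size, vector_size) numpy array
    # The single substantive line: wrap the trained vectors in a Keras layer,
    # frozen by default so the pretrained embeddings are used as-is.
    return Embedding(
        input_dim=weights.shape[0], output_dim=weights.shape[1],
        weights=[weights], trainable=train_embeddings,
    )
```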
Moving that line, and the code that tests/demos it, to a Wiki page or other demo/tutorial notebook outside the main unit-testing cycle could save the many non-Keras-using gensim contributors hours of frustration reviewing and working around errors unrelated to their PRs.
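Concretely, a notebook or Wiki version needs no library code at all. A minimal sketch, assuming a trained Word2Vec model `w2v`; the helper name is illustrative, not an existing API:

```python
import numpy as np
from keras.layers import Embedding

def embedding_layer_from_keyedvectors(kv, train_embeddings=False):
    """Build a Keras Embedding layer from gensim KeyedVectors in user code,
    doing what get_keras_embedding() does inside the library today."""
    weights = np.asarray(kv.vectors)  # shape: (vocab_size, vector_size)
    return Embedding(
        input_dim=weights.shape[0],
        output_dim=weights.shape[1],
        weights=[weights],
        trainable=train_embeddings,
    )

# In the notebook:
# embedding_layer = embedding_layer_from_keyedvectors(w2v.wv)
```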
Keras tests should already be disabled, see #2908.
That's helpful! (My PRs, based on code from 2 days ago, don't yet include that disablement, but I'll bring it in.)
But the case for removing this one line of library code, and the accompanying 100+ lines of overbroad and now repeatedly failure-prone test code, remains strong. Moving it to a notebook demo would make #2908 ("re-enabling" the source of a recurring problem) moot, and end the low-value game of whack-a-mole against Keras's API evolution.
Originally posted by @gojomo in https://github.com/RaRe-Technologies/gensim/pull/2869#issuecomment-651354811