tensorflow / tensorflow

An Open Source Machine Learning Framework for Everyone
https://tensorflow.org
Apache License 2.0
186.66k stars 74.35k forks source link

ValueError: No gradients provided for any variable (Keras 2.4, Tensorflow 2.3.0) #42038

Closed ScruffySilky closed 4 years ago

ScruffySilky commented 4 years ago

So I'm using this model to train on Google colab, it was written for Tensorflow 1.9 and Keras 2, but when I train I get the following error, has anyone seen this or how to solve it?

It was training fine before but this error started today.

Actual code:

from os import listdir
from numpy import array
from keras.preprocessing.text import Tokenizer, one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model, Sequential, model_from_json
from keras.utils import to_categorical
from keras.layers.core import Dense, Dropout, Flatten
from keras.optimizers import RMSprop
from keras.layers.convolutional import Conv2D
from keras.callbacks import ModelCheckpoint
from keras.layers import Embedding, TimeDistributed, RepeatVector, LSTM,     concatenate , Input, Reshape, Dense
from keras.preprocessing.image import array_to_img, img_to_array, load_img
import numpy as np

dir_name = '/data/train/'

# Read a file and return a string
def load_doc(filename):
file = open(filename, 'r')
text = file.read()
file.close()
return text

def load_data(data_dir):
text = []
images = []
# Load all the files and order them
all_filenames = listdir(data_dir)
all_filenames.sort()
for filename in (all_filenames):
    if filename[-3:] == "npz":
        # Load the images already prepared in arrays
        image = np.load(data_dir+filename)
        images.append(image['features'])
    else:
        # Load the boostrap tokens and rap them in a start and end tag
        syntax = '<START> ' + load_doc(data_dir+filename) + ' <END>'
        # Seperate all the words with a single space
        syntax = ' '.join(syntax.split())
        # Add a space after each comma
        syntax = syntax.replace(',', ' ,')
        text.append(syntax)
images = np.array(images, dtype=float)
return images, text

train_features, texts = load_data(dir_name)

# Initialize the function to create the vocabulary 
tokenizer = Tokenizer(filters='', split=" ", lower=False)
# Create the vocabulary 
tokenizer.fit_on_texts([load_doc('bootstrap.vocab')])

# Add one spot for the empty word in the vocabulary 
vocab_size = len(tokenizer.word_index) + 1
max_length = 48

def preprocess_data(texts, features, max_sequence):
X, y, image_data = list(), list(), list()
sequences = tokenizer.texts_to_sequences(texts)
for img_no, seq in enumerate(sequences):
    for i in range(1, len(seq)):
        # Add the sentence until the current count(i) and add the current     count to the output
        in_seq, out_seq = seq[:i], seq[i]
        # Pad all the input token sentences to max_sequence
        in_seq = pad_sequences([in_seq], maxlen=max_sequence)[0]
        # Turn the output into one-hot encoding
        out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
        # Add the corresponding image to the boostrap token file
        image_data.append(features[img_no])
        # Cap the input sentence to 48 tokens and add it
        X.append(in_seq[-48:])
        y.append(out_seq)
return np.array(image_data), np.array(X), np.array(y)

# data generator, intended to be used in a call to model.fit_generator()
def data_generator(descriptions, features, n_step, max_sequence):
# loop until we finish training
while 1:
    # loop over photo identifiers in the dataset
    for i in range(0, len(descriptions), n_step):
        Ximages, XSeq, y = list(), list(),list()
        for j in range(i, min(len(descriptions), i+n_step)):
            image = features[j]
            # retrieve text input
            desc = descriptions[j]
            # generate input-output pairs
            in_img, in_seq, out_word = preprocess_data([desc], [image], max_sequence)
            for k in range(len(in_img)):
                Ximages.append(in_img[k])
                XSeq.append(in_seq[k])
                y.append(out_word[k])
        # yield this batch of samples to the model
        yield [[array(Ximages), array(XSeq)], array(y)]

#Create the encoder
image_model = Sequential()
image_model.add(Conv2D(16, (3, 3), padding='valid', activation='relu', input_shape=(256, 256, 3,)))
image_model.add(Conv2D(16, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(32, (3,3), activation='relu', padding='same'))
image_model.add(Conv2D(32, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(64, (3,3), activation='relu', padding='same'))
image_model.add(Conv2D(64, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(128, (3,3), activation='relu', padding='same'))

image_model.add(Flatten())
image_model.add(Dense(1024, activation='relu'))
image_model.add(Dropout(0.3))
image_model.add(Dense(1024, activation='relu'))
image_model.add(Dropout(0.3))

image_model.add(RepeatVector(max_length))

visual_input = Input(shape=(256, 256, 3,))
encoded_image = image_model(visual_input)

language_input = Input(shape=(max_length,))
language_model = Embedding(vocab_size, 50, input_length=max_length,    mask_zero=True)(language_input)
language_model = LSTM(128, return_sequences=True)(language_model)
language_model = LSTM(128, return_sequences=True)(language_model)

#Create the decoder
decoder = concatenate([encoded_image, language_model])
decoder = LSTM(512, return_sequences=True)(decoder)
decoder = LSTM(512, return_sequences=False)(decoder)
decoder = Dense(vocab_size, activation='softmax')(decoder)

# Compile the model
model = Model(inputs=[visual_input, language_input], outputs=decoder)
optimizer = RMSprop(lr=0.0001, clipvalue=1.0)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

#Save the model for every 2nd epoch
filepath="org-weights-epoch-{epoch:04d}--loss-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, verbose=1, save_weights_only=True,     period=2)
callbacks_list = [checkpoint]

# test the data generator
generator = data_generator(texts, train_features, 1, 150)
model.fit_generator(generator, steps_per_epoch=50, epochs=5, callbacks=callbacks_list, verbose=1)

Error I receive when training:

ValueError                                
Traceback (most recent call last)
<ipython-input-4-6927891f43ca> in <module>()
  1 # test the data generator
  2 generator = data_generator(texts, train_features, 1, max_sequence)
----> 3 loaded_model.fit_generator(generator, steps_per_epoch=steps, epochs=5, callbacks=callbacks_list, verbose=1)
  4 loaded_model.save(mydrive + '/output/weights.hdf5')

12 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
971           except Exception as e:  # pylint:disable=broad-except
972             if hasattr(e, "ag_error_metadata"):
--> 973               raise e.ag_error_metadata.to_exception(e)
974             else:
975               raise

ValueError: in user code:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
    return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:796 step_function  **
    outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
    return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
    return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
    return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:789 run_step  **
    outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:757 train_step
    self.trainable_variables)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:2737 _minimize
    trainable_variables))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:562 _aggregate_gradients
    filtered_grads_and_vars = _filter_grads(grads_and_vars)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1271 _filter_grads
    ([v.name for _, v in grads_and_vars],))

ValueError: No gradients provided for any variable: ['embedding_1/embeddings:0', 'lstm_1/lstm_cell/kernel:0', 'lstm_1/lstm_cell/recurrent_kernel:0', 'lstm_1/lstm_cell/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0', 'conv2d_4/kernel:0', 'conv2d_4/bias:0', 'conv2d_5/kernel:0', 'conv2d_5/bias:0', 'conv2d_6/kernel:0', 'conv2d_6/bias:0', 'conv2d_7/kernel:0', 'conv2d_7/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0', 'dense_2/kernel:0', 'dense_2/bias:0', 'lstm_2/lstm_cell_1/kernel:0', 'lstm_2/lstm_cell_1/recurrent_kernel:0', 'lstm_2/lstm_cell_1/bias:0', 'lstm_3/lstm_cell_2/kernel:0', 'lstm_3/lstm_cell_2/recurrent_kernel:0', 'lstm_3/lstm_cell_2/bias:0', 'lstm_4/lstm_cell_3/kernel:0', 'lstm_4/lstm_cell_3/recurrent_kernel:0', 'lstm_4/lstm_cell_3/bias:0', 'dense_3/kernel:0', 'dense_3/bias:0'].

I'm training it on Google Colab using the TPU. If I train it on Tensorflow 1.x it trains fine but takes 8 hours per epoch with my dataset. Tensorflow 2.x was taking 1 hour per epoch but is now giving this error

EDIT: SOLUTION

I cannot train on TPU, but I am at least able to train on GPU, I can continue the project ! Solution by @silentkinght25 and @silentkinght25 solves it.

"To resolve this u must modify the data generator function as: yield ([array(Ximages), array(XSeq)], array(y)) instead of yield [[array(Ximages), array(XSeq)], array(y)]"

amahendrakar commented 4 years ago

@ScruffySilky, On running the code, I am facing an error stating FileNotFoundError: [Errno 2] No such file or directory: '/data/train/'.

Could you please provide all the necessary files required to run the script. Thanks!

ScruffySilky commented 4 years ago

Sure, here are the locations of the two files needed to run it:

  1. '/data/train/' = https://github.com/ekohendratno/Screenshot-to-code-in-Keras/tree/master/local/Bootstrap/resources/eval_light
  2. 'bootstrap.vocab' = https://github.com/ekohendratno/Screenshot-to-code-in-Keras/blob/master/local/Bootstrap/resources/bootstrap.vocab
ScruffySilky commented 4 years ago

I'm 100% sure it has to do with my data_generator because running the belolw example works fine It just cant fit the data into memory https://github.com/ekohendratno/Screenshot-to-code-in-Keras/blob/master/floydhub/Bootstrap/bootstrap.ipynb

amahendrakar commented 4 years ago

@ScruffySilky, I was able to run the code without any issues on TF v2.3. Please find the gist of it here. Thanks!

ScruffySilky commented 4 years ago

@ScruffySilky, I was able to run the code without any issues on TF v2.3. Please find the gist of it here. Thanks!

The error occurs on the model.fit.generator() function:

generator = data_generator(texts, train_features, 1, 150)
model.fit_generator(generator, steps_per_epoch=50, epochs=5, callbacks=callbacks_list, verbose=1)
Kaelinator commented 4 years ago

Having the same issue. Using model.fit with just numpy arrays works (with saved data using numpy.save), but I was trying to use the new saving/loading dataset features, and I'm getting this error message in my model.fit_generator(...).

EDIT: Okay well it was because my generator wasn't yielding a tuple with (x, y), instead just a single dictionary with all my named keys. Now I'm yielding { 'input1': ..., 'input2': ...}, label according to the documentation

ScruffySilky commented 4 years ago

Having the same issue. Using model.fit with just numpy arrays works (with saved data using numpy.save), but I was trying to use the new saving/loading dataset features, and I'm getting this error message in my model.fit_generator(...).

EDIT: Okay well it was because my generator wasn't yielding a tuple with (x, y), instead just a single dictionary with all my named keys. Now I'm yielding { 'input1': ..., 'input2': ...}, label according to the documentation

Any suggestion for my code?

Kaelinator commented 4 years ago

Try changing the yield part in data_generator from:

# yield this batch of samples to the model
        yield [[array(Ximages), array(XSeq)], array(y)]

to just:

# yield this batch of samples to the model
        yield [array(Ximages), array(XSeq)], array(y)

Since the documentation expects a tuple (x, y) to be returned from your generator.

ScruffySilky commented 4 years ago

Try changing the yield part in data_generator from:

# yield this batch of samples to the model
        yield [[array(Ximages), array(XSeq)], array(y)]

to just:

# yield this batch of samples to the model
        yield [array(Ximages), array(XSeq)], array(y)

Since the documentation expects a tuple (x, y) to be returned from your generator.

Changing that gives this error:

`UnavailableError                          Traceback (most recent call last)
<ipython-input-5-38870834c9d8> in <module>()
      1 # test the data generator
      2 generator = data_generator(texts, train_features, 1, max_sequence)
----> 3 model.fit_generator(generator, steps_per_epoch=steps, epochs=5, callbacks=callbacks_list, verbose=1)
      4 model.save(mydrive + '/output/weights.hdf5')

16 frames
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)

UnavailableError: {{function_node __inference_train_function_28401}} failed to connect to all addresses
Additional GRPC error information from remote target /job:localhost/replica:0/task:0/device:CPU:0:
:{"created":"@1596748432.082355881","description":"Failed to pick subchannel","file":"third_party/grpc/src/core/ext/filters/client_channel/client_channel.cc","file_line":3948,"referenced_errors":[{"created":"@1596748432.082354141","description":"failed to connect to all addresses","file":"third_party/grpc/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc","file_line":394,"grpc_status":14}]}
     [[{{node IteratorGetNext}}]]`
amahendrakar commented 4 years ago

The error occurs on the model.fit.generator() function:

generator = data_generator(texts, train_features, 1, 150) model.fit_generator(generator, steps_per_epoch=50, epochs=5, callbacks=callbacks_list, verbose=1)

@ScruffySilky, Could you please share the code you are running, so that we can look into it. Thanks!

ScruffySilky commented 4 years ago

The error occurs on the model.fit.generator() function: generator = data_generator(texts, train_features, 1, 150) model.fit_generator(generator, steps_per_epoch=50, epochs=5, callbacks=callbacks_list, verbose=1)

@ScruffySilky, Could you please share the code you are running, so that we can look into it. Thanks!

import os
from os import listdir
from numpy import array
from keras.preprocessing.text import Tokenizer, one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model, load_model, Sequential, model_from_json
from keras.utils import to_categorical
from keras.layers.core import Dense, Dropout, Flatten
from keras.optimizers import RMSprop
from keras.layers.convolutional import Conv2D
from keras.callbacks import ModelCheckpoint
from keras.layers import Embedding, TimeDistributed, RepeatVector, LSTM, concatenate , Input, Reshape, Dense
import numpy as np
import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

from google.colab import drive
drive.mount('/content/gdrive')

# !ls "/content/gdrive/My Drive/weights"
mydrive = "/content/gdrive/My Drive"
dir_name = mydrive+"/data/" 
# ( https://github.com/ekohendratno/Screenshot-to-code-in-Keras/tree/master/local/Bootstrap/resources/eval_light)
bootstrap_vocab = mydrive+"/weights/bootstrap.vocab" 
# (https://github.com/ekohendratno/Screenshot-to-code-in-Keras/blob/master/local/Bootstrap/resources/bootstrap.vocab)

# Read a file and return a string
def load_doc(filename):
    file = open(filename, 'r')
    text = file.read()
    file.close()
    return text

def load_data(data_dir):
    text = []
    images = []
    # Load all the files and order them
    all_filenames = listdir(data_dir)
    all_filenames.sort()
    for filename in (all_filenames):
        if filename[-3:] == "npz":
            # Load the images already prepared in arrays
            image = np.load(data_dir+filename)
            images.append(image['features'])
        else:
            # Load the boostrap tokens and rap them in a start and end tag
            syntax = '<START> ' + load_doc(data_dir+filename) + ' <END>'
            # Seperate all the words with a single space
            syntax = ' '.join(syntax.split())
            # Add a space after each comma
            syntax = syntax.replace(',', ' ,')
            text.append(syntax)
    images = np.array(images, dtype=float)
    return images, text

train_features, texts = load_data(dir_name)

# Initialize the function to create the vocabulary 
tokenizer = Tokenizer(filters='', split=" ", lower=False)
# Create the vocabulary 
tokenizer.fit_on_texts([load_doc(bootstrap_vocab)])

# Add one spot for the empty word in the vocabulary 
vocab_size = len(tokenizer.word_index) + 1
max_length = 48
max_sequence = (max(len(d.split()) for d in texts))

def preprocess_data(texts, features, max_sequence):
    X, y, image_data = list(), list(), list()
    sequences = tokenizer.texts_to_sequences(texts)
    for img_no, seq in enumerate(sequences):
        for i in range(1, len(seq)):
            # Add the sentence until the current count(i) and add the current count to the output
            in_seq, out_seq = seq[:i], seq[i]
            # Pad all the input token sentences to max_sequence
            in_seq = pad_sequences([in_seq], maxlen=max_sequence)[0]
            # Turn the output into one-hot encoding
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
            # Add the corresponding image to the boostrap token file
            image_data.append(features[img_no])
            # Cap the input sentence to 48 tokens and add it
            X.append(in_seq[-48:])
            y.append(out_seq)
    return np.array(image_data), np.array(X), np.array(y)

# data generator, intended to be used in a call to model.fit_generator()
def data_generator(descriptions, features, n_step, max_sequence):
    # loop until we finish training
    while 1:
        # loop over photo identifiers in the dataset
        for i in range(0, len(descriptions), n_step):
            Ximages, XSeq, y = list(), list(),list()
            for j in range(i, min(len(descriptions), i+n_step)):
                image = features[j]
                # retrieve text input
                desc = descriptions[j]
                # generate input-output pairs
                in_img, in_seq, out_word = preprocess_data([desc], [image], max_sequence)
                for k in range(len(in_img)):
                    Ximages.append(in_img[k])
                    XSeq.append(in_seq[k])
                    y.append(out_word[k])
            # yield this batch of samples to the model
            yield [[array(Ximages), array(XSeq)], array(y)]

#Create the encoder
image_model = Sequential()
image_model.add(Conv2D(16, (3, 3), padding='valid', activation='relu', input_shape=(256, 256, 3,)))
image_model.add(Conv2D(16, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(32, (3,3), activation='relu', padding='same'))
image_model.add(Conv2D(32, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(64, (3,3), activation='relu', padding='same'))
image_model.add(Conv2D(64, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(128, (3,3), activation='relu', padding='same'))

image_model.add(Flatten())
image_model.add(Dense(1024, activation='relu'))
image_model.add(Dropout(0.3))
image_model.add(Dense(1024, activation='relu'))
image_model.add(Dropout(0.3))

image_model.add(RepeatVector(max_length))

visual_input = Input(shape=(256, 256, 3,))
encoded_image = image_model(visual_input)

language_input = Input(shape=(max_length,))
language_model = Embedding(vocab_size, 50, input_length=max_length, mask_zero=True)(language_input)
language_model = LSTM(128, return_sequences=True)(language_model)
language_model = LSTM(128, return_sequences=True)(language_model)

#Create the decoder
decoder = concatenate([encoded_image, language_model])
decoder = LSTM(512, return_sequences=True)(decoder)
decoder = LSTM(512, return_sequences=False)(decoder)
decoder = Dense(vocab_size, activation='softmax')(decoder)

# Compile the model
model = Model(inputs=[visual_input, language_input], outputs=decoder)
optimizer = RMSprop(lr=0.0001, clipvalue=1.0)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

#Save the model for every 2nd epoch
filepath="weights.hdf5"
checkpoint = ModelCheckpoint(filepath, verbose=1, period=1)
callbacks_list = [checkpoint]

steps = len(texts)
print('steps: ', steps)
print('max_sequence: ', max_sequence)

model.fit_generator(data_generator(texts, train_features, 1, max_sequence), steps_per_epoch=steps, epochs=1, callbacks=callbacks_list, verbose=1)
model.save(mydrive + '/output/weights.hdf5')
amahendrakar commented 4 years ago

Was able to reproduce the issue with TF v2.3. Please find the gist of it here. Thanks!

ScruffySilky commented 4 years ago

So you guys upgrade your back-end and I as a paying customer must just forget my project because it doesn't work on your new platform. Is there any resolution to this...

ScruffySilky commented 4 years ago

Is there any planned solution for this bug ?

silentkinght25 commented 4 years ago

I had same problem @ScruffySilky but i resolved it. The problem is in data generator function, because model.fit(generator,....) expect a list of tuple or a dict of tuple. To resolve this u must modify the data generator function as: yield ([array(Ximages), array(XSeq)], array(y)) instead of yield [[array(Ximages), array(XSeq)], array(y)]

silentkinght25 commented 4 years ago

I had same problem @ScruffySilky but i resolved it. The problem is in data generator function, because model.fit(generator,....) expect a list of tuple or a dict of tuple. To resolve this u must modify the data generator function as: yield ([array(Ximages), array(XSeq)], array(y)) instead of yield [[array(Ximages), array(XSeq)], array(y)]

I hope this resolves ur issue.

ScruffySilky commented 4 years ago

Ok I cannot train on TPU, but I am able to train on GPU. Solution by @silentkinght25 and @silentkinght25 seems to solve it.

"To resolve this u must modify the data generator function as: yield ([array(Ximages), array(XSeq)], array(y)) instead of yield [[array(Ximages), array(XSeq)], array(y)]"

google-ml-butler[bot] commented 4 years ago

Are you satisfied with the resolution of your issue? Yes No

kartiksonaghela commented 4 years ago

I had same problem @ScruffySilky but i resolved it. The problem is in data generator function, because model.fit(generator,....) expect a list of tuple or a dict of tuple. To resolve this u must modify the data generator function as: yield ([array(Ximages), array(XSeq)], array(y)) instead of yield [[array(Ximages), array(XSeq)], array(y)]

I hope this resolves ur issue.

Im still getting the error here is my code

from keras.preprocessing.text import Tokenizer
import numpy as np

def to_lines(descriptions):
    all_desc = list()
    for key in descriptions.keys():
        [all_desc.append(d) for d in descriptions[key]]
     return all_desc
def create_tokenizer(descriptions):
    lines = to_lines(descriptions)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer

tokenizer = create_tokenizer(train_descriptions)

vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)

def max_length(descriptions):
    lines = to_lines(descriptions)
    return max(len(d.split()) for d in lines)

def create_sequences(tokenizer, max_length, desc_list, photo):
    X1, X2, y = list(), list(), list()
    for desc in desc_list:
        seq = tokenizer.texts_to_sequences([desc])[0]
        for i in range(1, len(seq)):
            in_seq, out_seq = seq[:i], seq[i]
            in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
            X1.append(photo)
            X2.append(in_seq)
            y.append(out_seq)
    return np.array(X1), np.array(X2), np.array(y)
def data_generator(descriptions, photos, tokenizer, max_length):
    while 1:
      for key, desc_list in descriptions.items():
        # retrieve the photo feature
        photo = photos[key][0]
        in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo)
        yield ([array(in_img), array(in_seq)], array(out_word))

model = define_model(vocab_size, max_length)
epochs = 20
steps = len(train_descriptions)
for i in range(epochs):
    generator = data_generator(train_descriptions, train_features, tokenizer, max_length)
    model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    model.save('model_' + str(i) + '.h5')

`

and the error is
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-267-102bfe3f0b2a> in <module>()
      8         generator = data_generator(train_descriptions, train_features, tokenizer, max_length)
      9        ---> 10         model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
     11         model.save('model_' + str(i) + '.h5')

10 frames /usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name) 58 ctx.ensure_initialized() 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name, ---> 60 inputs, attrs, num_outputs) 61 except core._NotOkStatusException as e: 62 if name is not None:

InvalidArgumentError: 2 root error(s) found. (0) Invalid argument: Matrix size-incompatible: In[0]: [47,1000], In[1]: [4096,256] [[node functional_67/dense_97/MatMul (defined at :10) ]] [[gradient_tape/functional_67/embedding_32/embedding_lookup/Reshape_1/_30]] (1) Invalid argument: Matrix size-incompatible: In[0]: [47,1000], In[1]: [4096,256] [[node functional_67/dense_97/MatMul (defined at :10) ]] 0 successful operations. 0 derived errors ignored. [Op:__inference_train_function_610379]

Function call stack: train_function -> train_function

Please help this is my code plz help https://colab.research.google.com/drive/145K_SfItvudtSnTNZKreoKOs8GEYjlsF?usp=sharing

amahendrakar commented 3 years ago

@kartiksonaghela, Could you please submit a new issue from this link and fill in the template, so that we can track the issue there. Thanks!