InvalidArgumentError: indices[1038,1] = 509 is not in [0, 500) [[{{node loss_6/dense_14_loss/embedding_lookup}}]]

(The title of this issue should’ve been: InvalidArgumentError: indices[1207,1] = 128 is not in [0, 128) [[{{node loss_7/dense_16_loss/embedding_lookup}}]]. I made a silly mistake there.)

System information

Have I written custom code (as opposed to using example directory): Y
TensorFlow backend (yes / no): Y
TensorFlow version: 1.15.0-dev20190711
Keras version: 2.2.4

I was training an item-based movie recommender with a simple single-layer autoencoder, and had this error:

File "C:\Users\Administrator\Anaconda3\envs\venv\lib\site-packages\tensorflow_core\python\client\session.py", line 1455, in call run_metadata_ptr)

InvalidArgumentError: indices[1207,1] = 128 is not in [0, 128) [[{{node loss_7/dense_16_loss/embedding_lookup}}]]

I was using movielens dataset which can be found here:

http://files.grouplens.org/datasets/movielens/ml-latest-small.zip

I believe the error occurred at the last fit command. The train_ratings is a numpy array with 610 columns as there are 610 users(raters) in total, and they give ratings to 9724 movies so the array has 9724 rows.

The model goes below as follows:

import tensorflow as tf
from keras import backend as K
from keras.models import Model
from keras.layers import Dense, Input
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

##############import data##############
def load_ratings(fname, random_state=42):
    temp = pd.read_csv(fname, names=['userId', 'movieId', 'rating', 'timestamp'], dtype=np.unicode)
    tempp = temp.drop([0])
    ratings = tempp.reset_index(drop=True)
    indices = range(len(ratings))
    train_val_indices, test_indices = train_test_split(indices, test_size=0.1, random_state=random_state)

    movie_idxs = {}
    user_idxs = {}
    def get_user_idx(user_id):
        if not user_id in user_idxs:
            user_idxs[user_id] = len(user_idxs)
        return user_idxs[user_id]

    def get_movie_idx(movie_id):
        if not movie_id in movie_idxs:
            movie_idxs[movie_id] = len(movie_idxs)
        return movie_idxs[movie_id]    

    num_users = ratings.userId.nunique()
    num_movies = ratings.movieId.nunique()
    data = {
        'ratings': np.zeros((num_users, num_movies), dtype=np.float16),
        'train': {
            'mask': np.zeros((num_users, num_movies), dtype=np.float16),
            'users': set(),
            'movies': set(),
        },
        'test': {
            'mask': np.zeros((num_users, num_movies), dtype=np.float16),
            'users': set(),
            'movies': set(),
        },
    }

    for indices, k in [(train_val_indices, 'train'), (test_indices, 'test')]:
        for row in ratings.iloc[indices].itertuples():
            user_idx = get_user_idx(row.userId)
            movie_idx = get_movie_idx(row.movieId)
            data['ratings'][user_idx, movie_idx] = row.rating
            data[k]['mask'][user_idx, movie_idx] = 1
            data[k]['users'].add(user_idx)
            data[k]['movies'].add(movie_idx)

    return data
reordered_data = load_ratings("ratings.csv")
num_users = reordered_data['ratings'].shape[0]
num_items = reordered_data['ratings'].shape[1]
train_ratings = np.transpose(np.multiply(reordered_data['train']['mask'], reordered_data['ratings']))
test_ratings = np.transpose(np.multiply(reordered_data['test']['mask'], reordered_data['ratings']))

##############initialization##############
#####Regularization function
def froreg(weight_matrix):
    return (1/2)*(tf.norm(weight_matrix))**2
#####loss function
def partial(y_true, y_pred):
    zero = tf.constant(0, dtype=tf.float32)
    position = K.tf.not_equal(y_true, zero)
    indices = K.tf.where(position)
    y_subpred = K.gather(y_pred, indices)
    return K.sum(K.square(y_subpred - y_true), axis=-1)
#####metric
def RMSE(y_true, y_pred):
    return K.sqrt((K.sum(K.square(y_pred - y_true), axis=-1))/num_users)

##############model##############
observed = Input(shape=(num_users,))
#####encoded layer
encoded = Dense(500, activation='sigmoid', use_bias=True, kernel_regularizer=froreg)(observed)
#####decoded layer
decoded = Dense(num_users, use_bias=True, kernel_regularizer=froreg)(encoded)
#####model
autoencoder = Model(observed, decoded)
autoencoder.summary()

##############training process##############
#####compile
autoencoder.compile(optimizer='SGD', loss=partial, metrics=['accuracy', RMSE])
#####fit
autoencoder.fit(train_ratings, train_ratings, validation_split=0.1, batch_size=128, epochs=5, shuffle=True)

A full error trace:

Traceback (most recent call last):

File "", line 1, in runfile('C:/Users/Administrator/.spyder-py3/temp.py', wdir='C:/Users/Administrator/.spyder-py3')

File "C:\Users\Administrator\Anaconda3\envs\venv\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile execfile(filename, namespace)

File "C:\Users\Administrator\Anaconda3\envs\venv\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile exec(compile(f.read(), filename, 'exec'), namespace)

File "C:/Users/Administrator/.spyder-py3/temp.py", line 101, in autoencoder.fit(train_ratings, train_ratings, validation_split=0.1, batch_size=128, epochs=5, shuffle=True)

File "C:\Users\Administrator\Anaconda3\envs\venv\lib\site-packages\keras\engine\training.py", line 1039, in fit validation_steps=validation_steps)

File "C:\Users\Administrator\Anaconda3\envs\venv\lib\site-packages\keras\engine\training_arrays.py", line 199, in fit_loop outs = f(ins_batch)

File "C:\Users\Administrator\Anaconda3\envs\venv\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in call return self._call(inputs)

File "C:\Users\Administrator\Anaconda3\envs\venv\lib\site-packages\keras\backend\tensorflow_backend.py", line 2675, in _call fetched = self._callable_fn(*array_vals)

File "C:\Users\Administrator\Anaconda3\envs\venv\lib\site-packages\tensorflow_core\python\client\session.py", line 1455, in call run_metadata_ptr)

InvalidArgumentError: indices[1207,1] = 128 is not in [0, 128) [[{{node loss_7/dense_16_loss/embedding_lookup}}]]

keras-team / keras

InvalidArgumentError: indices[1038,1] = 509 is not in [0, 500) [[{{node loss_6/dense_14_loss/embedding_lookup}}]] #13097