tensorflow / recommenders

TensorFlow Recommenders is a library for building recommender system models using TensorFlow.
Apache License 2.0
1.82k stars 273 forks source link

save model error #168

Closed SmileTM closed 3 years ago

SmileTM commented 3 years ago

error info :

TypeError: Invalid input_signature [{'user_id': [None], 'user_occupation_label': [TensorSpec(shape=(), dtype=tf.int32, name=None)]}]; input_signature must be a possibly nested sequence of TensorSpec objects.

the code in here, how to fix it?


from typing import Dict, Text
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from pprint import pprint
import os

os.environ['CUDA_VISIBLE_DEVICES'] = "0"
gpus = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(device=gpus[0], enable=True)

# os.environ['CUDA_VISIBLE_DEVICES'] = "-1"
ratings = tfds.load('movielens/100k-ratings', split="train")
# Features of all the available movies.
movies = tfds.load('movielens/100k-movies', split="train")

for i in ratings.take(1):
    pprint(i)
for i in movies.take(1):
    pprint(i)

ratings = ratings.map(lambda x: {
    "movie_id": x["movie_id"],
    "user_id": x["user_id"],
    "user_occupation_label": x["user_occupation_label"]
})
movies = movies.map(lambda x: {
    "movie_id": x["movie_id"],
    "movie_title": x["movie_title"]
})

tf.random.set_seed(2021)
shuffled = ratings.shuffle(100000, seed=2021, reshuffle_each_iteration=False)
num_sample = len(shuffled)

train = shuffled.take(int(num_sample * 0.8))
test = shuffled.skip(int(num_sample * 0.8)).take(int(num_sample * 0.2))

movie_ids = movies.batch(1000).map(lambda x: x["movie_id"])
user_ids = ratings.batch(1000000).map(lambda x: x["user_id"])

unique_movie_ids = np.unique(np.concatenate(list(movie_ids)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

unique_movie_id_strings = [id.decode('utf-8') for id in unique_movie_ids]
unique_user_id_strings = [id.decode('utf-8') for id in unique_user_ids]

hidden_dimension = 128
embedding_dimension = 64

class UserModel(tf.keras.Model):
    def __init__(self, embedding_dimension, **kwargs):
        super(UserModel, self).__init__(**kwargs)
        user_features = [
            tf.feature_column.embedding_column(
                tf.feature_column.categorical_column_with_vocabulary_list('user_id', unique_user_id_strings),
                hidden_dimension),
            tf.feature_column.embedding_column(
                tf.feature_column.categorical_column_with_identity("user_occupation_label", 30),
                hidden_dimension
            )
        ]
        self.embedding_layer = tf.keras.layers.DenseFeatures(user_features, name="user_embedding")
        self.dense1 = tf.keras.layers.Dense(hidden_dimension)
        self.dense2 = tf.keras.layers.Dense(embedding_dimension)

    def call(self, inputs):
        x = self.embedding_layer(inputs)
        x= self.dense1(x)
        x= self.dense2(x)
        return x

class MovieModel(tf.keras.Model):
    def __init__(self, embedding_dimension, **kwargs):
        super(MovieModel, self).__init__(**kwargs)
        movie_features = [
            tf.feature_column.embedding_column(
                tf.feature_column.categorical_column_with_vocabulary_list('movie_id', unique_movie_id_strings),
                hidden_dimension)

        ]

        self.embedding_layer = tf.keras.layers.DenseFeatures(movie_features, name="movie_embedding")
        self.dense = tf.keras.layers.Dense(embedding_dimension)

    def call(self, inputs):
        x = self.embedding_layer(inputs)
        x =self.dense(x)

        return x

class MovielensModel(tfrs.Model):
    def __init__(self, **kwargs):
        super(MovielensModel, self).__init__(**kwargs)
        self.user_model = UserModel(embedding_dimension)
        self.movie_model = MovieModel(embedding_dimension)
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=movies.batch(128).map(lambda x: {"movie_id": x["movie_id"]}).map(self.movie_model))
        )

    def compute_loss(self, features, training=False) -> tf.Tensor:
        user_embeddings = self.user_model( {"user_id": features["user_id"], "user_occupation_label": features["user_occupation_label"]})
        # user_embeddings = self.user_model( {"user_id": features["user_id"]})
        movie_embeddings = self.movie_model({"movie_id": features["movie_id"]})
        return self.task(user_embeddings, movie_embeddings)

model = MovielensModel()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
model.compile(optimizer=optimizer)
model.fit(train.batch(8192), epochs=1)

index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

index.index(movies.batch(100).map(model.movie_model), movies.map(lambda x:x['movie_title']))
# index.index(movies.batch(100).map(model.movie_model), movies.map(lambda x:x['movie_id']))
_,titles = index({"user_id":["32"],"user_occupation_label":[20]})

print(f"Top 3 recommendations for user 32: {titles[0, :3]}")

index.save('./model')
anisayari commented 3 years ago

@SmileTM it is not really convenient to full passed an entire code and ask for a code review like that. Maybe can you give more details as for example in which line do you get this error please ? Thank you.

SmileTM commented 3 years ago

in last line of the code : index.save(path) the error info is 'input_signature' error.

your can pass the code to run.

maciejkula commented 3 years ago

Thanks, @anisayari - it would definitely help if you could put this in a colab we could run, @SmileTM .

The issue is that you need to call the index layer using tensors or numpy arrays, not just lists:

_, titles = index({"user_id": np.array(["32"]) ,"user_occupation_label": np.array([20])})

With this modification it should save just fine.

SmileTM commented 3 years ago

Thanks, @anisayari - it would definitely help if you could put this in a colab we could run, @SmileTM .

The issue is that you need to call the index layer using tensors or numpy arrays, not just lists:

_, titles = index({"user_id": np.array(["32"]) ,"user_occupation_label": np.array([20])})

With this modification it should save just fine.

thank you!!it really works well. I also want to paste the code in colab , but colab cannot be opened in China. QAQ. I'm sorry to add unnecessary trouble to you.