I get an IndexError error on rankings = ranking_metrics_at_k(model, ratings_train, ratings_test)
when the model is trained on cpu, but not on gpu?
{'data variant': '1m', 'model_name': 'als', 'use gpu': False, 'embedding dimension': 16, 'regularization': 1e-06, 'iterations': 128, 'use BM 25': True} because of the following error: IndexError('index 4054 is out of bounds for axis 1 with size 3953').
[my code is a mod of movielens.py basically to split the data set and then use ranking
implicit/cpu/matrix_factorization_base.py", line 79, in recommend
ids, scores = topk(
^^^^^
File "topk.pyx", line 41, in implicit.cpu.topk.topk
File "topk.pyx", line 54, in implicit.cpu.topk._topk_batch
def benchmark_movies(
min_rating=4.0,
variant="20m",
model_kwargs={}):
model_name = model_kwargs.pop("model_name")
use_bm25 = model_kwargs.pop("use_BM25", False)
model_proc = {
"als": AlternatingLeastSquares,
"bpr": BayesianPersonalizedRanking,
"lmf": LogisticMatrixFactorization,
"tfidf": TFIDFRecommender,
"cosine": CosineRecommender,
"bm25": BM25Recommender, #B=0.2, **model_kwargs)
}
if model_name in model_proc.keys():
# generate a recommender model based off the input params
model = model_proc[model_name](
**model_kwargs,
)
else:
raise NotImplementedError(f"model {model_name} isn't implemented for this example")
log.info("has cuda %s" % HAS_CUDA)
# read in the input data file
start = time.time()
titles, ratings = get_movielens(variant)
# remove things < min_rating, and convert to implicit dataset
# by considering ratings as a binary preference only
ratings.data[ratings.data < min_rating] = 0
ratings.eliminate_zeros()
ratings.data = np.ones(len(ratings.data))
log.info("read data file in %s", time.time() - start)
if model_name == "als":
ratings_train, ratings_test = leave_k_out_split(ratings, K=5, train_only_size=0.0)
ratings = ratings_train # to not break rest of code
# lets weight these models by bm25weight.
if use_bm25:
log.debug("weighting matrix by bm25_weight")
ratings = (bm25_weight(ratings, B=0.9) * 5)
ratings = ratings.tocsr()
user_ratings = ratings.T.tocsr()
# train the model
log.debug("training model %s", model_name)
start = time.time()
model.fit(user_ratings)
log.debug("trained model '%s' in %s", model_name, time.time() - start)
rankings = ranking_metrics_at_k(model, ratings_train, ratings_test)
log.info("ranking metrics = %s " % rankings)
wandb.log(data=rankings)
# "precision", "map", "ndcg","auc"]
return rankings["precision"]
I get an IndexError error on
rankings = ranking_metrics_at_k(model, ratings_train, ratings_test)
when the model is trained on cpu, but not on gpu?
{'data variant': '1m', 'model_name': 'als', 'use gpu': False, 'embedding dimension': 16, 'regularization': 1e-06, 'iterations': 128, 'use BM 25': True} because of the following error: IndexError('index 4054 is out of bounds for axis 1 with size 3953').
[my code is a mod of movielens.py basically to split the data set and then use ranking