ranking metrics fail when model trained on CPU but not GPU?; implicit 0.7.2

I get an IndexError error on rankings = ranking_metrics_at_k(model, ratings_train, ratings_test)

when the model is trained on cpu, but not on gpu?

{'data variant': '1m', 'model_name': 'als', 'use gpu': False, 'embedding dimension': 16, 'regularization': 1e-06, 'iterations': 128, 'use BM 25': True} because of the following error: IndexError('index 4054 is out of bounds for axis 1 with size 3953').

[my code is a mod of movielens.py basically to split the data set and then use ranking

implicit/cpu/matrix_factorization_base.py", line 79, in recommend
    ids, scores = topk(
                  ^^^^^
  File "topk.pyx", line 41, in implicit.cpu.topk.topk
  File "topk.pyx", line 54, in implicit.cpu.topk._topk_batch

def benchmark_movies(
        min_rating=4.0, 
        variant="20m",
        model_kwargs={}):

    model_name = model_kwargs.pop("model_name")
    use_bm25 = model_kwargs.pop("use_BM25", False)
    model_proc = {
        "als": AlternatingLeastSquares,
        "bpr": BayesianPersonalizedRanking,
        "lmf": LogisticMatrixFactorization,
        "tfidf": TFIDFRecommender,
        "cosine": CosineRecommender,
        "bm25": BM25Recommender, #B=0.2, **model_kwargs)
    }
    if model_name in model_proc.keys():
        # generate a recommender model based off the input params
        model = model_proc[model_name](
            **model_kwargs,
        )
    else:
        raise NotImplementedError(f"model {model_name} isn't implemented for this example")

    log.info("has cuda %s" % HAS_CUDA)

    # read in the input data file
    start = time.time()
    titles, ratings = get_movielens(variant)

    # remove things < min_rating, and convert to implicit dataset
    # by considering ratings as a binary preference only
    ratings.data[ratings.data < min_rating] = 0
    ratings.eliminate_zeros()
    ratings.data = np.ones(len(ratings.data))
    log.info("read data file in %s", time.time() - start)

    if model_name == "als":
        ratings_train, ratings_test = leave_k_out_split(ratings, K=5, train_only_size=0.0)    
        ratings = ratings_train # to not break rest of code
        # lets weight these models by bm25weight.
        if use_bm25:
            log.debug("weighting matrix by bm25_weight")
            ratings = (bm25_weight(ratings, B=0.9) * 5)
        ratings = ratings.tocsr()

    user_ratings = ratings.T.tocsr()

    # train the model
    log.debug("training model %s", model_name)
    start = time.time()
    model.fit(user_ratings)
    log.debug("trained model '%s' in %s", model_name, time.time() - start)
    rankings = ranking_metrics_at_k(model, ratings_train, ratings_test)
    log.info("ranking metrics = %s " % rankings)
    wandb.log(data=rankings)
    # "precision", "map", "ndcg","auc"]
    return rankings["precision"]

benfred / implicit

ranking metrics fail when model trained on CPU but not GPU?; implicit 0.7.2 #704