bug: 'XGBClassifier' object has no attribute '__call__' #4093

Open trongnghia05 opened 1 year ago

trongnghia05 commented 1 year ago

Describe the bug

Oh, I have a model trained using the XGBoost library following these steps:

Step 1: Save the model after training using joblib.dump(best_model, abspath(config.model.path)).

Step 2: After training, I used BentoML to save the model using bentoml.picklable_model.save_model.

Step 3: When I try to load the model with bentoml.picklable_model.get and serve an API call, I get the error: 'XGBClassifier' object has no attribute '__call__'.

I'm not sure why this happens. In Step 2, after calling save_model, the log showed a __call__ method in the model signature: "Using the default model signature for pickable model ({'__call__': ModelSignature(batchable=False, batch_dim=(0, 0), input_spec=None, output_spec=None)}) for model 'xgboost'." I took this to mean that my model already has a __call__ method, yet at inference time the error still says "no attribute '__call__'".

train code:

import warnings


from functools import partial
from typing import Callable
from bentoml.types import ModelSignature
import hydra
import joblib
import numpy as np
import pandas as pd
from hydra.utils import to_absolute_path as abspath
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from omegaconf import DictConfig
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import bentoml

def load_data(path: DictConfig):
    """Read the four processed CSV splits named in the config.

    Args:
        path: Config node exposing ``X_train``, ``X_test``, ``y_train`` and
            ``y_test`` entries, each with a ``path`` attribute. Every path is
            passed through ``abspath`` before being read.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)`` of DataFrames.
    """
    split_names = ("X_train", "X_test", "y_train", "y_test")
    X_train, X_test, y_train, y_test = (
        pd.read_csv(abspath(getattr(path, split).path)) for split in split_names
    )
    return X_train, X_test, y_train, y_test

# Objective evaluated by the hyperparameter search: predicts on the test
# features, prints the accuracy and reports the negated accuracy as the loss.
# NOTE(review): this listing appears truncated - the signature is never
# closed, the XGBClassifier(...) arguments are missing and no fit call is
# visible, so `evaluation` looks unused here. Confirm against the real file.
def get_objective(
    X_train: pd.DataFrame,
    y_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_test: pd.DataFrame,
    config: DictConfig,
    space: dict,

    model = XGBClassifier(

    evaluation = [(X_train, y_train), (X_test, y_test)]
    prediction = model.predict(X_test.values)
    accuracy = accuracy_score(y_test, prediction)
    print("SCORE:", accuracy)
    # The loss is the negated accuracy; the model object is returned in the
    # same dict under the "model" key.
    return {"loss": -accuracy, "status": STATUS_OK, "model": model}

# Runs the search over `space` and returns the trial result with the lowest
# loss.
# NOTE(review): this listing appears truncated - the fmin(...) arguments and
# the closing bracket of the `trials.results[...]` lookup are missing, so it
# is unclear whether the result dict or its "model" entry is returned.
def optimize(objective: Callable, space: dict):
    trials = Trials()
    best_hyperparams = fmin(
    print("The best hyperparameters are : ", "\n")
    # Index of the trial whose reported "loss" is smallest.
    best_model = trials.results[
        np.argmin([r["loss"] for r in trials.results])
    return best_model

# NOTE(review): this listing appears truncated - several closing brackets and
# the first argument of save_model(...) are missing. Confirm against the real
# file before relying on it.
@hydra.main(config_path="../../config", config_name="main")
def train(config: DictConfig):
    """Train the model, save it to BentoML and dump it with joblib.

    Loads the processed splits, builds the hyperopt search space from
    ``config.model``, runs ``optimize`` and stores the result both in the
    BentoML model store and at ``config.model.path``.
    """

    X_train, X_test, y_train, y_test = load_data(config.processed)

    # Define space: each distribution's arguments come from config.model;
    # n_estimators and seed are passed through as fixed values.
    space = {
        "max_depth": hp.quniform("max_depth", **config.model.max_depth),
        "gamma": hp.uniform("gamma", **config.model.gamma),
        "reg_alpha": hp.quniform("reg_alpha", **config.model.reg_alpha),
        "reg_lambda": hp.uniform("reg_lambda", **config.model.reg_lambda),
        "colsample_bytree": hp.uniform(
            "colsample_bytree", **config.model.colsample_bytree
        "min_child_weight": hp.quniform(
            "min_child_weight", **config.model.min_child_weight
        "n_estimators": config.model.n_estimators,
        "seed": config.model.seed,
    # Bind the data and config so the objective has only its trailing
    # parameter left to fill.
    objective = partial(
        get_objective, X_train, y_train, X_test, y_test, config

    # Find best model
    best_model = optimize(objective, space)
    # NOTE(review): the signature registered here is "__call__", but the
    # traceback in this thread shows XGBClassifier has no __call__ attribute;
    # the signature key presumably needs to name a method the model actually
    # has (e.g. "predict") - confirm.
    bentoml.picklable_model.save_model(, best_model,
                                       signatures={"__call__": ModelSignature(batchable=False)})
    # Save model
    joblib.dump(best_model, abspath(config.model.path))

if __name__ == "__main__":

save model:

import bentoml
import hydra
import joblib
from hydra.utils import to_absolute_path as abspath
from omegaconf import DictConfig
from bentoml.types import ModelSignature

def load_model(model_path: str):
    """Return the object that ``joblib.load`` reads from ``model_path``."""
    loaded = joblib.load(model_path)
    return loaded

# Re-saves the joblib-dumped model into the BentoML model store.
# NOTE(review): the first argument of save_model(...) is missing in this
# listing. No `signatures` is passed here; per the log quoted in this thread
# the default picklable signature is "__call__" - confirm that the loaded
# model actually provides that method.
@hydra.main(config_path="../../config", config_name="main")
def save_to_bentoml(config: DictConfig):
    model = load_model(abspath(config.model.path))
    bentoml.picklable_model.save_model(, model)

if __name__ == "__main__":


import bentoml
import numpy as np
import pandas as pd
from import JSON, NumpyNdarray
from hydra import compose, initialize
from patsy import dmatrix
from pydantic import BaseModel

with initialize( config_path="../../config"):
    config = compose(config_name="main")
    FEATURES = config.process.features

# Request schema for the predict endpoint: it is passed as `pydantic_model`
# to the JSON input descriptor of `predict`. Every field declares a default.
class Employee(BaseModel):
    City: str = "Pune"
    PaymentTier: int = 1
    Age: int = 25
    Gender: str = "Female"
    EverBenched: str = "No"
    ExperienceInCurrentDomain: int = 1

def add_dummy_data(df: pd.DataFrame):
    """Add dummy rows so that patsy can create features similar to the train dataset.

    Three fixed rows are appended after the rows of ``df``; the input frame is
    not modified.

    Args:
        df: Frame with the columns City, Gender, EverBenched, PaymentTier,
            Age and ExperienceInCurrentDomain.

    Returns:
        A new DataFrame holding the rows of ``df`` followed by the dummy rows.
        The index is not reset, so index labels may repeat; callers should
        select rows by position.
    """
    rows = {
        "City": ["Bangalore", "New Delhi", "Pune"],
        "Gender": ["Male", "Female", "Female"],
        "EverBenched": ["Yes", "Yes", "No"],
        "PaymentTier": [0, 0, 0],
        "Age": [0, 0, 0],
        "ExperienceInCurrentDomain": [0, 0, 0],
    }
    dummy_df = pd.DataFrame(rows)
    return pd.concat([df, dummy_df])

def rename_columns(X: pd.DataFrame):
    """Strip square brackets from the column names of ``X`` in place.

    Every "[" becomes "_" and every "]" is removed, e.g. ``City[Pune]``
    becomes ``City_Pune``.

    Args:
        X: Frame whose column labels are strings.

    Returns:
        The same DataFrame object, with its columns renamed.
    """
    # Literal replacement: a lone "[" is not a valid regular expression, so
    # regex=True only works on pandas versions that special-case
    # single-character patterns as literals.
    X.columns = X.columns.str.replace("[", "_", regex=False).str.replace(
        "]", "", regex=False
    )
    return X

def transform_data(df: pd.DataFrame):
    """Turn the input frame into a single-row feature array.

    The frame is padded with dummy rows, run through a patsy formula built
    from ``FEATURES`` (joined with " + ", followed by " - 1"), and its columns
    are renamed. Only the first row of the result is kept.

    Returns:
        The first row's values reshaped to ``(1, -1)``.
    """
    formula = " + ".join(FEATURES) + " - 1"
    padded = add_dummy_data(df)
    design = rename_columns(dmatrix(formula, padded, return_type="dataframe"))
    first_row = design.iloc[0, :].values
    return first_row.reshape(1, -1)

model = bentoml.picklable_model.get(
# Create service with the model
service = bentoml.Service("predict_employee", runners=[model])

# API endpoint on `service`: takes a JSON body validated as Employee and
# returns a NumPy array.
@service.api(input=JSON(pydantic_model=Employee), output=NumpyNdarray())
def predict(employee: Employee) -> np.ndarray:
    """Transform the data then make predictions"""
    # Build a one-row frame from the request fields.
    df = pd.DataFrame(employee.dict(), index=[0])
    # transform_data returns a reshaped array, so `df` is no longer a
    # DataFrame after this line.
    df = transform_data(df)
    # NOTE(review): the right-hand side appears truncated in this listing; as
    # written it is just the literal list [0]. Presumably the first element of
    # the runner's prediction on `df` was taken here - confirm.
    result =[0]
    return np.array(result)

Expected behavior

Calling the /predict API should return the prediction results, not an error like "'XGBClassifier' object has no attribute '__call__'".


These are the library versions I used:

bentoml==1.1.0 dagshub==0.1.8 deepchecks==0.6.1 hydra-core==1.2.0 hyperopt==0.2.7 joblib==1.1.1 mlflow==1.25.1 numpy==1.22.4 pandas==1.4.2 pandera==0.13.4 patsy==0.5.2 pydantic==1.9.1 pytest_steps==1.8.0 requests==2.28.0 scikit_learn==1.2.1 streamlit==1.10.0 xgboost==1.7.6 dvc==2.8.1 fsspec==2022.7.1

trongnghia05 commented 1 year ago

I realize that when serving, I call, and it seems like this function does something to reload the model using cloudpickle, but after loading, it does not convert it into a runner, causing the saved model to not have the registered call method when using save_model. How can I handle this situation?

trongnghia05 commented 1 year ago

The following simple code snippet does not work either: when it runs, the model does not have the registered __call__ method.

import bentoml

model = bentoml.picklable_model.get("xgboost:latest").to_runner()
# print(getattr(model, "__call__"))[[5.9, 3., 5.1, 1.8]])
frostming commented 1 year ago

So the result of this line:

# Find best model
best_model = optimize(objective, space)

Can best_model be called directly with best_model(...) ? If not, how is it supposed to be used in prediction?

trongnghia05 commented 1 year ago

Skipping the lengthy code above, I simply ran:

model = bentoml.picklable_model.get("xgboost:latest").to_runner()
#print(getattr(model, "call"))[[5.9, 3., 5.1, 1.8]])

and an error occurred.

Result when running with the run call active and print(getattr(model, "__call__")) commented out:

Traceback (most recent call last):
  File "", line 7, in <module>[7])
  File "/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/bentoml/_internal/runner/", line 52, in run
    return self.runner._runner_handle.run_method(self, *args, **kwargs)
  File "/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/bentoml/_internal/runner/runner_handle/", line 48, in run_method
    return getattr(self._runnable,*args, **kwargs)
  File "/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/bentoml/_internal/runner/", line 140, in method
    return self.func(obj, *args, **kwargs)
  File "/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/bentoml/_internal/frameworks/", line 171, in _run
    return getattr(self.model, method_name)(
AttributeError: 'XGBClassifier' object has no attribute '__call__'

If I comment out the run call and run the code as follows, it still works, which seems to prove that the __call__ method exists. However, I don't understand why calling run raises an error saying there is no __call__. Result when running with print(getattr(model, "__call__")) active and the run call commented out:

import bentoml

model= bentoml.picklable_model.get("xgboost:latest").to_runner()
print(getattr(model, "__call__"))
/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/requests/ RequestsDependencyWarning: urllib3 (1.26.16) or chardet (5.2.0)/charset_normalizer (2.0.12) doesn't match a supported version!
'Runner.init_local' is for debugging and testing only. Make sure to remove it before deploying to production.
RunnerMethod(runner=Runner(name='xgboost', models=[Model(tag="xgboost:tzvcynrr2kpu5xjw", path="/home/nghiamt/bentoml/models/xgboost/tzvcynrr2kpu5xjw")], resource_config=None, runnable_class=<class 'bentoml._internal.frameworks.picklable.get_runnable.<locals>.PicklableRunnable'>, embedded=False, runner_methods=[...], scheduling_strategy=<class 'bentoml._internal.runner.strategy.DefaultStrategy'>, workers_per_resource=1, runnable_init_params={}, _runner_handle=<bentoml._internal.runner.runner_handle.local.LocalRunnerRef object at 0x7f9297c627f0>), name='__call__', config=RunnableMethodConfig(batchable=True, batch_dim=(0, 0), input_spec=None, output_spec=None), max_batch_size=100, max_latency_ms=60000)

This indicates that there is an issue with the run method.

frostming commented 1 year ago

The error says __call__ is missing on the model object, while what you are inspecting is the runner object, they are different objects and have different methods. Please inspect on the resulted best_model to see if __call__ is present. You can also get the underlying model by:

model= bentoml.picklable_model.load_model("xgboost:latest")
print(getattr(model, "__call__"))  # <-- is it there?

BentoML also has built-in support for the XGBoost framework; try saving and loading the model with bentoml.xgboost.* instead of bentoml.picklable_model. The default entry point for XGBoost model inference is .predict().

trongnghia05 commented 1 year ago

I tried getattr(model, "__call__") to check whether the __call__ method exists. If I skip the run method, my code works: image

frostming commented 1 year ago

I tried getattr(model, "__call__") to check __call__ method existed

You are checking the wrong object: the model in your screenshot is actually a runner (it is returned by the to_runner() method). Please check the code in my last reply; it is different.

aarnphm commented 1 year ago

You can also just check whether __call__ exists on the loaded model. But it seems to me that the original model doesn't have a __call__ function.