microsoft / hummingbird

Hummingbird compiles trained ML models into tensor computation for faster inference.
MIT License
3.34k stars 278 forks source link

MissingConverter: Unable to find converter <class 'sklearn.preprocessing._encoders.OrdinalEncoder'> #671

Open dintellect opened 1 year ago

dintellect commented 1 year ago
---------------------------------------------------------------------------
MissingConverter                          Traceback (most recent call last)
/var/folders/f2/9tbmpg411hndwc482xn850br0000gn/T/ipykernel_27005/3005074338.py in <module>
----> 1 hb_model = convert(clf, 'torch',X_train[0:1])

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/convert.py in convert(model, backend, test_input, device, extra_config)
    442     """
    443     assert constants.REMAINDER_SIZE not in extra_config
--> 444     return _convert_common(model, backend, test_input, device, extra_config)
    445 
    446 

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/convert.py in _convert_common(model, backend, test_input, device, extra_config)
    403         return _convert_sparkml(model, backend_formatted, test_input, device, extra_config)
    404 
--> 405     return _convert_sklearn(model, backend_formatted, test_input, device, extra_config)
    406 
    407 

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/convert.py in _convert_sklearn(model, backend, test_input, device, extra_config)
    106     # We modify the scikit learn model during translation.
    107     model = deepcopy(model)
--> 108     topology = parse_sklearn_api_model(model, extra_config)
    109 
    110     # Convert the Topology object into a PyTorch model.

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/_parse.py in parse_sklearn_api_model(model, extra_config)
     63     # Parse the input scikit-learn model into a topology object.
     64     # Get the outputs of the model.
---> 65     outputs = _parse_sklearn_api(topology, model, inputs)
     66 
     67     # Declare output variables.

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/_parse.py in _parse_sklearn_api(topology, model, inputs)
    228     tmodel = type(model)
    229     if tmodel in sklearn_api_parsers_map:
--> 230         outputs = sklearn_api_parsers_map[tmodel](topology, model, inputs)
    231     else:
    232         outputs = _parse_sklearn_single_model(topology, model, inputs)

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/_parse.py in _parse_sklearn_pipeline(topology, model, inputs)
    274     """
    275     for step in model.steps:
--> 276         inputs = _parse_sklearn_api(topology, step[1], inputs)
    277     return inputs
    278 

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/_parse.py in _parse_sklearn_api(topology, model, inputs)
    228     tmodel = type(model)
    229     if tmodel in sklearn_api_parsers_map:
--> 230         outputs = sklearn_api_parsers_map[tmodel](topology, model, inputs)
    231     else:
    232         outputs = _parse_sklearn_single_model(topology, model, inputs)

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/_parse.py in _parse_sklearn_column_transformer(topology, model, inputs)
    451                 )
    452         else:
--> 453             var_out = _parse_sklearn_api(topology, model_obj, transform_inputs)[0]
    454             if model.transformer_weights is not None and name in model.transformer_weights:
    455                 # Create a Multiply node

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/_parse.py in _parse_sklearn_api(topology, model, inputs)
    230         outputs = sklearn_api_parsers_map[tmodel](topology, model, inputs)
    231     else:
--> 232         outputs = _parse_sklearn_single_model(topology, model, inputs)
    233 
    234     return outputs

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/_parse.py in _parse_sklearn_single_model(topology, model, inputs)
    250         raise RuntimeError("Parameter model must be an object not a " "string '{0}'.".format(model))
    251 
--> 252     alias = get_sklearn_api_operator_name(type(model))
    253     this_operator = topology.declare_logical_operator(alias, model)
    254     this_operator.inputs = inputs

~/opt/anaconda3/lib/python3.9/site-packages/hummingbird/ml/supported.py in get_sklearn_api_operator_name(model_type)
    463     """
    464     if model_type not in sklearn_api_operator_name_map:
--> 465         raise MissingConverter("Unable to find converter for model type {}.".format(model_type))
    466     return sklearn_api_operator_name_map[model_type]
    467 

MissingConverter: Unable to find a converter for model type <class 'sklearn.preprocessing._encoders.OrdinalEncoder'>.
It usually means the pipeline being converted contains a
transformer or a predictor with no corresponding converter implemented.
Please fill an issue at https://github.com/microsoft/hummingbird.

Which scikit-learn pipeline operators does Hummingbird support?

ksaur commented 1 year ago

You can read about our supported operators in our wiki.

I don't think this one is on the list; see the "Preprocessing" section in particular. Could you please post your code so that we can add it to the feature requests?

dintellect commented 1 year ago

Below is the code:

# ML Libraries
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import xgboost as xgb

#Ordinal Encoding
categorical_transformer = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
preprocessor = ColumnTransformer(transformers=[("cat", categorical_transformer, categorical_features),])

#Scikit Learn Pipeline
clf = Pipeline(steps=[("preprocessor", preprocessor),("classifier", model)])

#Model Training
clf.fit(X_train, y_train)

#Conversion
from hummingbird.ml import convert
hb_model = convert(clf, 'torch',X_train[0:1])

ksaur commented 1 year ago

Thanks, we'll take a look! In the meantime, OneHotEncoder (which we support) might work as a substitute, although that depends on your dataset.