jpmml / sklearn2pmml

Python library for converting Scikit-Learn pipelines to PMML
GNU Affero General Public License v3.0

Attaching post-processing to ensemble models #342

Closed vkvrg closed 2 years ago

vkvrg commented 2 years ago

Hi

There is an issue with PMML generation when the pipeline includes a post-processing step.

I am trying to convert a linear regression model with segmentation to PMML, using Alias(ExpressionTransformer("numpy.exp(X[0])"), 'y', prefit=True) as the predict_transformer argument of PMMLPipeline.

And I get the following error:

Standard output is empty
Standard error:
Aug 12, 2022 5:41:45 PM sklearn2pmml.pipeline.PMMLPipeline initTargetFields
WARNING: Attribute 'sklearn2pmml.pipeline.PMMLPipeline.target_fields' is not set. Assuming y as the name of the target field
Exception in thread "main" org.jpmml.model.UnsupportedAttributeException: Attribute with value Segmentation@multipleModelMethod=selectFirst is not supported
    at org.jpmml.converter.mining.MiningModelUtil.getFinalModel(MiningModelUtil.java:245)
    at org.jpmml.converter.mining.MiningModelUtil.getFinalModel(MiningModelUtil.java:231)
    at sklearn2pmml.pipeline.PMMLPipeline.encodePMML(PMMLPipeline.java:260)
    at com.sklearn2pmml.Main.run(Main.java:84)
    at com.sklearn2pmml.Main.main(Main.java:62)

I am using version 0.85.0.

The strange thing is that it works fine without the predict_transformer argument.

A reproducible example:

import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import sklearn2pmml
from sklearn2pmml.decoration import Alias, ContinuousDomain
from sklearn2pmml.ensemble import SelectFirstRegressor
from sklearn2pmml.pipeline import PMMLPipeline
from sklearn2pmml.preprocessing import ExpressionTransformer

def create_single_pipeline(features, impute_values, functions, logreg_coefs, intercept, n_step):
    # Per-feature transformation: named expression followed by constant imputation
    n_feats = len(features)
    trans_pipes = [Pipeline([
        ('expression', Alias(ExpressionTransformer(expr=functions[i]),
                             'expr_' + str(n_step) + '_' + str(i))),
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='constant',
                                  fill_value=impute_values[i]))
    ]) for i in range(n_feats)]

    column_trans = ColumnTransformer([(features[i] + '_trans', trans_pipes[i], [features[i]]) for i in range(n_feats)])

    # Fit on random data, then overwrite the fitted coefficients with the known values
    lr = LinearRegression()
    n = 1000
    data_samp = pd.DataFrame(np.random.uniform(size=(n, n_feats)), columns=features)

    pipe = Pipeline([('transformer', column_trans), ('logreg', lr)])
    pipe.fit(X=data_samp, y=np.random.random(n) * 1000)
    lr.intercept_ = intercept
    lr.coef_ = np.array(logreg_coefs)
    return pipe

def create_segmented_pmml(all_features, conditions, step_features, impute_values, functions,
                          logreg_coefs, intercepts, save_path, df):
    all_mapper = DataFrameMapper([([feat], [ContinuousDomain()]) for feat in all_features], input_df=True, df_out=True)

    # One (name, pipeline, predicate) triple per segment
    steps = []
    n_steps = len(conditions)
    name = [str(i) for i in range(n_steps)]
    for i in range(n_steps):
        pipe = create_single_pipeline(step_features[i], impute_values[i], functions[i], logreg_coefs[i], intercepts[i], i)
        steps.append((name[i], pipe, conditions[i]))
    all_mapper.fit(df)

    # Post-processing step: exponentiate the prediction
    pt = Alias(ExpressionTransformer("numpy.exp(X[0])"), 'y', prefit=True)
    pipe_conditional = PMMLPipeline([
        ('mapper', all_mapper),
        ('conditional_pipeline', SelectFirstRegressor(steps))
    ], predict_transformer=pt)

    sklearn2pmml(pipe_conditional, save_path)
    return pipe_conditional

all_features = ['segment', 'x1', 'x2']
conditions = ['X[0] == 0.', 'X[0] == 1.']
step_features = [['x1', 'x2'],['x1', 'x2']]
impute_values = [[0.0, 0.0],[0.0, 0.0]]
funcs_1 = [
    '0.0 if X[0] is None else X[0]',
    '0.0 if X[0] is None else X[0]',
]
funcs_2 = [
    '0.0 if X[0] is None else X[0]',
    '0.0 if X[0] is None else X[0]',
]
functions = [funcs_1, funcs_2]
linreg_coefs = [[-0.122134, -0.115721], [0.100822, 0.330425]]
intercepts = [4.7, -2.0]

np.random.seed(89)
df = pd.DataFrame(np.random.uniform(size=(1000, 3)), columns=['segment','x1','x2'])
df['segment'] = np.random.binomial(n=1, p=0.5, size=[1000])

pipe = create_segmented_pmml(all_features, conditions, step_features, impute_values, functions, linreg_coefs, intercepts, 'res.pmml', df)

I would appreciate your help!

vruusmann commented 2 years ago

This issue is specific to the sklearn2pmml.ensemble.SelectFirstRegressor estimator class. It wouldn't happen with, say, the sklearn.linear_model.LinearRegression estimator class.
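
For reference, a minimal sketch of the non-ensemble case (an editorial example, not taken from the report; the random data, column names and output file name are arbitrary). If the above holds, attaching the same predict_transformer to a plain LinearRegression should convert cleanly:

import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn2pmml import sklearn2pmml
from sklearn2pmml.decoration import Alias
from sklearn2pmml.pipeline import PMMLPipeline
from sklearn2pmml.preprocessing import ExpressionTransformer

X = pd.DataFrame(np.random.uniform(size=(100, 2)), columns=['x1', 'x2'])
y = np.random.random(100)

# Same post-processing step as in the report, attached to a single-model pipeline;
# there is only one model producing the prediction, so the converter should know
# where to append the post-processed output.
pt = Alias(ExpressionTransformer("numpy.exp(X[0])"), 'y', prefit=True)
pipeline = PMMLPipeline([('regressor', LinearRegression())], predict_transformer=pt)
pipeline.fit(X, y)

sklearn2pmml(pipeline, 'linreg.pmml')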

The problem is that ensemble models have multiple "exit points" in their computational graph. The MiningModelUtil#getFinalModel(...) utility method operates under the assumption that there's only one "exit point", hence it gets confused, and bails out with an exception.
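
A rough way to picture that distinction from the Python side (an editorial sketch using the estimator classes discussed above, not the converter's internal logic):

from sklearn.linear_model import LinearRegression
from sklearn2pmml.ensemble import SelectFirstRegressor

# Single "exit point": one model produces the prediction, so there is an
# unambiguous place for the converter to attach the predict_transformer output.
plain = LinearRegression()

# Multiple "exit points": one member model per segment, any of which may produce
# the final prediction at scoring time, so MiningModelUtil#getFinalModel(...)
# cannot pick a unique model and bails out with UnsupportedAttributeException.
segmented = SelectFirstRegressor([
    ('seg0', LinearRegression(), 'X[0] == 0.'),
    ('seg1', LinearRegression(), 'X[0] == 1.'),
])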

I'm suffering a bit from this issue in my personal workflows as well.