Closed jperuggia closed 3 years ago
@jperuggia Thanks for your findings. Would you mind sending me your model so that I can debug where it goes wrong? For a general regression model, NaN predictions are usually caused by missing or invalid input values, so please double-check that the input values are within the valid ranges defined in the PMML: for a numeric field, the data field can define valid intervals; for a categorical field, the data field can have a list of valid categories.
@scorebot Thanks for your quick response. I have attached a copy of the sample PMML file in question. I've verified that my sample call only uses valid values for all categorical fields. Again, the input I am sending works and scores as expected when using the jpmml-evaluator, but not with pmml4s.
Sample Request
"inputs":{
"POL_PFM_LVL_CD_Grp":"NoHit",
"VEH_CARFAX_Recent_Annual_Mileage_Curve":51230,
"POL_PFM_LVL_CD_Grp2":"E-H",
"BI_POL_LMT_CSL_IND":"N",
"LOC_RISK_ST_ALPHA_CD_Grp":"Tier-4",
"POL_NEW_VEH_AGE_999":"999",
"INC_MON_CLEAN_POL_Grp":"Clean",
"POL_PRIR_ST_MIN_BI_LMT_IND":"Y",
"INC_NAF_IND_3YR_Grp":100,
"POL_PFM_LVL_CD_Curve":10,
"INC_NAF_IND_5YR_Grp":2.3,
"POL_YOUGEST_SEX_DERIVED":"F",
"DRV_PRNCPL_INS_AGE_Curve":34,
"LOC_RISK_ST_NF":"V",
"INC_NCAF_IND_5YR_Grp":10,
"INC_CAF_IND_5YR_Grp":15,
"POL_VEH_CNT_Curve":5,
"POL_YOUTH_CNT_Grp":2,
"INC_COMP_IND_5YR":4,
"POL_NEW_VEH_AGE_Curve":4,
"INC_COMP_IND_3YR":2,
"VEH_CARFAX_Recent_Annual_Mileage_BLANK":"BLANK",
"POL_COV_COMPOSITION_Grp":"Other",
"POL_YNG_LST_DRV_AGE_YR_Curve1":16,
"POL_HOMEOWNERSHIP_Grp":"NO HOME",
"POL_OVRALL_MULTILN_IND":"N",
"BI_ST_MIN_IND":"N",
"INC_MON_CLEAN_POL_Curve":123,
"POL_TRM_RNEW_DSCR":"NEW",
"LocationScore_MetScore":123,
"POL_MRY_CNT_Grp":"0",
"POL_SRC_OF_BUS_CD_Grp":"IA/PCS",
"POL_TENURE_Curve":4,
"BI_LIMIT_GROUP_Grp":"High",
"LOC_RTZIP_NE_BLZIP_IND":"N",
"POL_DR_VEH_CNT_2_Grp":"V1_D<=V"
}
PMML File Contents
<?xml version="1.0" encoding="UTF-8"?>
<PMML version="4.3" xmlns="http://www.dmg.org/PMML-4_3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dmg.org/PMML-4_3 http://www.dmg.org/pmml/v4-3/pmml-4-3.xsd">
<Header copyright="Copyright (c) 2017" description="Logistic regression Model for Not-At-Fault Claims POC">
<Extension name="user" value="jperuggia" extender="Rattle/PMML"/>
<Application name="Rattle/PMML" version="1.4"/>
<Timestamp>2020-11-16 16:26:31</Timestamp>
</Header>
<DataDictionary numberOfFields="37">
<DataField name="INC_NAF_CURRTRM_RES" optype="continuous" dataType="double"/>
<DataField name="BI_LIMIT_GROUP_Grp" optype="categorical" dataType="string">
<Value value="High"/>
<Value value="Low"/>
<Value value="Medium"/>
<Value value="Very High"/>
</DataField>
<DataField name="BI_ST_MIN_IND" optype="categorical" dataType="string">
<Value value="N"/>
<Value value="Y"/>
</DataField>
<DataField name="BI_POL_LMT_CSL_IND" optype="categorical" dataType="string">
<Value value="N"/>
<Value value="Y"/>
</DataField>
<DataField name="DRV_PRNCPL_INS_AGE_Curve" optype="continuous" dataType="double"/>
<DataField name="LocationScore_MetScore" optype="continuous" dataType="double"/>
<DataField name="INC_CAF_IND_5YR_Grp" optype="continuous" dataType="double"/>
<DataField name="INC_COMP_IND_3YR" optype="continuous" dataType="double"/>
<DataField name="INC_COMP_IND_5YR" optype="continuous" dataType="double"/>
<DataField name="INC_MON_CLEAN_POL_Grp" optype="categorical" dataType="string">
<Value value="Clean"/>
<Value value="NQ"/>
</DataField>
<DataField name="INC_MON_CLEAN_POL_Curve" optype="continuous" dataType="double"/>
<DataField name="INC_NAF_IND_3YR_Grp" optype="continuous" dataType="double"/>
<DataField name="INC_NAF_IND_5YR_Grp" optype="continuous" dataType="double"/>
<DataField name="INC_NCAF_IND_5YR_Grp" optype="continuous" dataType="double"/>
<DataField name="LOC_RISK_ST_ALPHA_CD_Grp" optype="categorical" dataType="string">
<Value value="Tier-1"/>
<Value value="Tier-2"/>
<Value value="Tier-3"/>
<Value value="Tier-4"/>
<Value value="Tier-5"/>
<Value value="Tier-6"/>
</DataField>
<DataField name="LOC_RISK_ST_NF" optype="categorical" dataType="string">
<Value value="A"/>
<Value value="M"/>
<Value value="N/A"/>
<Value value="V"/>
</DataField>
<DataField name="LOC_RTZIP_NE_BLZIP_IND" optype="categorical" dataType="string">
<Value value="N"/>
<Value value="Y"/>
</DataField>
<DataField name="POL_COV_COMPOSITION_Grp" optype="categorical" dataType="string">
<Value value="ALL FULL"/>
<Value value="Other"/>
<Value value="SOME FULL"/>
</DataField>
<!-- Categorical string field with three allowed categories.
     The "less than" sign in the second category is written as &lt; because a
     literal "<" is not permitted inside an XML attribute value; a parser
     still delivers the value V2+_D<V to the application. -->
<DataField name="POL_DR_VEH_CNT_2_Grp" optype="categorical" dataType="string">
  <Value value="V1_D>=V"/>
  <Value value="V2+_D&lt;V"/>
  <Value value="V2+_D>=V"/>
</DataField>
<DataField name="POL_HOMEOWNERSHIP_Grp" optype="categorical" dataType="string">
<Value value="HOME"/>
<Value value="NO HOME"/>
</DataField>
<DataField name="POL_MRY_CNT_Grp" optype="categorical" dataType="string">
<Value value="0"/>
<Value value="1"/>
<Value value="2"/>
<Value value="other"/>
</DataField>
<DataField name="POL_NEW_VEH_AGE_999" optype="categorical" dataType="string">
<Value value="999"/>
<Value value="other"/>
</DataField>
<DataField name="POL_NEW_VEH_AGE_Curve" optype="continuous" dataType="double"/>
<DataField name="POL_OVRALL_MULTILN_IND" optype="categorical" dataType="string">
<Value value="N"/>
<Value value="Y"/>
</DataField>
<!-- Categorical string field with six allowed categories.
     The ampersand in "PFM2&3" is written as &amp; because a bare "&" is not
     well-formed XML; a parser still delivers the value PFM2&3. -->
<DataField name="POL_PFM_LVL_CD_Grp" optype="categorical" dataType="string">
  <Value value="NoHit"/>
  <Value value="PFM1"/>
  <Value value="PFM2&amp;3"/>
  <Value value="PFM4"/>
  <Value value="Score"/>
  <Value value="Unknown"/>
</DataField>
<DataField name="POL_PFM_LVL_CD_Curve" optype="continuous" dataType="double"/>
<DataField name="POL_PFM_LVL_CD_Grp2" optype="categorical" dataType="string">
<Value value="E-H"/>
<Value value="Other"/>
</DataField>
<DataField name="POL_PRIR_ST_MIN_BI_LMT_IND" optype="categorical" dataType="string">
<Value value="N"/>
<Value value="Y"/>
</DataField>
<DataField name="POL_SRC_OF_BUS_CD_Grp" optype="categorical" dataType="string">
<Value value="DRM/IB/ITC/TSU"/>
<Value value="GPC/USD"/>
<Value value="IA/PCS"/>
</DataField>
<DataField name="POL_TENURE_Curve" optype="continuous" dataType="double"/>
<DataField name="POL_TRM_RNEW_DSCR" optype="categorical" dataType="string">
<Value value="NEW"/>
<Value value="RENEW"/>
</DataField>
<DataField name="POL_YNG_LST_DRV_AGE_YR_Curve1" optype="continuous" dataType="double"/>
<DataField name="POL_VEH_CNT_Curve" optype="continuous" dataType="double"/>
<DataField name="POL_YOUGEST_SEX_DERIVED" optype="categorical" dataType="string">
<Value value="F"/>
<Value value="M"/>
</DataField>
<DataField name="POL_YOUTH_CNT_Grp" optype="categorical" dataType="string">
<Value value="0"/>
<Value value="1"/>
<Value value="2"/>
</DataField>
<DataField name="VEH_CARFAX_Recent_Annual_Mileage_BLANK" optype="categorical" dataType="string">
<Value value="BLANK"/>
<Value value="Non-BLANK"/>
</DataField>
<DataField name="VEH_CARFAX_Recent_Annual_Mileage_Curve" optype="continuous" dataType="double"/>
</DataDictionary>
<GeneralRegressionModel modelName="MetScore Model: beta version" modelType="generalizedLinear" functionName="regression" algorithmName="glm" distribution="binomial" linkFunction="logit">
<MiningSchema>
<MiningField name="INC_NAF_CURRTRM_RES" usageType="predicted"/>
<MiningField name="BI_LIMIT_GROUP_Grp" usageType="active"/>
<MiningField name="BI_ST_MIN_IND" usageType="active"/>
<MiningField name="BI_POL_LMT_CSL_IND" usageType="active"/>
<MiningField name="DRV_PRNCPL_INS_AGE_Curve" usageType="active"/>
<MiningField name="LocationScore_MetScore" usageType="active"/>
<MiningField name="INC_CAF_IND_5YR_Grp" usageType="active"/>
<MiningField name="INC_COMP_IND_3YR" usageType="active"/>
<MiningField name="INC_COMP_IND_5YR" usageType="active"/>
<MiningField name="INC_MON_CLEAN_POL_Grp" usageType="active"/>
<MiningField name="INC_MON_CLEAN_POL_Curve" usageType="active"/>
<MiningField name="INC_NAF_IND_3YR_Grp" usageType="active"/>
<MiningField name="INC_NAF_IND_5YR_Grp" usageType="active"/>
<MiningField name="INC_NCAF_IND_5YR_Grp" usageType="active"/>
<MiningField name="LOC_RISK_ST_ALPHA_CD_Grp" usageType="active"/>
<MiningField name="LOC_RISK_ST_NF" usageType="active"/>
<MiningField name="LOC_RTZIP_NE_BLZIP_IND" usageType="active"/>
<MiningField name="POL_COV_COMPOSITION_Grp" usageType="active"/>
<MiningField name="POL_DR_VEH_CNT_2_Grp" usageType="active"/>
<MiningField name="POL_HOMEOWNERSHIP_Grp" usageType="active"/>
<MiningField name="POL_MRY_CNT_Grp" usageType="active"/>
<MiningField name="POL_NEW_VEH_AGE_999" usageType="active"/>
<MiningField name="POL_NEW_VEH_AGE_Curve" usageType="active"/>
<MiningField name="POL_OVRALL_MULTILN_IND" usageType="active"/>
<MiningField name="POL_PFM_LVL_CD_Grp" usageType="active"/>
<MiningField name="POL_PFM_LVL_CD_Curve" usageType="active"/>
<MiningField name="POL_PFM_LVL_CD_Grp2" usageType="active"/>
<MiningField name="POL_PRIR_ST_MIN_BI_LMT_IND" usageType="active"/>
<MiningField name="POL_SRC_OF_BUS_CD_Grp" usageType="active"/>
<MiningField name="POL_TENURE_Curve" usageType="active"/>
<MiningField name="POL_TRM_RNEW_DSCR" usageType="active"/>
<MiningField name="POL_YNG_LST_DRV_AGE_YR_Curve1" usageType="active"/>
<MiningField name="POL_VEH_CNT_Curve" usageType="active"/>
<MiningField name="POL_YOUGEST_SEX_DERIVED" usageType="active"/>
<MiningField name="POL_YOUTH_CNT_Grp" usageType="active"/>
<MiningField name="VEH_CARFAX_Recent_Annual_Mileage_BLANK" usageType="active"/>
<MiningField name="VEH_CARFAX_Recent_Annual_Mileage_Curve" usageType="active"/>
</MiningSchema>
<Output>
<OutputField name="Predicted_INC_NAF_CURRTRM_RES" feature="predictedValue"/>
</Output>
<!-- Model parameters p0..p54. p0 is the intercept; for factor terms the label
     is the predictor name immediately followed by the category (compare the
     PPMatrix cells that use the same parameter names).
     The labels of p27 and p37 contain "<" and "&", which are escaped as &lt;
     and &amp; so that the document is well-formed XML. -->
<ParameterList>
  <Parameter name="p0" label="(Intercept)"/>
  <Parameter name="p1" label="BI_LIMIT_GROUP_GrpLow"/>
  <Parameter name="p2" label="BI_LIMIT_GROUP_GrpMedium"/>
  <Parameter name="p3" label="BI_LIMIT_GROUP_GrpVery High"/>
  <Parameter name="p4" label="BI_ST_MIN_INDY"/>
  <Parameter name="p5" label="BI_POL_LMT_CSL_INDY"/>
  <Parameter name="p6" label="DRV_PRNCPL_INS_AGE_Curve"/>
  <Parameter name="p7" label="LocationScore_MetScore"/>
  <Parameter name="p8" label="INC_CAF_IND_5YR_Grp"/>
  <Parameter name="p9" label="INC_COMP_IND_3YR"/>
  <Parameter name="p10" label="INC_COMP_IND_5YR"/>
  <Parameter name="p11" label="INC_MON_CLEAN_POL_GrpNQ"/>
  <Parameter name="p12" label="INC_MON_CLEAN_POL_Curve"/>
  <Parameter name="p13" label="INC_NAF_IND_3YR_Grp"/>
  <Parameter name="p14" label="INC_NAF_IND_5YR_Grp"/>
  <Parameter name="p15" label="INC_NCAF_IND_5YR_Grp"/>
  <Parameter name="p16" label="LOC_RISK_ST_ALPHA_CD_GrpTier-2"/>
  <Parameter name="p17" label="LOC_RISK_ST_ALPHA_CD_GrpTier-3"/>
  <Parameter name="p18" label="LOC_RISK_ST_ALPHA_CD_GrpTier-4"/>
  <Parameter name="p19" label="LOC_RISK_ST_ALPHA_CD_GrpTier-5"/>
  <Parameter name="p20" label="LOC_RISK_ST_ALPHA_CD_GrpTier-6"/>
  <Parameter name="p21" label="LOC_RISK_ST_NFM"/>
  <Parameter name="p22" label="LOC_RISK_ST_NFN/A"/>
  <Parameter name="p23" label="LOC_RISK_ST_NFV"/>
  <Parameter name="p24" label="LOC_RTZIP_NE_BLZIP_INDY"/>
  <Parameter name="p25" label="POL_COV_COMPOSITION_GrpOther"/>
  <Parameter name="p26" label="POL_COV_COMPOSITION_GrpSOME FULL"/>
  <Parameter name="p27" label="POL_DR_VEH_CNT_2_GrpV2+_D&lt;V"/>
  <Parameter name="p28" label="POL_DR_VEH_CNT_2_GrpV2+_D>=V"/>
  <Parameter name="p29" label="POL_HOMEOWNERSHIP_GrpNO HOME"/>
  <Parameter name="p30" label="POL_MRY_CNT_Grp1"/>
  <Parameter name="p31" label="POL_MRY_CNT_Grp2"/>
  <Parameter name="p32" label="POL_MRY_CNT_Grpother"/>
  <Parameter name="p33" label="POL_NEW_VEH_AGE_999other"/>
  <Parameter name="p34" label="POL_NEW_VEH_AGE_Curve"/>
  <Parameter name="p35" label="POL_OVRALL_MULTILN_INDY"/>
  <Parameter name="p36" label="POL_PFM_LVL_CD_GrpPFM1"/>
  <Parameter name="p37" label="POL_PFM_LVL_CD_GrpPFM2&amp;3"/>
  <Parameter name="p38" label="POL_PFM_LVL_CD_GrpPFM4"/>
  <Parameter name="p39" label="POL_PFM_LVL_CD_GrpScore"/>
  <Parameter name="p40" label="POL_PFM_LVL_CD_GrpUnknown"/>
  <Parameter name="p41" label="POL_PFM_LVL_CD_Curve"/>
  <Parameter name="p42" label="POL_PFM_LVL_CD_Grp2Other"/>
  <Parameter name="p43" label="POL_PRIR_ST_MIN_BI_LMT_INDY"/>
  <Parameter name="p44" label="POL_SRC_OF_BUS_CD_GrpGPC/USD"/>
  <Parameter name="p45" label="POL_SRC_OF_BUS_CD_GrpIA/PCS"/>
  <Parameter name="p46" label="POL_TENURE_Curve"/>
  <Parameter name="p47" label="POL_TRM_RNEW_DSCRRENEW"/>
  <Parameter name="p48" label="POL_YNG_LST_DRV_AGE_YR_Curve1"/>
  <Parameter name="p49" label="POL_VEH_CNT_Curve"/>
  <Parameter name="p50" label="POL_YOUGEST_SEX_DERIVEDM"/>
  <Parameter name="p51" label="POL_YOUTH_CNT_Grp1"/>
  <Parameter name="p52" label="POL_YOUTH_CNT_Grp2"/>
  <Parameter name="p53" label="VEH_CARFAX_Recent_Annual_Mileage_BLANKNon-BLANK"/>
  <Parameter name="p54" label="VEH_CARFAX_Recent_Annual_Mileage_Curve"/>
</ParameterList>
<FactorList>
<Predictor name="BI_LIMIT_GROUP_Grp"/>
<Predictor name="BI_ST_MIN_IND"/>
<Predictor name="BI_POL_LMT_CSL_IND"/>
<Predictor name="INC_MON_CLEAN_POL_Grp"/>
<Predictor name="LOC_RISK_ST_ALPHA_CD_Grp"/>
<Predictor name="LOC_RISK_ST_NF"/>
<Predictor name="LOC_RTZIP_NE_BLZIP_IND"/>
<Predictor name="POL_COV_COMPOSITION_Grp"/>
<Predictor name="POL_DR_VEH_CNT_2_Grp"/>
<Predictor name="POL_HOMEOWNERSHIP_Grp"/>
<Predictor name="POL_MRY_CNT_Grp"/>
<Predictor name="POL_NEW_VEH_AGE_999"/>
<Predictor name="POL_OVRALL_MULTILN_IND"/>
<Predictor name="POL_PFM_LVL_CD_Grp"/>
<Predictor name="POL_PFM_LVL_CD_Grp2"/>
<Predictor name="POL_PRIR_ST_MIN_BI_LMT_IND"/>
<Predictor name="POL_SRC_OF_BUS_CD_Grp"/>
<Predictor name="POL_TRM_RNEW_DSCR"/>
<Predictor name="POL_YOUGEST_SEX_DERIVED"/>
<Predictor name="POL_YOUTH_CNT_Grp"/>
<Predictor name="VEH_CARFAX_Recent_Annual_Mileage_BLANK"/>
</FactorList>
<CovariateList>
<Predictor name="DRV_PRNCPL_INS_AGE_Curve"/>
<Predictor name="LocationScore_MetScore"/>
<Predictor name="INC_CAF_IND_5YR_Grp"/>
<Predictor name="INC_COMP_IND_3YR"/>
<Predictor name="INC_COMP_IND_5YR"/>
<Predictor name="INC_MON_CLEAN_POL_Curve"/>
<Predictor name="INC_NAF_IND_3YR_Grp"/>
<Predictor name="INC_NAF_IND_5YR_Grp"/>
<Predictor name="INC_NCAF_IND_5YR_Grp"/>
<Predictor name="POL_NEW_VEH_AGE_Curve"/>
<Predictor name="POL_PFM_LVL_CD_Curve"/>
<Predictor name="POL_TENURE_Curve"/>
<Predictor name="POL_YNG_LST_DRV_AGE_YR_Curve1"/>
<Predictor name="POL_VEH_CNT_Curve"/>
<Predictor name="VEH_CARFAX_Recent_Annual_Mileage_Curve"/>
</CovariateList>
<!-- Predictor-to-parameter mapping. For a factor, "value" is the category that
     activates the parameter; for a covariate, "value" is the exponent (1).
     Fixes relative to the exported file:
       * "<" and "&" in the p27 and p37 category values are escaped as &lt; and
         &amp; so that the document is well-formed XML.
       * The cell that mapped p42 to POL_PFM_LVL_CD_Grp with value "2Other" has
         been removed. "2Other" is not a category of POL_PFM_LVL_CD_Grp; the
         cell looks like an export artifact of matching the label
         "POL_PFM_LVL_CD_Grp2Other" against the shorter factor name. Two cells
         with different predictors for one parameter describe an interaction
         term, so the extra cell would have prevented p42 from ever applying.
         p42 is now mapped only to POL_PFM_LVL_CD_Grp2 = "Other". -->
<PPMatrix>
  <PPCell value="Low" predictorName="BI_LIMIT_GROUP_Grp" parameterName="p1"/>
  <PPCell value="Medium" predictorName="BI_LIMIT_GROUP_Grp" parameterName="p2"/>
  <PPCell value="Very High" predictorName="BI_LIMIT_GROUP_Grp" parameterName="p3"/>
  <PPCell value="Y" predictorName="BI_ST_MIN_IND" parameterName="p4"/>
  <PPCell value="Y" predictorName="BI_POL_LMT_CSL_IND" parameterName="p5"/>
  <PPCell value="1" predictorName="DRV_PRNCPL_INS_AGE_Curve" parameterName="p6"/>
  <PPCell value="1" predictorName="LocationScore_MetScore" parameterName="p7"/>
  <PPCell value="1" predictorName="INC_CAF_IND_5YR_Grp" parameterName="p8"/>
  <PPCell value="1" predictorName="INC_COMP_IND_3YR" parameterName="p9"/>
  <PPCell value="1" predictorName="INC_COMP_IND_5YR" parameterName="p10"/>
  <PPCell value="NQ" predictorName="INC_MON_CLEAN_POL_Grp" parameterName="p11"/>
  <PPCell value="1" predictorName="INC_MON_CLEAN_POL_Curve" parameterName="p12"/>
  <PPCell value="1" predictorName="INC_NAF_IND_3YR_Grp" parameterName="p13"/>
  <PPCell value="1" predictorName="INC_NAF_IND_5YR_Grp" parameterName="p14"/>
  <PPCell value="1" predictorName="INC_NCAF_IND_5YR_Grp" parameterName="p15"/>
  <PPCell value="Tier-2" predictorName="LOC_RISK_ST_ALPHA_CD_Grp" parameterName="p16"/>
  <PPCell value="Tier-3" predictorName="LOC_RISK_ST_ALPHA_CD_Grp" parameterName="p17"/>
  <PPCell value="Tier-4" predictorName="LOC_RISK_ST_ALPHA_CD_Grp" parameterName="p18"/>
  <PPCell value="Tier-5" predictorName="LOC_RISK_ST_ALPHA_CD_Grp" parameterName="p19"/>
  <PPCell value="Tier-6" predictorName="LOC_RISK_ST_ALPHA_CD_Grp" parameterName="p20"/>
  <PPCell value="M" predictorName="LOC_RISK_ST_NF" parameterName="p21"/>
  <PPCell value="N/A" predictorName="LOC_RISK_ST_NF" parameterName="p22"/>
  <PPCell value="V" predictorName="LOC_RISK_ST_NF" parameterName="p23"/>
  <PPCell value="Y" predictorName="LOC_RTZIP_NE_BLZIP_IND" parameterName="p24"/>
  <PPCell value="Other" predictorName="POL_COV_COMPOSITION_Grp" parameterName="p25"/>
  <PPCell value="SOME FULL" predictorName="POL_COV_COMPOSITION_Grp" parameterName="p26"/>
  <PPCell value="V2+_D&lt;V" predictorName="POL_DR_VEH_CNT_2_Grp" parameterName="p27"/>
  <PPCell value="V2+_D>=V" predictorName="POL_DR_VEH_CNT_2_Grp" parameterName="p28"/>
  <PPCell value="NO HOME" predictorName="POL_HOMEOWNERSHIP_Grp" parameterName="p29"/>
  <PPCell value="1" predictorName="POL_MRY_CNT_Grp" parameterName="p30"/>
  <PPCell value="2" predictorName="POL_MRY_CNT_Grp" parameterName="p31"/>
  <PPCell value="other" predictorName="POL_MRY_CNT_Grp" parameterName="p32"/>
  <PPCell value="other" predictorName="POL_NEW_VEH_AGE_999" parameterName="p33"/>
  <PPCell value="1" predictorName="POL_NEW_VEH_AGE_Curve" parameterName="p34"/>
  <PPCell value="Y" predictorName="POL_OVRALL_MULTILN_IND" parameterName="p35"/>
  <PPCell value="PFM1" predictorName="POL_PFM_LVL_CD_Grp" parameterName="p36"/>
  <PPCell value="PFM2&amp;3" predictorName="POL_PFM_LVL_CD_Grp" parameterName="p37"/>
  <PPCell value="PFM4" predictorName="POL_PFM_LVL_CD_Grp" parameterName="p38"/>
  <PPCell value="Score" predictorName="POL_PFM_LVL_CD_Grp" parameterName="p39"/>
  <PPCell value="Unknown" predictorName="POL_PFM_LVL_CD_Grp" parameterName="p40"/>
  <PPCell value="1" predictorName="POL_PFM_LVL_CD_Curve" parameterName="p41"/>
  <PPCell value="Other" predictorName="POL_PFM_LVL_CD_Grp2" parameterName="p42"/>
  <PPCell value="Y" predictorName="POL_PRIR_ST_MIN_BI_LMT_IND" parameterName="p43"/>
  <PPCell value="GPC/USD" predictorName="POL_SRC_OF_BUS_CD_Grp" parameterName="p44"/>
  <PPCell value="IA/PCS" predictorName="POL_SRC_OF_BUS_CD_Grp" parameterName="p45"/>
  <PPCell value="1" predictorName="POL_TENURE_Curve" parameterName="p46"/>
  <PPCell value="RENEW" predictorName="POL_TRM_RNEW_DSCR" parameterName="p47"/>
  <PPCell value="1" predictorName="POL_YNG_LST_DRV_AGE_YR_Curve1" parameterName="p48"/>
  <PPCell value="1" predictorName="POL_VEH_CNT_Curve" parameterName="p49"/>
  <PPCell value="M" predictorName="POL_YOUGEST_SEX_DERIVED" parameterName="p50"/>
  <PPCell value="1" predictorName="POL_YOUTH_CNT_Grp" parameterName="p51"/>
  <PPCell value="2" predictorName="POL_YOUTH_CNT_Grp" parameterName="p52"/>
  <PPCell value="Non-BLANK" predictorName="VEH_CARFAX_Recent_Annual_Mileage_BLANK" parameterName="p53"/>
  <PPCell value="1" predictorName="VEH_CARFAX_Recent_Annual_Mileage_Curve" parameterName="p54"/>
</PPMatrix>
<ParamMatrix>
<PCell parameterName="p0" df="1" beta="-6.89887110839231"/>
<PCell parameterName="p1" df="1" beta="-0.200243410527279"/>
<PCell parameterName="p2" df="1" beta="0.046520461226444"/>
<PCell parameterName="p3" df="1" beta="-0.0359335901035311"/>
<PCell parameterName="p4" df="1" beta="0.242638592846261"/>
<PCell parameterName="p5" df="1" beta="-0.0109479482659826"/>
<PCell parameterName="p6" df="1" beta="0.0703356288043734"/>
<PCell parameterName="p7" df="1" beta="0.560777774775141"/>
<PCell parameterName="p8" df="1" beta="0.0733018183148446"/>
<PCell parameterName="p9" df="1" beta="0.0354777686045661"/>
<PCell parameterName="p10" df="1" beta="0.0348224036435088"/>
<PCell parameterName="p11" df="1" beta="-0.0467284725558351"/>
<PCell parameterName="p12" df="1" beta="-0.0198909482783373"/>
<PCell parameterName="p13" df="1" beta="0.0249717713724488"/>
<PCell parameterName="p14" df="1" beta="0.178968713684109"/>
<PCell parameterName="p15" df="1" beta="0.152171268198043"/>
<PCell parameterName="p16" df="1" beta="0.12063515216564"/>
<PCell parameterName="p17" df="1" beta="0.196219481390912"/>
<PCell parameterName="p18" df="1" beta="0.221516201149234"/>
<PCell parameterName="p19" df="1" beta="0.386395054130119"/>
<PCell parameterName="p20" df="1" beta="0.525150698081918"/>
<PCell parameterName="p21" df="1" beta="-0.086152717496725"/>
<PCell parameterName="p22" df="1" beta="0.0302219017410602"/>
<PCell parameterName="p23" df="1" beta="0.106657620574331"/>
<PCell parameterName="p24" df="1" beta="0.0619761468310041"/>
<PCell parameterName="p25" df="1" beta="-0.761320053812693"/>
<PCell parameterName="p26" df="1" beta="-0.220971949685121"/>
<PCell parameterName="p27" df="1" beta="0.053039109091973"/>
<PCell parameterName="p28" df="1" beta="0.10992139929107"/>
<PCell parameterName="p29" df="1" beta="0.0316797808518037"/>
<PCell parameterName="p30" df="1" beta="0.0062173686216447"/>
<PCell parameterName="p31" df="1" beta="-0.070984632176239"/>
<PCell parameterName="p32" df="1" beta="0.83248838319892"/>
<PCell parameterName="p33" df="1" beta="1.79944846757453"/>
<PCell parameterName="p34" df="1" beta="-0.137721395263582"/>
<PCell parameterName="p35" df="1" beta="0.0491275452686831"/>
<PCell parameterName="p36" df="1" beta="-0.00676055423893139"/>
<PCell parameterName="p37" df="1" beta="0.097845120602549"/>
<PCell parameterName="p38" df="1" beta="0.251409070866154"/>
<PCell parameterName="p39" df="1" beta="-0.0246133646257348"/>
<PCell parameterName="p40" df="1" beta="0.0271622667560242"/>
<PCell parameterName="p41" df="1" beta="0.0424205763087955"/>
<PCell parameterName="p42" df="1" beta="-0.0241102160607328"/>
<PCell parameterName="p43" df="1" beta="0.0880797746220592"/>
<PCell parameterName="p44" df="1" beta="0.0398727479008459"/>
<PCell parameterName="p45" df="1" beta="-0.0308272779659326"/>
<PCell parameterName="p46" df="1" beta="-0.098953210482816"/>
<PCell parameterName="p47" df="1" beta="-0.0254116154376454"/>
<PCell parameterName="p48" df="1" beta="-0.100646198064465"/>
<PCell parameterName="p49" df="1" beta="0.334123873374726"/>
<PCell parameterName="p50" df="1" beta="-0.0820168952217712"/>
<PCell parameterName="p51" df="1" beta="0.0517883115146095"/>
<PCell parameterName="p52" df="1" beta="0.0770791651996997"/>
<PCell parameterName="p53" df="1" beta="0.177851241470253"/>
<PCell parameterName="p54" df="1" beta="0.245647879730038"/>
</ParamMatrix>
</GeneralRegressionModel>
</PMML>
@jperuggia Thanks for your model. I can reproduce the issue. It is caused by PMML4S always treating a binomial model as having a categorical target, while this model has a continuous target, `INC_NAF_CURRTRM_RES`. I have now updated PMML4S to support such a model.
You can build PMML4S with the command `sbt package`, then try the new jar. Please let me know if you still have problems.
@scorebot I was able to build/package the repo locally and verify that it works as expected. There was a small hiccup: when I attempted to add the newly created .jar to my local Maven repository and import it into my Java project, I ran into various issues, but I worked around that by replacing the referenced jar in the Maven version that had been downloaded and cached. I think there was a dependency issue (no fault of your own).
Would you be able to update the pmml4s version that you are deploying so I can update my project to pull from the maven repository to version 0.9.8 instead of the one referenced below?
<dependency>
<groupId>org.pmml4s</groupId>
<artifactId>pmml4s_2.12</artifactId>
<version>0.9.7</version>
</dependency>
Again thank you for the fast turn around time on this!
@jperuggia Publishing a new version takes some extra work. Version 0.9.8 has now been published, but Maven Central may need more time to pick it up, so you could try the new version later; it may be ready by tomorrow.
I am closing this issue now. If you have other problems, please feel free to open a new one.
@scorebot I get the same issue. I'm using logistic regression to predict a binary outcome and I get `NaN` for both classes. This is the version I have in my `pom.xml`:
<dependency>
<groupId>org.pmml4s</groupId>
<artifactId>pmml4s_2.12</artifactId>
<version>0.9.11</version>
</dependency>
@oren0e Could you send your model to me for debugging? thanks.
@scorebot Sure, here it is. Thank you for such a quick reply
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<PMML xmlns="http://www.dmg.org/PMML-4_4" xmlns:data="http://jpmml.org/jpmml-model/InlineTable" version="4.4">
<Header>
<Application name="JPMML-SkLearn" version="1.6.25"/>
<Timestamp>2021-08-11T05:55:07Z</Timestamp>
</Header>
<MiningBuildTask>
<Extension name="repr">PMMLPipeline(steps=[('estimator', LogisticRegression(C=0.8, max_iter=3000, random_state=1473))])</Extension>
</MiningBuildTask>
<DataDictionary>
<DataField name="y" optype="categorical" dataType="integer">
<Value value="0"/>
<Value value="1"/>
</DataField>
<DataField name="x1" optype="continuous" dataType="double"/>
<DataField name="x2" optype="continuous" dataType="double"/>
<DataField name="x3" optype="continuous" dataType="double"/>
<DataField name="x4" optype="continuous" dataType="double"/>
<DataField name="x5" optype="continuous" dataType="double"/>
<DataField name="x6" optype="continuous" dataType="double"/>
<DataField name="x7" optype="continuous" dataType="double"/>
<DataField name="x8" optype="continuous" dataType="double"/>
<DataField name="x9" optype="continuous" dataType="double"/>
<DataField name="x10" optype="continuous" dataType="double"/>
</DataDictionary>
<!-- Classification model over the categorical target "y" (categories 0 and 1)
     with ten continuous inputs x1..x10; scores are normalized with the logit
     method declared on the element below. -->
<RegressionModel functionName="classification" algorithmName="sklearn.linear_model._logistic.LogisticRegression" normalizationMethod="logit">
<!-- "y" is the target; the remaining fields carry no usageType attribute. -->
<MiningSchema>
<MiningField name="y" usageType="target"/>
<MiningField name="x1"/>
<MiningField name="x2"/>
<MiningField name="x3"/>
<MiningField name="x4"/>
<MiningField name="x5"/>
<MiningField name="x6"/>
<MiningField name="x7"/>
<MiningField name="x8"/>
<MiningField name="x9"/>
<MiningField name="x10"/>
</MiningSchema>
<!-- Only the two class probabilities are declared as output fields; no
     predicted-value output field is declared here. -->
<Output>
<OutputField name="probability(0)" optype="continuous" dataType="double" feature="probability" value="0"/>
<OutputField name="probability(1)" optype="continuous" dataType="double" feature="probability" value="1"/>
</Output>
<!-- Table for target category 1: intercept plus one coefficient per input. -->
<RegressionTable intercept="-12.388419566106077" targetCategory="1">
<NumericPredictor name="x1" coefficient="-0.6792251802730798"/>
<NumericPredictor name="x2" coefficient="-0.47972195124900435"/>
<NumericPredictor name="x3" coefficient="-0.4553875619242739"/>
<NumericPredictor name="x4" coefficient="1.0015466950318137"/>
<NumericPredictor name="x5" coefficient="0.10358022565978549"/>
<NumericPredictor name="x6" coefficient="0.3200609443892687"/>
<NumericPredictor name="x7" coefficient="-0.7253594155602279"/>
<NumericPredictor name="x8" coefficient="0.06362242123138408"/>
<NumericPredictor name="x9" coefficient="0.38860981307088815"/>
<NumericPredictor name="x10" coefficient="5.442360328661457"/>
</RegressionTable>
<!-- Table for target category 0: zero intercept and no predictors. -->
<RegressionTable intercept="0.0" targetCategory="0"/>
</RegressionModel>
</PMML>
Actually, now that I see it, should the `DataField` names be the variable names? Weird, I had names in Python for it...
Update
I've solved the problem by changing the `x1, x2, x3, ...` feature names to the correct names!
Side note: I get only output like {"probability(1)":0.0,"probability(0)":1.0}
How can I get the actual probability?
@oren0e Yes, you should input values for the correct fields defined in the PMML.
Both output fields, `probability(0)` and `probability(1)`, are returned as expected; by default they are determined by the OutputField elements in your model above. Do you mean how to get the actual prediction? If so, you can call the following method of the model to enable all possible outputs:
model.setSupplementOutput(true)
Thanks. It was indeed the probability, but the predictions were wrong because I was missing an `imblearn` component (a Python package for dealing with imbalanced data). This is how I solved it:
https://github.com/jpmml/sklearn2pmml/issues/293
I am also getting NaN for all predictions of a random forest regression model. However, the same model works fine when I save the PMML model and re-load it in Python for testing.
I am trying to evaluate a PMML file generated by Rattle/PMML. The PMML file is version 4.3, so it should be supported by the PMML4S evaluator.
I've verified that model introspection works fine, as all inputs, data types, etc. are returned as expected. When attempting to evaluate the model (from Java), I've used arrays, series, and HashMaps as inputs to see if there was any difference, and all return NaN predictions.
The underlying model is defined as:
<GeneralRegressionModel modelName="SampleModelTest" modelType="generalizedLinear" functionName="regression" algorithmName="glm" distribution="binomial" linkFunction="logit">
There is one output field from the model.
To check whether the issue is with PMML4S or with the PMML file itself, I evaluated the same file with the JPMML-Evaluator library, and it executed and scored correctly. I've also verified that other PMML files load and execute in PMML4S with outputs similar to JPMML's.
Is there a way to see what is causing the NaN predictions within PMML4S, or a way to provide a sample PMML file for further debugging?