jpmml / jpmml-evaluator

Java Evaluator API for PMML
GNU Affero General Public License v3.0
892 stars 255 forks source link

Evaluate error #35

Closed Alexkevin closed 8 years ago

Alexkevin commented 8 years ago

Sorry to trouble you again. JPMML works well when I use LogisticRegression, but it fails with other models such as random forest. The model comes from sklearn, and I exported it with your awesome tool sklearn2pmml.

the error is

Exception in thread "main" org.jpmml.evaluator.EvaluationException
    at org.jpmml.evaluator.CategoricalValue.compareToString(CategoricalValue.java:39)
    at org.jpmml.evaluator.FieldValue.compareTo(FieldValue.java:139)
    at org.jpmml.evaluator.PredicateUtil.evaluateSimplePredicate(PredicateUtil.java:131)
    at org.jpmml.evaluator.PredicateUtil.evaluatePredicate(PredicateUtil.java:63)
    at org.jpmml.evaluator.PredicateUtil.evaluate(PredicateUtil.java:51)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.evaluateNode(TreeModelEvaluator.java:201)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.handleTrue(TreeModelEvaluator.java:218)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.evaluateTree(TreeModelEvaluator.java:162)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.evaluateClassification(TreeModelEvaluator.java:137)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.evaluate(TreeModelEvaluator.java:106)
    at org.jpmml.evaluator.mining.MiningModelEvaluator.evaluateSegmentation(MiningModelEvaluator.java:407)
    at org.jpmml.evaluator.mining.MiningModelEvaluator.evaluateClassification(MiningModelEvaluator.java:240)
    at org.jpmml.evaluator.mining.MiningModelEvaluator.evaluate(MiningModelEvaluator.java:207)
    at org.jpmml.evaluator.mining.MiningModelEvaluator.evaluate(MiningModelEvaluator.java:185)
    at com.ctrip.hotelbi.jpmml.Score.gettingProbability(Score.java:32)
    at com.ctrip.hotelbi.jpmml.Score.gettingProbability(Score.java:53)
    at com.ctrip.hotelbi.jpmml.PMMLTest.main(PMMLTest.java:41)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)

The code is:

public class Process {
    private String[] data;
    private Evaluator evaluator;

    public Process() {
    }

    public Process(String[] data, Evaluator evaluator) {
        this.data = data;
        this.evaluator = evaluator;
    }

    public Map<FieldName, FieldValue> testData() {
        /**
         * Prepare test data
         * @return input data for prediction
         */
        Map<FieldName, FieldValue> arguments = new LinkedHashMap<>();
        List<InputField> inputs = this.evaluator.getActiveFields();
        for (InputField input : inputs) {
            FieldName activeName = input.getName();
            int i = inputs.indexOf(input);
            FieldValue activeValue = null;
            try {
                if (input.getDataType().equals(DataType.DOUBLE)) {
                    activeValue = input.prepare(Double.parseDouble(this.data[i]));
                }else activeValue = FieldValueUtil.create( this.data[i] );

            }catch (Exception e){
                activeValue = FieldValueUtil.create(0.0);
                e.printStackTrace();
            }
            arguments.put(activeName, activeValue);

        }

        return arguments;
    }
}

/**
 * Scores the data row held by the {@link Process} superclass against a PMML model.
 */
public class Score extends Process{

    public Score(String[] data, Evaluator evaluator) {
        super(data, evaluator);
    }

    /**
     * Predict all target label probabilities.
     *
     * <p>The arguments are prepared by {@code Process#testData()} using the evaluator given
     * to the constructor; the evaluation itself uses the {@code evaluator} parameter.
     *
     * @param evaluator pmml model
     * @return every value of the evaluation result, in the result map's iteration order
     */
    public ArrayList<?> gettingProbability(Evaluator evaluator){
        Map<FieldName, FieldValue> testData = super.testData();

        System.out.println(testData.size());
        Map<FieldName,?> finalResults = evaluator.evaluate(testData);

        // The former instanceof-Double check added the same object in both branches,
        // so copying the values directly is equivalent.
        return new ArrayList<Object>(finalResults.values());
    }

    /**
     * Predict target label probability.
     *
     * @param evaluator pmml model
     * @param targetLabelIndex the index of target label that you want to predict
     * @return the result value at {@code targetLabelIndex}, cast to {@code Double}
     */
    public Double gettingProbability(Evaluator evaluator,int targetLabelIndex){
        ArrayList<?> scoreArray = this.gettingProbability(evaluator);
        return (Double) scoreArray.get(targetLabelIndex);
    }
}

/**
 * Command-line driver: reads a CSV file, loads a PMML model and prints one score per data row.
 */
public class PMMLTest {
    /**
     * Loads the data and the model from fixed file paths and prints the score for each row.
     *
     * @param args unused
     * @throws IOException if the CSV file or the PMML file cannot be read
     * @throws JAXBException declared for the PMML unmarshalling call
     * @throws SAXException declared for the PMML unmarshalling call
     */
    public static void main(String[] args) throws IOException, JAXBException, SAXException {
        //Loading data
        List<String[]> data;
        // try-with-resources closes the reader even when readAll() fails.
        try (CSVReader reader = new CSVReader(new FileReader("d:\\Users\\shuangyangwang\\Desktop\\JPMML\\Iris1.csv"))) {
            data = reader.readAll();
        }
        // Drop the first (header) row; an empty file has nothing to drop.
        if (!data.isEmpty()) {
            data.remove(0);
        }

        //Loading model
        PMML model;
        try (InputStream is = new FileInputStream("d:\\Users\\shuangyangwang\\Desktop\\Test\\ExtraTreesClassifier.pmml")) {
            model = PMMLUtil.unmarshal(is);
        }

        ModelEvaluatorFactory mef = ModelEvaluatorFactory.newInstance();
        ModelEvaluator<?> modelEvaluator = mef.newModelEvaluator(model);
        Evaluator evaluator = (Evaluator) modelEvaluator;
        evaluator.verify();

        //Predicting probability
        for (String[] s : data) {
            Score scoreE = new Score(s, evaluator);
            Double score = scoreE.gettingProbability( evaluator ,1);
            System.out.println(score);
        }

    }
}

I really don't know what is wrong with it. Please give me some suggestions. Thank you very much.

vruusmann commented 8 years ago

Your PMML document contains a SimplePredicate element, which attempts to apply a comparison operator (<=, <, > and >=) to a categorical field value:

<SimplePredicate field="fruit" operator="lessThan" value="orange"/>

This is NOT permitted according to the PMML specification, so the following EvaluationException is perfectly "correct" (sure, this exception should have a proper message):

Exception in thread "main" org.jpmml.evaluator.EvaluationException 
  at org.jpmml.evaluator.CategoricalValue.compareToString(CategoricalValue.java:39) 
  at org.jpmml.evaluator.FieldValue.compareTo(FieldValue.java:139) 
  at org.jpmml.evaluator.PredicateUtil.evaluateSimplePredicate(PredicateUtil.java:131)

What is your sklearn2pmml/JPMML-SkLearn library version? It can be found on the PMML/Header/Application section of your PMML document. If it's anything less than "JPMML-SkLearn 1.1.0", then please upgrade first.

I don't think that sklearn2pmml is generating such SimplePredicate elements. Have you manually modified the PMML document yourself? Can you send it to me?

vruusmann commented 8 years ago

Looking at your Java code, I think that the following line is where those categorical field values creep in:

activeValue = FieldValueUtil.create(this.data[i]);

The utility method FieldValueUtil#create(Object) attempts to guess the data type and operational type based on the Java class. In this case, it is assigning a categorical optype to a field value that should have a continuous optype according to the PMML document.

You should ALWAYS do InputField#prepare(Object). And in order to be future-proof, you should replace Evaluator#getActiveFields() with Evaluator#getInputFields().

This is the correct way of preparing inputs in your use case:

// Pair each input field with the data column at the same position.
int column = 0;
for(InputField field : evaluator.getInputFields()){
  // InputField#prepare applies the field's declared data type and optype to the raw value.
  arguments.put(field.getName(), field.prepare(data[column]));
  column++;
}
Alexkevin commented 8 years ago

@vruusmann Thank you very, very much! My code works now.

sbourzai commented 7 years ago

Hi Sir, I have exported my model to PMML 4.3. When I try to import it into a Java project, I get the errors below:

I think it is because of this code:

// Wrap the raw input stream "is" in a SAX InputSource.
InputSource source = new InputSource(is);
// Run the source through ImportFilter, which yields a SAXSource.
// NOTE(review): presumably this filter maps the document's PMML namespace to the schema
// version the library supports; confirm against the JPMML-Model version in use.
SAXSource transformedSource = ImportFilter.apply(source);
// Unmarshal the filtered source and return the result to the caller.
return JAXBUtil.unmarshalPMML(transformedSource); 

and i tried with the IOUtil but i have the same error :

java.lang.IllegalArgumentException: http://www.dmg.org/PMML-4_3
    at org.jpmml.schema.Version.forNamespaceURI(Version.java:47)
    at org.jpmml.model.PMMLFilter.updateSource(PMMLFilter.java:92)
    at org.jpmml.model.PMMLFilter.startPrefixMapping(PMMLFilter.java:41)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startNamespaceMapping(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startElement(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.scanStartElement(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl$NSContentDriver.scanRootElementHook(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$PrologDriver.next(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source)
    at org.xml.sax.helpers.XMLFilterImpl.parse(Unknown Source)
    at com.sun.xml.bind.v2.runtime.unmarshaller.UnmarshallerImpl.unmarshal0(UnmarshallerImpl.java:258)
    at com.sun.xml.bind.v2.runtime.unmarshaller.UnmarshallerImpl.unmarshal(UnmarshallerImpl.java:229)
    at javax.xml.bind.helpers.AbstractUnmarshallerImpl.unmarshal(Unknown Source)
    at javax.xml.bind.helpers.AbstractUnmarshallerImpl.unmarshal(Unknown Source)
    at IOUtil.unmarshal(IOUtil.java:54)
    at IOUtil.unmarshal(IOUtil.java:47)
    at IOUtil.unmarshal(IOUtil.java:42)
    at IOUtil.unmarshal(IOUtil.java:34)
    at ConnectTry.main(ConnectTry.java:24)
Exception in thread "main" java.lang.NullPointerException
    at org.jpmml.evaluator.TreeModelEvaluator.<init>(TreeModelEvaluator.java:48)
    at ConnectTry.main(ConnectTry.java:33)
bachsh commented 6 years ago

@sbourzai I'm having the same issue. Did you solve it eventually?

sbourzai commented 6 years ago

Hello @bachsh Yes, try to add this to your pom file :

  <groupId>com.test.LinRegPMML</groupId>
  <artifactId>test-LinRegPMML</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>test-LinRegPMML</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
        <groupId>org.jpmml</groupId>
        <artifactId>pmml-evaluator</artifactId>
        <version>1.3.6</version>
    </dependency>
    <dependency>
        <groupId>org.jpmml</groupId>
        <artifactId>pmml-manager</artifactId>
        <version>1.1.20</version>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>1.7.25</version>
    </dependency>
    <dependency>
        <groupId>com.beust</groupId>
        <artifactId>jcommander</artifactId>
        <version>1.72</version>
    </dependency>

    <dependency>
        <groupId>com.codahale.metrics</groupId>
        <artifactId>metrics-core</artifactId>
        <version>3.0.2</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
  </dependencies>
sbourzai commented 6 years ago

And I was able to import my model by using this:

@Test
    public void predicExample() throws Exception {

        // Loading a PMML schema version 4.X document into an org.dmg.pmml.PMML instance:
        InputStream is = new FileInputStream("LinReg_PMML.xml");
        PMML pmml = PMMLUtil.unmarshal(is);

        // if the model type is known (here: "LinearRegression",
        Evaluator evaluator = new RegressionModelEvaluator(pmml);

        // if the model type is unknown
         Evaluator evaluator2 = ModelEvaluatorFactory.newInstance().newModelEvaluator(pmml);
bachsh commented 6 years ago

@sbourzai Thank you for the answer! The reason I got my error was a conflict with an old JPMML-Model version that is bundled with SparkML. The full solution, as recommended by Villu (the creator of JPMML), is to shade the package: https://github.com/jpmml/jpmml-sparkml#library

For me, importing the JPMML-Model dependency before importing the SparkML dependency solved the issue. I'm using jupyter-scala so it looks like this

import $ivy.`org.jpmml:pmml-model:1.3.9`
import $ivy.`org.apache.spark::spark-mllib:2.1.0`
sbourzai commented 6 years ago

Great ! Thanks for the information 👍 Good Luck

diogofernandes63 commented 5 years ago

I get the error:

Exception in thread "main" org.jpmml.evaluator.EvaluationException (at or around line 156 of the PMML document): Collection value cannot be used in comparison operations
    at org.jpmml.evaluator.CollectionValue.compareToValue(CollectionValue.java:62)
    at org.jpmml.evaluator.FieldValue.compareTo(FieldValue.java:127)
    at org.jpmml.evaluator.PredicateUtil.evaluateSimplePredicate(PredicateUtil.java:162)
    at org.jpmml.evaluator.PredicateUtil.evaluatePredicate(PredicateUtil.java:83)
    at org.jpmml.evaluator.PredicateUtil.evaluate(PredicateUtil.java:73)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.evaluateNode(TreeModelEvaluator.java:200)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.handleTrue(TreeModelEvaluator.java:217)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.evaluateTree(TreeModelEvaluator.java:164)
    at org.jpmml.evaluator.tree.TreeModelEvaluator.evaluateRegression(TreeModelEvaluator.java:111)
    at org.jpmml.evaluator.ModelEvaluator.evaluateInternal(ModelEvaluator.java:645)
    at org.jpmml.evaluator.mining.MiningModelEvaluator.evaluateSegmentation(MiningModelEvaluator.java:508)
    at org.jpmml.evaluator.mining.MiningModelEvaluator.evaluateRegression(MiningModelEvaluator.java:214)
    at org.jpmml.evaluator.ModelEvaluator.evaluateInternal(ModelEvaluator.java:645)
    at org.jpmml.evaluator.mining.MiningModelEvaluator.evaluateInternal(MiningModelEvaluator.java:207)
    at org.jpmml.evaluator.ModelEvaluator.evaluate(ModelEvaluator.java:557)
    at javaapplication3.ola.run(ola.java:118)

And the code is:

/**
 * Loads the PMML model, evaluates it once against the hard-coded sample record and
 * prints the model's input, target and output fields.
 *
 * <p>Each input field receives a single scalar from the record. Passing the whole list as
 * the value of every field produces a collection value, which the evaluator rejects with
 * "Collection value cannot be used in comparison operations".
 *
 * @param args unused
 * @throws Exception if the model cannot be loaded or evaluated
 * @throws IllegalStateException if the record has fewer values than the model has input fields
 */
public void run(String [] args) throws Exception {
    this.modelEvaluator = null;
    // Let a load failure propagate: printing it and carrying on only ended in a
    // NullPointerException on verify() below.
    this.modelEvaluator = loadModel("/Users/diogoduarte/Desktop/2ºdataset/model.pmml");

    modelEvaluator.verify();

    List<InputField> inputFields = modelEvaluator.getInputFields();
    System.out.println("Input fields: " + inputFields);

    List<Float> values = addvalue();
    analogs = values;
    System.out.println("line " + analogs);

    if (values.size() < inputFields.size()) {
        throw new IllegalStateException("Record has " + values.size()
            + " values but the model declares " + inputFields.size() + " input fields");
    }

    // NOTE(review): values are matched to input fields by position; confirm that the record's
    // column order is the same as the model's input field order.
    Map<FieldName, FieldValue> arguments = new LinkedHashMap<>();
    for (int i = 0; i < inputFields.size(); i++) {
        InputField inputField = inputFields.get(i);
        Object rawValue = values.get(i);
        arguments.put(inputField.getName(), inputField.prepare(rawValue));
    }

    Map<FieldName, ?> results = modelEvaluator.evaluate(arguments);

    List<TargetField> targetFields = modelEvaluator.getTargetFields();
    System.out.println("Target field(s): " + targetFields);
    for (TargetField targetField : targetFields) {
        FieldName targetFieldName = targetField.getName();
        // Looked up but not used yet, as in the original code.
        Object targetFieldValue = results.get(targetFieldName);
    }

    List<org.jpmml.evaluator.OutputField> outputFields = modelEvaluator.getOutputFields();
    System.out.println("Output fields: " + outputFields);
}
        /**
         * Reads the PMML document at the given path and builds a model evaluator for it.
         *
         * @param file path of the PMML file
         * @return the evaluator created by a new {@code ModelEvaluatorFactory} instance
         * @throws Exception whatever opening, unmarshalling or evaluator creation throws
         */
        private static ModelEvaluator<?> loadModel(final String file) throws Exception {
            // The stream is closed on exit from the try block; failures propagate unchanged.
            try (InputStream stream = new FileInputStream(new File(file))) {
                PMML document = org.jpmml.model.PMMLUtil.unmarshal(stream);
                ModelEvaluatorFactory factory = ModelEvaluatorFactory.newInstance();
                return factory.newModelEvaluator(document);
            }
        }

        /**
         * Builds the hard-coded sample record used as model input.
         *
         * @return a new mutable list holding the 26 sample values in their fixed order
         */
        private static List<Float> addvalue() throws FileNotFoundException, IOException {
            final float[] sample = {
                1.0f, 1.0f, 0.0023f, 0.003f, 100.0f, 518.67f, 643.02f,
                1585.29f, 1398.21f, 14.62f, 21.61f, 553.90f, 2388.04f,
                9050.17f, 1.30f, 47.20f, 521.72f, 2388.03f, 8125.55f,
                8.4052f, 0.03f, 392.0f, 2388.0f, 100.00f, 38.86f, 23.3735f
            };
            List<Float> record = new ArrayList<>();
            for (float reading : sample) {
                record.add(reading);
            }
            return record;
        }
vruusmann commented 5 years ago

@diogofernandes63 It's not a good style to add comments to already closed issues. Especially if your issue is different.

The exception message is very clear here - "Collection value cannot be used in comparison operations"

For example, the code snippet [0, 1.5, 1] <= 1 is not valid or sensible Python code, and it is not valid PMML markup either.