erelsgl / limdu

Machine-learning for Node.js
GNU Lesser General Public License v3.0
1.05k stars 99 forks source link

Label Classification Result correct? #54

Open yoyhosoft opened 7 years ago

yoyhosoft commented 7 years ago

Hi, I have a question, as described below:

Here is my code:

var limdu = require('limdu');

// First, define our base classifier type (a multi-label classifier based on winnow):
// `bind(0, {...})` pre-fills the first argument of BinaryRelevance with this options object,
// so the resulting TextClassifier carries its configuration with it.
var TextClassifier = limdu.classifiers.multilabel.BinaryRelevance.bind(0, {
    // The binary classifier type is Winnow, likewise pre-bound with {retrain_count: 10}.
    // NOTE(review): the exact meaning of retrain_count is defined by limdu - confirm in its docs.
    binaryClassifierType: limdu.classifiers.Winnow.bind(0, {retrain_count: 10})
});

// Feature extractor: receives a sample string and a features object, and mutates the
// features object in place. The sample is split on single spaces, and every resulting
// token becomes a key whose value is 1 (so consecutive spaces produce an empty-string key).
// Nothing is returned.
var WordExtractor = function(input, features) {
    var tokens = input.split(" ");
    for (var i = 0; i < tokens.length; i++) {
        features[tokens[i]] = 1;
    }
};

// Initialize a classifier with the base classifier type and the feature extractor:
// The options wire together the pre-bound TextClassifier, the WordExtractor function and
// limdu's LowerCaseNormalizer (presumably lower-cases each sample before feature
// extraction - confirm against the limdu docs).
var intentClassifier = new limdu.classifiers.EnhancedClassifier({
    classifierType: TextClassifier,
    featureExtractor: WordExtractor,
    normalizer: limdu.features.LowerCaseNormalizer,
    pastTrainingSamples: [], // to enable retraining
});

// Train and test:
// Step 1: batch-train on three samples, each with a single output label.
intentClassifier.trainBatch([
    {input: "I want an apple", output: "apl"},
    {input: "I want a banana", output: "bnn"},
    {input: "I want chips", output:    "cps"},
]);

// Classification #1: after the batch training only.
console.log( intentClassifier.classify("I want chips and a doughnut") );
// Step 2: online training - seven two-word "<pronoun> want" samples labelled "req",
// followed by one doughnut sample labelled "dnt".
intentClassifier.trainOnline("I want", "req");
intentClassifier.trainOnline("You want", "req");
intentClassifier.trainOnline("We want", "req");
intentClassifier.trainOnline("They want", "req");
intentClassifier.trainOnline("He want", "req");
intentClassifier.trainOnline("She want", "req");
intentClassifier.trainOnline("It want", "req");
intentClassifier.trainOnline("I want a doughnut", "dnt");
// Classification #2: same query, after online training but before the explicit retrain.
console.log( intentClassifier.classify("I want chips and a doughnut") );
// Step 3: explicit retrain (the classifier was constructed with pastTrainingSamples to enable this).
intentClassifier.retrain();
// Classification #3: same query, after the retrain.
console.log( intentClassifier.classify("I want chips and a doughnut") );

The actual results are:

[ 'cps' ]
[ 'dnt', 'cps' ]
[ 'dnt', 'cps' ]

Are these results correct, or should they be like this:

[ 'cps' ]
[ 'dnt', 'cps' ]
[ 'req', 'dnt', 'cps' ]
erelsgl commented 7 years ago

That's a good question. I do not know why "req" is not returned. It may be worthwhile to use the "explain" feature and see the actual numeric feature-vectors.

Unrealize commented 7 years ago
// NOTE(review): `intentClassifier` is assigned here without var/let/const; presumably it is
// declared earlier in the commenter's script - confirm, otherwise this creates an implicit global.
// TextClassifier and WordExtractor are not defined in this snippet; presumably they are the
// same definitions as in the original report - confirm.
// No pastTrainingSamples option is passed to this classifier.
intentClassifier = new limdu.classifiers.EnhancedClassifier({
    classifierType: TextClassifier,  
    normalizer: limdu.features.LowerCaseNormalizer,
    featureExtractor: WordExtractor  
});
// Two online samples that differ only in the word "on" vs "off".
intentClassifier.trainOnline("switch on the fan", "fanon");
intentClassifier.trainOnline("switch off the fan", "fanoff");
// The second argument (4) presumably requests an explanation of the classification
// (the reported output lists 4 entries per label) - confirm against the limdu docs.
console.dir(intentClassifier.classify("switch on the fan pls",4));

and the result:

positive: fanoff: (4) ["switch+0.80", "the+0.80", "fan+0.80", "bias+0.80"] fanon: (4) ["on+0.64", "switch+0.14", "the+0.14", "fan+0.14"]

Why does "fanoff" have a higher score than "fanon"?