axa-group / nlp.js

An NLP library for building bots, with entity extraction, sentiment analysis, automatic language identification, and much more
MIT License
6.22k stars 616 forks source link

Can't use Date extraction in Portuguese #188

Closed luizhenrique07 closed 1 year ago

luizhenrique07 commented 5 years ago

I tried to extract dates in Portuguese but had no success with any format other than DD/MM/YYYY, even though the docs list Portuguese date extraction as supported: https://github.com/axa-group/nlp.js/blob/master/docs/language-support.md

You can reproduce the behavior with the example code below:

Portuguese example

const { NlpManager, ConversationContext } = require("node-nlp");

// Reproduction script: train a Portuguese-only manager on a handful of
// date-like utterances, then process a new sentence and print the full result
// so the extracted entities can be inspected.
const manager = new NlpManager({ languages: ['pt'] });

// Training utterances; every one is labelled with the same intent.
const trainingUtterances = [
  'Vou ir na segunda',
  'Dia 19 estarei lá',
  'Vai ser dia 26 de maio',
  'Dia 1 eu vou ir lá',
  'Vou ir amanhã',
];
for (const utterance of trainingUtterances) {
  manager.addDocument('pt', utterance, 'portuguese.date');
}

// addAnswer takes (locale, intent, answer): the intent comes before the
// answer text. The earlier call had them swapped, which registered the answer
// under a non-existent intent.
manager.addAnswer('pt', 'portuguese.date', 'Repondendo a data');

manager
  .train()
  .then(() => manager.process('pt', 'Vou ir dia 20'))
  .then((result) => console.log(JSON.stringify(result, null, 2)))
  .catch((err) => {
    // Surface training/processing failures instead of leaving the rejection
    // unhandled.
    console.error(err);
    process.exitCode = 1;
  });
Output
{
  "utterance": "Vou ir dia 20",
  "locale": "pt",
  "languageGuessed": false,
  "localeIso2": "pt",
  "language": "Portuguese",
  "domain": "default",
  "classifications": [
    {
      "label": "portuguese.date",
      "value": 1
    }
  ],
  "intent": "portuguese.date",
  "score": 1,
  "entities": [
    {
      "start": 7,
      "end": 9,
      "len": 3,
      "accuracy": 0.95,
      "sourceText": "dia",
      "utteranceText": "dia",
      "entity": "age",
      "resolution": {
        "strValue": null,
        "value": null,
        "unit": "Day",
        "localeUnit": "Dia"
      }
    },
    {
      "start": 11,
      "end": 12,
      "len": 2,
      "accuracy": 0.95,
      "sourceText": "20",
      "utteranceText": "20",
      "entity": "number",
      "resolution": {
        "strValue": "20",
        "value": 20,
        "subtype": "integer"
      }
    }
  ],
  "sentiment": {
    "score": 0,
    "comparative": 0,
    "vote": "neutral",
    "numWords": 3,
    "numHits": 0,
    "type": "senticon",
    "language": "pt"
  },
  "actions": []
}

When I use it in English it works as expected

English example

const { NlpManager, ConversationContext } = require("node-nlp");

// Reproduction script (English counterpart): train an English-only manager on
// date-like utterances, then process a sentence and print the full result so
// the extracted entities can be inspected.
const manager = new NlpManager({ languages: ["en"] });

// Training utterances; every one is labelled with the same intent.
const trainingUtterances = [
  "I'll be there on monday",
  "Next friday I will be there",
  "I'll go back on 16",
];
for (const utterance of trainingUtterances) {
  manager.addDocument("en", utterance, "english.date");
}

// addAnswer takes (locale, intent, answer): the intent comes before the
// answer text. The earlier call had them swapped, which registered the answer
// under a non-existent intent.
manager.addAnswer("en", "english.date", "Repondendo a data");

manager
  .train()
  .then(() => manager.process("en", "I'll go back on 16"))
  .then((result) => console.log(JSON.stringify(result, null, 2)))
  .catch((err) => {
    // Surface training/processing failures instead of leaving the rejection
    // unhandled.
    console.error(err);
    process.exitCode = 1;
  });
Output
{
  "utterance": "I'll go back on 16",
  "locale": "en",
  "languageGuessed": false,
  "localeIso2": "en",
  "language": "English",
  "domain": "default",
  "classifications": [
    {
      "label": "english.date",
      "value": 1
    }
  ],
  "intent": "english.date",
  "score": 1,
  "entities": [
    {
      "start": 16,
      "end": 17,
      "len": 2,
      "accuracy": 0.95,
      "sourceText": "16",
      "utteranceText": "16",
      "entity": "number",
      "resolution": {
        "strValue": "16",
        "value": 16,
        "subtype": "integer"
      }
    },
    {
      "start": 16,
      "end": 17,
      "len": 2,
      "accuracy": 0.95,
      "sourceText": "16",
      "utteranceText": "16",
      "entity": "date",
      "resolution": {
        "type": "interval",
        "timex": "XXXX-XX-16",
        "strPastValue": "2019-04-16",
        "pastDate": "2019-04-16T00:00:00.000Z",
        "strFutureValue": "2019-05-16",
        "futureDate": "2019-05-16T00:00:00.000Z"
      }
    }
  ],
  "sentiment": {
    "score": 0.5,
    "comparative": 0.08333333333333333,
    "vote": "positive",
    "numWords": 6,
    "numHits": 1,
    "type": "senticon",
    "language": "en"
  },
  "actions": []
}
veigaribo commented 5 years ago

I was investigating that and, looking at this file from @microsoft/recognizers-text-suite (that is used at least for entity extraction), it seems to me that, even though such extractions are supported in .NET, they're probably not supported in the JS/Node version because of all those "NotSupported": "javascript, python" (the Specs folder afaik is used for testing). Also here, in the JavaScript section for DateTime recognition, I'd expect to find a folder for Portuguese, like the one here, in the .NET section, but there isn't any.

So the table at https://github.com/axa-group/nlp.js/blob/master/docs/language-support.md is probably wrong, and Portuguese should actually be a "see 4".

thesocialdev commented 3 years ago

@luizhenrique07 did you make any progress on it?

aigloss commented 1 year ago

Closing due to inactivity