axa-group / nlp.js

An NLP library for building bots, with entity extraction, sentiment analysis, automatic language identification, and much more
MIT License
6.28k stars 621 forks source link

Entities in response are not selected by accuracy #307

Closed cahnory closed 1 year ago

cahnory commented 5 years ago

Describe the bug If several occurrences of the same entity are found in an utterance, the answer seems to use the one with the lowest position.

To Reproduce

// Minimal reproduction for the issue: when a slot-filling answer contains an
// entity, the reply is rendered with the whole utterance instead of the
// exactly matched entity text.
const { NlpManager, ConversationContext } = require('node-nlp');

/**
 * Builds a one-intent English model with a 'style' entity and a 'style' slot,
 * trains it, then processes two utterances that share the same conversation
 * context and prints the result of the second one as formatted JSON.
 *
 * @returns {Promise<void>} Resolves once the second result has been logged.
 */
async function main() {
  const manager = new NlpManager({ languages: ['en'] });
  // A single context is reused for both process() calls below so the second
  // utterance is handled as a follow-up to the first.
  const ctx = new ConversationContext();

  // Register the option 'rock' of the 'style' entity for English, matched by
  // the texts 'rock' and 'Rock'.
  manager.addNamedEntityText('style', 'rock', ['en'], ['rock', 'Rock']);

  // Declare a 'style' slot on the 'music' intent together with the question
  // to ask for it in English.
  // NOTE(review): the third argument (true) presumably marks the slot as
  // mandatory — confirm against the node-nlp slot-filling documentation.
  manager.slotManager.addSlot('music', 'style', true, {
    en: 'What style of music do you want?',
  });
  manager.addDocument('en', 'Play me some music', 'music');
  // This next line does not change anything but it's something I tried
  manager.addDocument('en', 'I want some %style%', 'music');
  // Answer template; {{style}} is replaced with the resolved 'style' entity.
  manager.addAnswer('en', 'music', "Ok let's play some {{style}}");

  await manager.train();
  // First turn: matches the 'music' intent without providing a style.
  await manager.process('en', 'Play me some music', ctx);
  // Second turn: supplies the style; this is the result that shows the bug.
  const result = await manager.process('en', 'I want some rock', ctx);
  console.log(JSON.stringify(result, null, 2));
}

main();

Here is the log of the last process response. You can see that one entity with perfect accuracy is overridden by the next one:

{
  "utterance": "I want some rock",
  "locale": "en",
  "languageGuessed": false,
  "localeIso2": "en",
  "language": "English",
  "domain": "default",
  "classifications": [
    {
      "label": "music",
      "value": 1
    }
  ],
  "intent": "music",
  "score": 1,
  "entities": [
    {
      "start": 12,
      "end": 15,
      "len": 4,
      "levenshtein": 0,
      "accuracy": 1,
      "option": "rock",
      "sourceText": "rock",
      "entity": "style",
      "utteranceText": "rock"
    },
    {
      "entity": "style",
      "utteranceText": "I want some rock",
      "sourceText": "I want some rock",
      "accuracy": 0.95,
      "start": 0,
      "end": 15,
      "len": 16
    }
  ],
  "sourceEntities": [],
  "sentiment": {
    "score": 0.475,
    "comparative": 0.11875,
    "vote": "positive",
    "numWords": 4,
    "numHits": 4,
    "type": "senticon",
    "language": "en"
  },
  "actions": [],
  "srcAnswer": "Ok let's play some {{style}}",
  "answer": "Ok let's play some I want some rock"
}

Expected behavior The entity used in answers should be the one with the highest accuracy value.

Desktop (please complete the following information):

Additional context The problem seems to be here https://github.com/axa-group/nlp.js/blob/master/lib/nlp/nlp-manager.js#L451 and probably here too https://github.com/axa-group/nlp.js/blob/master/lib/nlp/nlp-manager.js#L433

jesus-seijas-sp commented 5 years ago

It seems related to slot filling... the full sentence is received by the slot filling as the answer, and then an entity is extracted from it... but the previous sentence is not removed.

Confirmed bug

Apollon77 commented 2 years ago

@jesus-seijas-sp I also stumbled upon this ...

Apollon77 commented 2 years ago

This is fixed in #1164: the fallback entity is now only added during slot filling when the entity has not already been parsed in the meantime.