axa-group / nlp.js

An NLP library for building bots, with entity extraction, sentiment analysis, automatic language identification, and more
MIT License
6.23k stars 616 forks source link

Entity recognition on web? #843

Closed obaid closed 3 years ago

obaid commented 3 years ago

I can't seem to get any entity recognition to work in this example: https://github.com/jesus-seijas-sp/nlpjs-examples/tree/master/02.web

So when I run process('en', 'I am 24'), I expect it to find a number entity for 24.

I am sure I am missing something minor in how I am setting things up. Any tips?

obaid commented 3 years ago

Any ideas on this?

jesus-seijas-sp commented 3 years ago

NER works on the web. What does not work by default is templating, i.e. replacing context variables in the template strings, but this issue explains how to solve it: https://github.com/axa-group/nlp.js/issues/788

On the other hand, NER works, but Golden/Builtin Entities do not work out of the box. For builtin entities there are 4 possible packages; the example below uses @nlpjs/builtin-default.

For example, this code works in the browser (bundle size 371KB):

const { containerBootstrap } = require('@nlpjs/core');
const { Nlp } = require('@nlpjs/nlp');
const { LangEn } = require('@nlpjs/lang-en-min');
const { Evaluator, Template } = require('@nlpjs/evaluator');
const { BuiltinDefault } = require('@nlpjs/builtin-default');

// Sample corpus: two intents about super heroes plus one enum entity (`hero`)
// whose option keys are the same as the keys of `contextData`.
const corpus = {
  name: 'Corpus with entities',
  locale: 'en-US',
  // Per-hero facts. Presumably exposed to the answer templates as `_data`
  // (the templates index it with the matched `hero` option) — confirm against nlp.js.
  contextData: {
    spiderman: { realName: 'Peter Parker', city: 'Queens, New York' },
    ironman: { realName: 'Tony Stark', city: 'Stark Tower, New York' },
    thor: { realName: 'Odinson', city: 'Asgard' },
  },
  // Training data: `@hero` in an utterance marks where the entity appears.
  data: [
    {
      intent: 'hero.realname',
      utterances: ['what is the real name of @hero'],
      answers: [
        'The real name of {{ hero }} is {{ _data[entities.hero.option].realName }}',
      ],
    },
    {
      intent: 'hero.city',
      utterances: ['where @hero lives?', "what's the city of @hero?"],
      answers: ['{{ hero }} lives at {{ _data[entities.hero.option].city }}'],
    },
  ],
  // Enum entity: each option lists the surface forms that map to it.
  entities: {
    hero: {
      options: {
        spiderman: ['spiderman', 'spider-man'],
        ironman: ['ironman', 'iron-man'],
        thor: ['thor'],
      },
    },
  },
};

// Demo entry point: builds the nlp.js container, trains on `corpus`, and logs
// the result of processing one English utterance.
(async () => {
  const container = await containerBootstrap();
  container.use(Nlp);
  container.use(LangEn);
  // Evaluator and Template provide the `{{ ... }}` templating of answers,
  // which is not available by default in the browser.
  container.register('Evaluator', Evaluator);
  container.register('Template', Template);
  // Builtin entity extractor (this is what finds the number in the utterance).
  // NOTE(review): '??' presumably acts as an "any locale" wildcard — confirm.
  const builtin = new BuiltinDefault();
  container.register('extract-builtin-??', builtin, true);
  const nlp = container.get('nlp');
  nlp.settings.autoSave = false;
  // Awaited so training cannot start before the corpus is loaded: the nlp.js
  // docs use `await nlp.addCorpus(...)`. Harmless if it returns synchronously.
  await nlp.addCorpus(corpus);
  await nlp.train();
  const response = await nlp.process('en', 'what is the real name of spiderman 25?');
  console.log(response);
})();

The response:

{
  locale: 'en',
  utterance: 'what is the real name of spiderman 25?',
  settings: undefined,
  languageGuessed: false,
  localeIso2: 'en',
  language: 'English',
  nluAnswer: {
    classifications: [ [Object] ],
    entities: undefined,
    explanation: undefined
  },
  classifications: [ { intent: 'hero.realname', score: 1 } ],
  intent: 'hero.realname',
  score: 1,
  domain: 'default',
  sourceEntities: [],
  entities: [
    {
      start: 25,
      end: 33,
      len: 9,
      levenshtein: 0,
      accuracy: 1,
      entity: 'hero',
      type: 'enum',
      option: 'spiderman',
      sourceText: 'spiderman',
      utteranceText: 'spiderman'
    },
    {
      start: 35,
      end: 36,
      len: 2,
      accuracy: 0.95,
      sourceText: '25',
      utteranceText: '25',
      entity: 'number',
      resolution: [Object]
    }
  ],
  answers: [
    {
      answer: 'The real name of spiderman is Peter Parker',
      opts: undefined
    }
  ],
  answer: 'The real name of spiderman is Peter Parker',
  actions: [],
  sentiment: {
    score: 0,
    numWords: 0,
    numHits: 0,
    average: 0,
    type: undefined,
    locale: 'en',
    vote: 'neutral'
  }
}
jesus-seijas-sp commented 3 years ago

Update: Using this code reduces the bundle size from 317KB down to 128KB, because it avoids including esprima and escodegen in the bundle.

const { containerBootstrap } = require('@nlpjs/core');
const { Nlp } = require('@nlpjs/nlp');
const { LangEn } = require('@nlpjs/lang-en-min');
const { BuiltinDefault } = require('@nlpjs/builtin-default');

/**
 * Evaluates a JavaScript expression with the properties of `contextAsScope`
 * visible as plain identifiers (e.g. `hero` resolves to `contextAsScope.hero`).
 *
 * The `with`/`eval` pair lives inside a function built by the `Function`
 * constructor because a lexical `with` statement is a SyntaxError in
 * strict-mode code (ES modules, "use strict" bundles). Functions created this
 * way are non-strict, so this helper loads in both strict and sloppy files.
 * The inner parameter names match the outer ones so the evaluated code sees
 * the same `js` / `contextAsScope` identifiers. It can also reach globals,
 * but not variables of the module that defines this helper.
 *
 * SECURITY: this executes arbitrary code. Only pass template expressions that
 * come from a trusted source (your own corpus), never raw user input.
 *
 * @param {string} js - Source of the expression to evaluate.
 * @param {object} contextAsScope - Object whose properties act as variables.
 * @returns {*} The value the expression evaluates to.
 * @throws Whatever the evaluated code throws (e.g. ReferenceError for an
 *   identifier that is neither in the context nor global).
 */
function evalInScope(js, contextAsScope) {
  const runWithScope = new Function(
    'js',
    'contextAsScope',
    'with (contextAsScope) { return eval(js); }'
  );
  return runWithScope(js, contextAsScope);
}

/**
 * Minimal evaluator: delegates the evaluation of an expression to `evalInScope`.
 */
class Evaluator {
  /**
   * @param {string} str - Expression source to evaluate.
   * @param {object} context - Object passed to `evalInScope` as the scope.
   * @returns {*} Whatever `evalInScope` returns for the expression.
   */
  evaluate(str, context) {
    const outcome = evalInScope(str, context);
    return outcome;
  }
}

// Cache used by processString: template string -> list of its `{{ ... }}` matches.
const dictionary = {};

// Shared Evaluator instance used by processString to resolve each placeholder.
const evaluator = new Evaluator();

/**
 * Replaces every `{{ expression }}` placeholder in `str` with the value the
 * shared `evaluator` returns for that expression in `context`.
 *
 * The placeholder list of each distinct template string is cached in
 * `dictionary`. Placeholders whose expression evaluates to `null` or
 * `undefined` are left in the text unchanged.
 *
 * @param {string} str - Template text.
 * @param {object} context - Scope handed to the evaluator.
 * @returns {string} The text with resolvable placeholders substituted.
 */
function processString(str, context) {
  let matches;
  // Own-property check: `dictionary` is a plain object, so a template such as
  // "constructor" or "toString" would otherwise read an inherited member and
  // be mistaken for a cache hit.
  if (Object.prototype.hasOwnProperty.call(dictionary, str)) {
    matches = dictionary[str];
  } else {
    matches = str.match(/{{\s*([^}]+)\s*}}/g) || [];
    dictionary[str] = matches;
  }
  return matches.reduce((result, placeholder) => {
    // Strip the surrounding "{{" and "}}".
    const expression = placeholder.slice(2, -2);
    const solution = evaluator.evaluate(expression, context);
    if (solution === null || solution === undefined) {
      return result;
    }
    // A replacer function inserts the value verbatim; a plain string
    // replacement would interpret "$&", "$$", ... inside the value.
    return result.replace(placeholder, () => solution);
  }, str);
}

/**
 * Recursively applies `processString` to every string found in `obj`.
 *
 * - a string is passed to `processString`;
 * - an array is mapped element by element;
 * - any other non-null object is copied into a new plain object, key by key
 *   (own enumerable keys), with each value processed;
 * - everything else (numbers, booleans, null, undefined, functions) is
 *   returned unchanged.
 *
 * @param {*} obj - Value to process.
 * @param {object} context - Scope forwarded to `processString`.
 * @returns {*} The processed copy (or the original primitive).
 */
function process(obj, context) {
  if (typeof obj === 'string') {
    return processString(obj, context);
  }
  // Primitives, null and functions have nothing to walk into.
  if (obj === null || typeof obj !== 'object') {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((item) => process(item, context));
  }
  const result = {};
  for (const key of Object.keys(obj)) {
    result[key] = process(obj[key], context);
  }
  return result;
}

/**
 * Turns a template (string, array or object) into a render function.
 *
 * @param {*} str - Template handed to `process` on every render.
 * @returns {function(object=): *} Function that takes a context (an empty
 *   object when omitted) and returns `process(str, context)`.
 */
function compile(str) {
  return (context = {}) => {
    return process(str, context);
  };
}

/**
 * Minimal template engine: compiles a template with the module-level
 * `compile` function and renders it immediately.
 */
class Template {
  /**
   * @param {*} str - Template to render.
   * @param {object} [context] - Values made available to the placeholders.
   * @returns {*} Result of calling the compiled template with `context`.
   */
  compile(str, context) {
    const render = compile(str);
    return render(context);
  }
}

// Sample corpus: two intents about super heroes plus one enum entity (`hero`)
// whose option keys are the same as the keys of `contextData`.
const corpus = {
  name: 'Corpus with entities',
  locale: 'en-US',
  // Per-hero facts. Presumably exposed to the answer templates as `_data`
  // (the templates index it with the matched `hero` option) — confirm against nlp.js.
  contextData: {
    spiderman: { realName: 'Peter Parker', city: 'Queens, New York' },
    ironman: { realName: 'Tony Stark', city: 'Stark Tower, New York' },
    thor: { realName: 'Odinson', city: 'Asgard' },
  },
  // Training data: `@hero` in an utterance marks where the entity appears.
  data: [
    {
      intent: 'hero.realname',
      utterances: ['what is the real name of @hero'],
      answers: [
        'The real name of {{ hero }} is {{ _data[entities.hero.option].realName }}',
      ],
    },
    {
      intent: 'hero.city',
      utterances: ['where @hero lives?', "what's the city of @hero?"],
      answers: ['{{ hero }} lives at {{ _data[entities.hero.option].city }}'],
    },
  ],
  // Enum entity: each option lists the surface forms that map to it.
  entities: {
    hero: {
      options: {
        spiderman: ['spiderman', 'spider-man'],
        ironman: ['ironman', 'iron-man'],
        thor: ['thor'],
      },
    },
  },
};

// Demo entry point: builds the nlp.js container with the hand-written
// Evaluator/Template, trains on `corpus`, and logs the result of processing
// one English utterance.
(async () => {
  const container = await containerBootstrap();
  container.use(Nlp);
  container.use(LangEn);
  // Register instances of the local Evaluator and Template classes under the
  // names 'Evaluator' and 'Template'.
  container.register('Evaluator', new Evaluator(), true);
  container.register('Template', new Template(), true);
  // Builtin entity extractor.
  // NOTE(review): '??' presumably acts as an "any locale" wildcard — confirm.
  const builtin = new BuiltinDefault();
  container.register('extract-builtin-??', builtin, true);
  const nlp = container.get('nlp');
  nlp.settings.autoSave = false;
  // Awaited so training cannot start before the corpus is loaded: the nlp.js
  // docs use `await nlp.addCorpus(...)`. Harmless if it returns synchronously.
  await nlp.addCorpus(corpus);
  await nlp.train();
  const response = await nlp.process('en', 'what is the real name of spiderman 25?');
  console.log(response);
})();
obaid commented 3 years ago

Thanks @jesus-seijas-sp the evaluator was the missing piece. :)