spencermountain / compromise

modest natural-language processing
http://compromise.cool
MIT License
11.46k stars 656 forks source link

[Suggestion]: Improvements on currency parser & rules. #1037

Open MarketingPip opened 1 year ago

MarketingPip commented 1 year ago

The currency parser seems to need some work. Things like ("Five cents") or ("5 dollars") just return "5"...

Plus, we should add rules for phrases like "(one|five|ten) (dollar|dollars) bill".

That said, I'm throwing this here! (I'm thinking something like this should become the new .get(), etc.)

/**
 * Collect the money mentions found in `str`, pairing each match's text with
 * the unit reported on its parsed number.
 *
 * Relies on a global `nlp` (compromise) being in scope.
 *
 * @param {string} str - Text to scan for money expressions.
 * @returns {Array<{text: string, unit: string}>} One entry per match that has
 *   both a non-empty text and a non-empty unit; other matches are skipped.
 */
function extractCurrencys(str) {
  const matches = nlp(str).money().json();
  const currencies = [];

  // for...of visits only the array's elements; for...in would also walk any
  // enumerable property that happens to sit on Array.prototype.
  for (const match of matches) {
    // TODO: we need to replace units with only (dollars/cents, exchanges like USD etc...)
    const unit = match?.number?.unit;
    const text = match?.text;
    if (unit && text) {
      currencies.push({ text, unit });
    }
  }

  return currencies;
}

// Example usage: run the extractor on a sample sentence and log the result.
console.log(extractCurrencys("$1 dollar to $2 dollar")) 

If you want to hack on this alone, you're more than welcome. Or, if you'd rather work on it together, that's more than fine too.

MarketingPip commented 1 year ago

Leaving this here for self - WIP:

/**
 * Collect the money mentions found in `str`, keeping a match's unit only when
 * compromise tags that unit as `#Currency`.
 *
 * Relies on a global `nlp` (compromise) being in scope.
 *
 * @param {string} str - Text to scan for money expressions.
 * @returns {Array<{text: string, unit: (string|null)}>} One entry per match
 *   with a non-empty text. `unit` is the match's unit when it is tagged
 *   `#Currency`, otherwise `null`.
 */
function extractCurrencys(str) {
  const matches = nlp(str).money().json();
  const currencies = [];

  // for...of visits only the array's elements; for...in would also walk any
  // enumerable property that happens to sit on Array.prototype.
  for (const match of matches) {
    const text = match?.text;
    if (!text) {
      // Nothing is recorded for matches without text, so skip the unit lookup.
      continue;
    }

    // TODO: we need to replace units with only (dollars/cents, exchanges like USD etc...)
    const rawUnit = match?.number?.unit;
    // A missing unit, or one that is not tagged #Currency, is reported as null.
    const unit = rawUnit && nlp(rawUnit).has("#Currency") ? rawUnit : null;

    currencies.push({ text, unit });
  }

  return currencies;
}