psolin / cleanco

Company Name Processor written in Python
MIT License
318 stars 94 forks source link

Want to add custom words #84

Open aditya113141 opened 2 years ago

aditya113141 commented 2 years ago

I need to add the words 'Software' and 'Foundation' to the list of words to be cleaned. How can I modify the library to do this?

FBnil commented 1 year ago

There's a bug in the code that prevents you from preparing your own terms (the library function that prepares the terms does not accept parameters), so here's a way to do it without modifying the library:

from cleanco import basename
from cleanco.clean import custom_basename,normalize_terms,terms_by_type,terms_by_country
import functools
import operator

# Extra terms to strip, in the same {type label: [terms]} shape as cleanco's
# terms_by_type. The example further down merges this dict over
# terms_by_type with {**a, **b}, so a key that already exists there would
# replace that entry's whole list rather than extend it.
my_terms_by_type = {"Foundation": ["fnd"]}

def prepare_my_terms(terms_by_type, terms_by_country):
    """Build the term structure that is handed to ``custom_basename``.

    Note that both parameters shadow the dictionaries of the same name
    imported from ``cleanco.clean``; inside this function only the
    arguments are used.

    Args:
        terms_by_type: Mapping whose values are iterables of terms.
        terms_by_country: Mapping whose values are iterables of terms.

    Returns:
        A list of ``(token_count, tokens)`` tuples, where ``tokens`` is a
        normalized term split on whitespace. Entries are ordered by
        descending token count, then ascending token list, so the result
        does not depend on set iteration order.
    """
    # Flatten every per-type and per-country list, then de-duplicate.
    flattened = [term for group in terms_by_type.values() for term in group]
    flattened += [term for group in terms_by_country.values() for term in group]
    unique_terms = set(flattened)

    # normalize_terms comes from cleanco; presumably it yields one cleaned
    # string per term -- confirm against the library.
    token_lists = [
        normalized.split() for normalized in normalize_terms(unique_terms)
    ]

    # Longest (most tokens) first, ties broken by the tokens themselves.
    token_lists.sort(key=lambda tokens: (-len(tokens), tokens))
    return [(len(tokens), tokens) for tokens in token_lists]

business_name1 = "Some Big Pharma, LLC"
business_name2 = "Spaguetti, fnd"

# First pass: the stock basename() imported from cleanco, for comparison.
for company in (business_name1, business_name2):
    print(basename(company))

# Overlay the custom entries on the library's terms_by_type (on a key
# clash the custom entry wins), then build the prepared term structure.
merged_terms_by_type = {**terms_by_type, **my_terms_by_type}
my_terms = prepare_my_terms(merged_terms_by_type, terms_by_country)

# Second pass: the same names, cleaned against the extended term list.
for company in (business_name1, business_name2):
    print(custom_basename(company, my_terms))