Closed mustafa0x closed 9 months ago
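Given the phrase 'كلم الرجل', camel_tools reports the root of 'كلم' as FOREIGN, while the Camelira API returns the correct root 'ك.ل.م':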
```pycon
>>> words = 'كلم الرجل'
>>> from camel_tools.disambig.mle import MLEDisambiguator
... from camel_tools.tokenizers.morphological import MorphologicalTokenizer
... from camel_tools.disambig.bert import BERTUnfactoredDisambiguator
... from camel_tools.morphology.database import MorphologyDB
... from camel_tools.morphology.analyzer import Analyzer
...
... unfactored = BERTUnfactoredDisambiguator.pretrained()
... analyzer = Analyzer(db=MorphologyDB.builtin_db())
... mle_msa = MLEDisambiguator.pretrained('calima-msa-r13')
... msa_d3_tokenizer = MorphologicalTokenizer(disambiguator=mle_msa, scheme='d3tok')

# the root of كلم is seen as FOREIGN (since it assumes it is kilometer)
>>> analyzer.analyze(words)[0]['root']
'FOREIGN'
>>> unfactored.disambiguate([words])[0].analyses[0].analysis['root']
'FOREIGN'
>>> mle_msa.disambiguate([words])[0].analyses[0].analysis['root']
'FOREIGN'
>>> import requests
... import json
...
... url = "https://camelira.abudhabi.nyu.edu/api/disambig"
... headers = {
...     "content-type": "application/json",
...     "Referer": "https://camelira.abudhabi.nyu.edu/",
... }
... data = {
...     "dialect": "msa",
...     "sentence": words,
... }
... response = requests.post(url, headers=headers, data=json.dumps(data))

# camelira returns the correct root
>>> json.loads(response.text)['output']['disambig'][0]['analyses'][0]['analysis']['root']
'ك.ل.م'
```
given the phrase 'كلم الرجل'