obulat / zeyrek

Python morphological analyzer for the Turkish language. Partial port of ZemberekNLP.
https://zeyrek.readthedocs.io/en/latest/
MIT License
47 stars 8 forks source link

Terkipleri tanımıyor! #34

Open alitekdemir opened 1 year ago

alitekdemir commented 1 year ago

Merhaba. Eski kelimelerde kullanılan -i, -u, -ül vb. eklemeleri Unknown olarak işaretliyor. Bu ekleri göz ardı ettirmek mümkün mü?

r = "tevahhuş Rezzâk Rezzâk-ı Zülcelâle bakiye-i ömrümü ahz-ı mal Mün'im-i Hakikîye şükrü, senâyı zâhirî esbaba"
data = analyzer.analyze(r)
result = [i for x in data for i in x ]
print(result)
[Parse(word='tevahhuş', lemma='tevahhuş', pos='Noun', morphemes=['Noun', 'A3sg'], formatted='[tevahhuş:Noun] tevahhuş:Noun+A3sg'),
 Parse(word='Rezzâk', lemma='Rezzak', pos='Noun', morphemes=['Noun', 'A3sg'], formatted='[Rezzak:Noun,Prop] rezzak:Noun+A3sg'),
 Parse(word='Rezzâk-ı', lemma='Unk', pos='Unk', morphemes='Unk', formatted='Unk'),
 Parse(word='Zülcelâle', lemma='Unk', pos='Unk', morphemes='Unk', formatted='Unk'),
 Parse(word='bakiye-i', lemma='Unk', pos='Unk', morphemes='Unk', formatted='Unk'),
 Parse(word='ömrümü', lemma='ömür', pos='Noun', morphemes=['Noun', 'A3sg', 'P1sg', 'Acc'], formatted='[ömür:Noun] ömr:Noun+A3sg+üm:P1sg+ü:Acc'),
 Parse(word='ahz-ı', lemma='Unk', pos='Unk', morphemes='Unk', formatted='Unk'),
 Parse(word='mal', lemma='mal', pos='Noun', morphemes=['Noun', 'A3sg'], formatted='[mal:Noun] mal:Noun+A3sg'),
 Parse(word='Münim-i', lemma='Unk', pos='Unk', morphemes='Unk', formatted='Unk'),
 Parse(word='Hakikîye', lemma='hakikî', pos='Noun', morphemes=['Noun', 'A3sg', 'Dat'], formatted='[hakikî:Noun] hakiki:Noun+A3sg+ye:Dat'),
 Parse(word='şükrü', lemma='şükür', pos='Noun', morphemes=['Noun', 'A3sg', 'Acc'], formatted='[şükür:Noun] şükr:Noun+A3sg+ü:Acc'),
 Parse(word='şükrü', lemma='şükür', pos='Noun', morphemes=['Noun', 'A3sg', 'P3sg'], formatted='[şükür:Noun] şükr:Noun+A3sg+ü:P3sg'),
 Parse(word='şükrü', lemma='Şükrü', pos='Noun', morphemes=['Noun', 'A3sg'], formatted='[Şükrü:Noun,Prop] şükrü:Noun+A3sg'),
 Parse(word=',', lemma=',', pos='Punc', morphemes=['Punc'], formatted='[,:Punc] ,:Punc'),
 Parse(word='senâyı', lemma='Sena', pos='Noun', morphemes=['Noun', 'A3sg', 'Acc'], formatted='[Sena:Noun,Prop] sena:Noun+A3sg+yı:Acc'),
 Parse(word='senâyı', lemma='sena', pos='Noun', morphemes=['Noun', 'A3sg', 'Acc'], formatted='[sena:Noun] sena:Noun+A3sg+yı:Acc'),
 Parse(word='senâyı', lemma='Senay', pos='Noun', morphemes=['Noun', 'A3sg', 'Acc'], formatted='[Senay:Noun,Prop] senay:Noun+A3sg+ı:Acc'),
 Parse(word='senâyı', lemma='Senay', pos='Noun', morphemes=['Noun', 'A3sg', 'P3sg'], formatted='[Senay:Noun,Prop] senay:Noun+A3sg+ı:P3sg'),
 Parse(word='zâhirî', lemma='zahirî', pos='Adj', morphemes=['Adj'], formatted='[zahirî:Adj] zahiri:Adj'),
 Parse(word='zâhirî', lemma='Zahir', pos='Noun', morphemes=['Noun', 'A3sg', 'Acc'], formatted='[Zahir:Noun,Prop] zahir:Noun+A3sg+i:Acc'),
 Parse(word='zâhirî', lemma='Zahir', pos='Noun', morphemes=['Noun', 'A3sg', 'P3sg'], formatted='[Zahir:Noun,Prop] zahir:Noun+A3sg+i:P3sg'),
 Parse(word='zâhirî', lemma='zahir', pos='Noun', morphemes=['Noun', 'A3sg', 'Acc'], formatted='[zahir:Noun] zahir:Noun+A3sg+i:Acc'),
 Parse(word='zâhirî', lemma='zahir', pos='Noun', morphemes=['Noun', 'A3sg', 'P3sg'], formatted='[zahir:Noun] zahir:Noun+A3sg+i:P3sg'),
 Parse(word='zâhirî', lemma='zahir', pos='Noun', morphemes=['Adj', 'Zero', 'Noun', 'A3sg', 'Acc'], formatted='[zahir:Adj] zahir:Adj|Zero→Noun+A3sg+i:Acc'),
 Parse(word='zâhirî', lemma='zahir', pos='Noun', morphemes=['Adj', 'Zero', 'Noun', 'A3sg', 'P3sg'], formatted='[zahir:Adj] zahir:Adj|Zero→Noun+A3sg+i:P3sg'),
 Parse(word='esbaba', lemma='esbap', pos='Noun', morphemes=['Noun', 'A3sg', 'Dat'], formatted='[esbap:Noun] esbab:Noun+A3sg+a:Dat')]