# Minimal reproduction script: build an RFTokenizer with the "heb" model,
# tokenize a short Hebrew phrase, and print the result.
from rftokenizer import RFTokenizer
my_tokenizer = RFTokenizer(model="heb") # I also tried heb.sm3
# This is the statement quoted in the traceback; per that traceback,
# rf_tokenize() calls self.load(), which is where the model file is read.
tokenized = my_tokenizer.rf_tokenize('שלום וברכה')
print(tokenized)
but it failed with:
C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Scripts\python.exe C:\Zvika\PycharmProjects\milon\parser\temp.py
Traceback (most recent call last):
File "C:\Zvika\PycharmProjects\milon\parser\temp.py", line 4, in <module>
tokenized = my_tokenizer.rf_tokenize('שלום וברכה')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\rftokenizer\tokenize_rf.py", line 923, in rf_tokenize
self.load()
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\rftokenizer\tokenize_rf.py", line 540, in load
self.bert = FlairTagger(seg=True)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\rftokenizer\flair_pos_tagger.py", line 49, in __init__
self.model = SequenceTagger.load(model_dir + lang_prefix + ".seg")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\flair\models\sequence_tagger_model.py", line 1036, in load
return cast("SequenceTagger", super().load(model_path=model_path))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\flair\nn\model.py", line 555, in load
return cast("Classifier", super().load(model_path=model_path))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\flair\nn\model.py", line 179, in load
state = load_torch_state(model_file)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\flair\file_utils.py", line 352, in load_torch_state
return torch.load(f, map_location="cpu")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\torch\serialization.py", line 1004, in load
with _open_zipfile_reader(opened_file) as opened_zipfile:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Zvika\AppData\Local\pypoetry\Cache\virtualenvs\parser-aJ2KWzVO-py3.12\Lib\site-packages\torch\serialization.py", line 456, in __init__
super().__init__(torch._C.PyTorchFileReader(name_or_buffer))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: PytorchStreamReader failed reading zip archive: failed finding central directory
Process finished with exit code 1
If it's relevant, I'm using Python 3.12.3, and this is the output of `pip list`:
Hi. I tried this simple code:
but it failed with:
If it's relevant, I'm using Python 3.12.3, and this is the output of `pip list`: