Open jpamintuan opened 3 days ago
Sorry, I'm very new to this, but I think it's a problem in my system.
I got the same issue as well. I fixed it by renaming the directory inside punkt from PY3 to PY3_tab, as it was looking for that directory. I think in your case you need to rename it to punkt_tab.
Resource punkt_tab not found. This error might be fixed
As the error states, the punkt_tab resource is missing,
so you need to download the required package via
python -m nltk.downloader tokenizers
This info is also found in nltk's documentation.
Hi, I ran create_database.py but I kept getting this error: `
(ONE_RAG_TEST) ubuntu@ip-~/test_ollama/langchain-rag-tutorial$ python create_database.py
[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data] Package punkt is already up-to-date!
Error loading file data/books/alice_in_wonderland.md
Traceback (most recent call last):
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/create_database.py", line 73, in <module>
main()
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/create_database.py", line 27, in main
generate_data_store()
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/create_database.py", line 31, in generate_data_store
documents = load_documents()
^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/create_database.py", line 38, in load_documents
documents = loader.load()
^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/langchain_community/document_loaders/directory.py", line 117, in load
return list(self.lazy_load())
^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/langchain_community/document_loaders/directory.py", line 182, in lazy_load
yield from self._lazy_load_file(i, p, pbar)
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/langchain_community/document_loaders/directory.py", line 220, in _lazy_load_file
raise e
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/langchain_community/document_loaders/directory.py", line 210, in _lazy_load_file
for subdoc in loader.lazy_load():
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/langchain_community/document_loaders/unstructured.py", line 88, in lazy_load
elements = self._get_elements()
^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/langchain_community/document_loaders/unstructured.py", line 180, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/partition/auto.py", line 415, in partition
elements = _partition_md(
^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/documents/elements.py", line 591, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/file_utils/filetype.py", line 618, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/chunking/dispatch.py", line 74, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/partition/md.py", line 112, in partition_md
return partition_html(
^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/documents/elements.py", line 591, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/file_utils/filetype.py", line 618, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/chunking/dispatch.py", line 74, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/partition/html.py", line 149, in partition_html
document_to_element_list(
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/partition/common.py", line 559, in document_to_element_list
num_pages = len(document.pages)
^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/documents/xml.py", line 54, in pages
self._pages = self._parse_pages_from_element_tree()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/documents/html.py", line 173, in _parse_pages_from_element_tree
_page_elements, descendanttag_elems = _process_text_tag(tag_elem)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/documents/html.py", line 630, in _process_text_tag
element = _parse_tag(tag_elem, include_tail_text)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/documents/html.py", line 438, in _parse_tag
return _text_to_element(
^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/documents/html.py", line 486, in _text_to_element
elif is_narrative_tag(text, tag):
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/documents/html.py", line 536, in is_narrative_tag
return tag not in HEADING_TAGS and is_possible_narrative_text(text)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/partition/text_type.py", line 80, in is_possible_narrative_text
if exceeds_cap_ratio(text, threshold=cap_threshold):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/partition/text_type.py", line 276, in exceeds_cap_ratio
if sentence_count(text, 3) > 1:
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/partition/text_type.py", line 225, in sentence_count
sentences = sent_tokenize(text)
^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/unstructured/nlp/tokenize.py", line 30, in sent_tokenize
return _sent_tokenize(text)
^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/nltk/tokenize/__init__.py", line 119, in sent_tokenize
tokenizer = _get_punkt_tokenizer(language)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/nltk/tokenize/__init__.py", line 105, in _get_punkt_tokenizer
return PunktTokenizer(language)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/nltk/tokenize/punkt.py", line 1744, in __init__
self.load_lang(lang)
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/nltk/tokenize/punkt.py", line 1749, in load_lang
lang_dir = find(f"tokenizers/punkt_tab/{lang}/")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/test_ollama/langchain-rag-tutorial/ONE_RAG_TEST/lib/python3.12/site-packages/nltk/data.py", line 579, in find
raise LookupError(resource_not_found)
LookupError:
Resource punkt_tab not found. Please use the NLTK Downloader to obtain the resource:
>>> import nltk
>>> nltk.download('punkt_tab')
For more information see: https://www.nltk.org/data.html
Attempted to load tokenizers/punkt_tab/english/
Searched in:
(ONE_RAG_TEST) ubuntu:~/test_ollama/langchain-rag-tutorial$
`