Open sebastian0619 opened 1 year ago
我在linux遇到了与你一样的问题,解决方法也与你一致。我的docker也是直接拉的最新的镜像。 这是我的报错 ` 2024-05-03 11:29:15,628 [INFO] [index_func.py:29] loading file: 基于YOLOv5Gradio的在线多功能视频图像检测系统.docx /root/.local/lib/python3.10/site-packages/langchain/document_loaders/init.py:36: LangChainDeprecationWarning: Importing document loaders from langchain is deprecated. Importing from langchain will no longer be supported as of langchain==0.2.0. Please import from langchain-community instead:
from langchain_community.document_loaders import UnstructuredWordDocumentLoader
.
To install langchain-community run `pip install -U langchain-community`
.
warnings.warn(
2024-05-03 11:29:21,299 [ERROR] [index_func.py:81] Error loading file: 基于YOLOv5Gradio的在线多功能视频图像检测系统.docx
Traceback (most recent call last):
File "/root/.local/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 21, in _download_nltk_package_if_not_present
nltk.find(f"{package_category}/{package_name}")
File "/root/.local/lib/python3.10/site-packages/nltk/data.py", line 583, in find
raise LookupError(resource_not_found)
LookupError:
Resource punkt not found. Please use the NLTK Downloader to obtain the resource:
>>> import nltk
nltk.download('punkt')
For more information see: https://www.nltk.org/data.html
Attempted to load tokenizers/punkt
Searched in:
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/app/modules/index_func.py", line 53, in get_documents texts = loader.load() File "/root/.local/lib/python3.10/site-packages/langchain_core/document_loaders/base.py", line 29, in load return list(self.lazy_load()) File "/root/.local/lib/python3.10/site-packages/langchain_community/document_loaders/unstructured.py", line 88, in lazy_load elements = self._get_elements() File "/root/.local/lib/python3.10/site-packages/langchain_community/document_loaders/word_document.py", line 125, in _get_elements return partition_docx(filename=self.file_path, **self.unstructured_kwargs) File "/root/.local/lib/python3.10/site-packages/unstructured/documents/elements.py", line 539, in wrapper elements = func(*args, **kwargs) File "/root/.local/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 622, in wrapper elements = func(*args, **kwargs) File "/root/.local/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper elements = func(*args, **kwargs) File "/root/.local/lib/python3.10/site-packages/unstructured/chunking/dispatch.py", line 83, in wrapper elements = func(*args, **kwargs) File "/root/.local/lib/python3.10/site-packages/unstructured/partition/docx.py", line 233, in partition_docx return list(elements) File "/root/.local/lib/python3.10/site-packages/unstructured/partition/lang.py", line 397, in apply_lang_metadata elements = list(elements) File "/root/.local/lib/python3.10/site-packages/unstructured/partition/docx.py", line 314, in _iter_document_elements yield from self._iter_paragraph_elements(block_item) File "/root/.local/lib/python3.10/site-packages/unstructured/partition/docx.py", line 555, in _iter_paragraph_elements yield from self._classify_paragraph_to_element(item) File "/root/.local/lib/python3.10/site-packages/unstructured/partition/docx.py", line 375, in _classify_paragraph_to_element TextSubCls = self._parse_paragraph_text_for_element_type(paragraph) File 
"/root/.local/lib/python3.10/site-packages/unstructured/partition/docx.py", line 884, in _parse_paragraph_text_for_element_type if is_possible_narrative_text(text): File "/root/.local/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 78, in is_possible_narrative_text if exceeds_cap_ratio(text, threshold=cap_threshold): File "/root/.local/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 274, in exceeds_cap_ratio if sentence_count(text, 3) > 1: File "/root/.local/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 223, in sentence_count sentences = sent_tokenize(text) File "/root/.local/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 29, in sent_tokenize _download_nltk_package_if_not_present(package_category="tokenizers", package_name="punkt") File "/root/.local/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 23, in _download_nltk_package_if_not_present nltk.download(package_name) File "/root/.local/lib/python3.10/site-packages/nltk/downloader.py", line 777, in download for msg in self.incr_download(info_or_id, download_dir, force): File "/root/.local/lib/python3.10/site-packages/nltk/downloader.py", line 629, in incr_download info = self._info_or_id(info_or_id) File "/root/.local/lib/python3.10/site-packages/nltk/downloader.py", line 603, in _info_or_id return self.info(info_or_id) File "/root/.local/lib/python3.10/site-packages/nltk/downloader.py", line 1009, in info self._update_index() File "/root/.local/lib/python3.10/site-packages/nltk/downloader.py", line 952, in _update_index ElementTree.parse(urlopen(self._url)).getroot() File "/usr/local/lib/python3.10/xml/etree/ElementTree.py", line 1222, in parse tree.parse(source, parser) File "/usr/local/lib/python3.10/xml/etree/ElementTree.py", line 580, in parse self._root = parser._parse_whole(source) xml.etree.ElementTree.ParseError: unclosed token: line 39, column 4 Traceback (most recent call last): File 
"/app/modules/models/base_model.py", line 411, in handle_file_upload construct_index(self.api_key, file_src=files) File "/app/modules/index_func.py", line 134, in construct_index raise Exception(i18n("没有找到任何支持的文档。")) Exception: No supported documents found. ` 时间:2024/5/3 我很好奇是什么原因导致的?
是否已存在现有反馈与解答?
是否是一个代理配置相关的疑问?
错误描述
复现操作
错误日志
运行环境
补充说明
No response