报错:
Traceback (most recent call last):
File "/home/david/20240207/ChatGLM2-6B/demo/doc_pro.py", line 5, in <module>
docs = loader.load()
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/langchain_core/document_loaders/base.py", line 29, in load
return list(self.lazy_load())
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/langchain_community/document_loaders/unstructured.py", line 87, in lazy_load
elements = self._get_elements()
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/langchain_community/document_loaders/unstructured.py", line 175, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/auto.py", line 434, in partition
elements = partition_text(
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text.py", line 95, in partition_text
return _partition_text(
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/documents/elements.py", line 526, in wrapper
elements = func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 622, in wrapper
elements = func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper
elements = func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/chunking/dispatch.py", line 83, in wrapper
elements = func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text.py", line 192, in _partition_text
element = element_from_text(ctext)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text.py", line 285, in element_from_text
elif is_possible_narrative_text(text):
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 88, in is_possible_narrative_text
if "eng" in languages and (sentence_count(text, 3) < 2) and (not contains_verb(text)):
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 190, in contains_verb
pos_tags = pos_tag(text)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 44, in pos_tag
_download_nltk_package_if_not_present(
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 21, in _download_nltk_package_if_not_present
nltk.find(f"{package_category}/{package_name}")
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/data.py", line 555, in find
return find(modified_name, paths)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/data.py", line 542, in find
return ZipFilePathPointer(p, zipentry)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/compat.py", line 41, in _decorator
return init_func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/data.py", line 394, in __init__
zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/compat.py", line 41, in _decorator
return init_func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/data.py", line 935, in __init__
zipfile.ZipFile.__init__(self, filename)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/zipfile.py", line 1271, in __init__
self._RealGetContents()
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/zipfile.py", line 1338, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
Expected Behavior
No response
Steps To Reproduce
from langchain.document_loaders import UnstructuredFileLoader
Is there an existing issue for this?
Current Behavior
通过 UnstructuredFileLoader 读取 txt 文件
from langchain.document_loaders import UnstructuredFileLoader
loader = UnstructuredFileLoader("/home/ChatGLM2-6B/demo/date/novel.txt")
docs = loader.load()
print(f'documents:{len(data)}')
报错:
Traceback (most recent call last):
File "/home/david/20240207/ChatGLM2-6B/demo/doc_pro.py", line 5, in <module>
docs = loader.load()
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/langchain_core/document_loaders/base.py", line 29, in load
return list(self.lazy_load())
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/langchain_community/document_loaders/unstructured.py", line 87, in lazy_load
elements = self._get_elements()
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/langchain_community/document_loaders/unstructured.py", line 175, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/auto.py", line 434, in partition
elements = partition_text(
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text.py", line 95, in partition_text
return _partition_text(
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/documents/elements.py", line 526, in wrapper
elements = func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 622, in wrapper
elements = func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper
elements = func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/chunking/dispatch.py", line 83, in wrapper
elements = func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text.py", line 192, in _partition_text
element = element_from_text(ctext)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text.py", line 285, in element_from_text
elif is_possible_narrative_text(text):
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 88, in is_possible_narrative_text
if "eng" in languages and (sentence_count(text, 3) < 2) and (not contains_verb(text)):
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 190, in contains_verb
pos_tags = pos_tag(text)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 44, in pos_tag
_download_nltk_package_if_not_present(
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 21, in _download_nltk_package_if_not_present
nltk.find(f"{package_category}/{package_name}")
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/data.py", line 555, in find
return find(modified_name, paths)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/data.py", line 542, in find
return ZipFilePathPointer(p, zipentry)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/compat.py", line 41, in _decorator
return init_func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/data.py", line 394, in __init__
zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/compat.py", line 41, in _decorator
return init_func(*args, **kwargs)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/site-packages/nltk/data.py", line 935, in __init__
zipfile.ZipFile.__init__(self, filename)
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/zipfile.py", line 1271, in __init__
self._RealGetContents()
File "/home/david/anaconda3/envs/chatglm/lib/python3.10/zipfile.py", line 1338, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
Expected Behavior
No response
Steps To Reproduce
from langchain.document_loaders import UnstructuredFileLoader
loader = UnstructuredFileLoader("/home/ChatGLM2-6B/demo/date/novel.txt")
docs = loader.load()
Environment
Anything else?
No response