Closed liuliAI closed 7 months ago
上传的文件是什么类型的啊
上传的文件是什么类型的啊
txt,已放入documents目录里
def load_documents(directory="documents"): loader = DirectoryLoader(directory) documents = loader.load() text_spliter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100) split_docs = text_spliter.split_documents(documents) return split_docs
上传的文件是什么类型的啊
txt,已放入documents目录里
def load_documents(directory="documents"): loader = DirectoryLoader(directory) documents = loader.load() text_spliter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100) split_docs = text_spliter.split_documents(documents) return split_docs
试试TextLoader,这里有参考:https://github.com/chatchat-space/Langchain-Chatchat/issues/557
上传的文件是什么类型的啊
txt,已放入documents目录里 def load_documents(directory="documents"): loader = DirectoryLoader(directory) documents = loader.load() text_spliter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100) split_docs = text_spliter.split_documents(documents) return split_docs
试试TextLoader,这里有参考:chatchat-space/Langchain-Chatchat#557
非常感谢,已解决
上传的文件是什么类型的啊
txt,已放入documents目录里 def load_documents(directory="documents"): loader = DirectoryLoader(directory) documents = loader.load() text_spliter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100) split_docs = text_spliter.split_documents(documents) return split_docs
试试TextLoader,这里有参考:chatchat-space/Langchain-Chatchat#557
非常感谢,已解决
OKOK
Traceback (most recent call last): File "/data/glm/LLMKB.py", line 80, in <module>
documents = load_documents()
File "/data/glm/LLMKB.py", line 30, in load_documents
documents = loader.load()
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/langchain/document_loaders/directory.py", line 156, in load
self.load_file(i, p, docs, pbar)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/langchain/document_loaders/directory.py", line 105, in load_file
raise e
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/langchain/document_loaders/directory.py", line 99, in load_file
sub_docs = self.loader_cls(str(item), **self.loader_kwargs).load()
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/langchain/document_loaders/unstructured.py", line 86, in load
elements = self._get_elements()
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/langchain/document_loaders/unstructured.py", line 172, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/partition/auto.py", line 434, in partition
elements = partition_text(
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/partition/text.py", line 95, in partition_text
return _partition_text(
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/documents/elements.py", line 526, in wrapper
elements = func(*args, **kwargs)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 627, in wrapper
elements = func(*args, **kwargs)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper
elements = func(*args, **kwargs)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/chunking/__init__.py", line 71, in wrapper
elements = func(*args, **kwargs)
File "/data/liujiqiang/anaconda3/envs/glm4/lib/python3.10/site-packages/unstructured/partition/text.py", line 192, in _partition_text
element = element_from_text(ctext)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/partition/text.py", line 285, in element_from_text
elif is_possible_narrative_text(text):
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 88, in is_possible_narrative_text
if "eng" in languages and (sentence_count(text, 3) < 2) and (not contains_verb(text)):
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/partition/text_type.py", line 190, in contains_verb
pos_tags = pos_tag(text)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 44, in pos_tag
_download_nltk_package_if_not_present(
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/unstructured/nlp/tokenize.py", line 21, in _download_nltk_package_if_not_present
nltk.find(f"{package_category}/{package_name}")
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/nltk/data.py", line 555, in find
return find(modified_name, paths)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/nltk/data.py", line 542, in find
return ZipFilePathPointer(p, zipentry)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/nltk/compat.py", line 41, in _decorator
return init_func(*args, **kwargs)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/nltk/data.py", line 394, in __init__
zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/nltk/compat.py", line 41, in _decorator
return init_func(*args, **kwargs)
File "/data/anaconda3/envs/glm/lib/python3.10/site-packages/nltk/data.py", line 935, in __init__
zipfile.ZipFile.__init__(self, filename)
File "/data/anaconda3/envs/glm/lib/python3.10/zipfile.py", line 1269, in __init__
self._RealGetContents()
File "/data/anaconda3/envs/glm/lib/python3.10/zipfile.py", line 1336, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file