severian42 / GraphRAG-Local-UI

GraphRAG using Local LLMs - Features robust API and multiple apps for Indexing/Prompt Tuning/Query/Chat/Visualizing/Etc. This is meant to be the ultimate GraphRAG/KG local LLM app.
MIT License
1.51k stars 173 forks source link

Indexing Error: UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa8 in position 431: invalid start byte #63

Closed Ikaros-521 closed 1 month ago

Ikaros-521 commented 1 month ago
🚀 Reading settings from ragtest\settings.yaml
Traceback (most recent call last):
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\__main__.py", line 68, in <module>
index_cli(
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\cli.py", line 156, in index_cli
_run_workflow_async()
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\cli.py", line 144, in _run_workflow_async
loop.run_until_complete(execute())
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\nest_asyncio.py", line 98, in run_until_complete
return f.result()
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\asyncio\futures.py", line 201, in result
raise self._exception.with_traceback(self._exception_tb)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\asyncio\tasks.py", line 232, in __step
result = coro.send(None)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\cli.py", line 118, in execute
async for output in run_pipeline_with_config(
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\run.py", line 144, in run_pipeline_with_config
dataset = dataset if dataset is not None else await _create_input(config.input)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\run.py", line 133, in _create_input
return await load_input(config, progress_reporter, root_dir)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\input\load_input.py", line 81, in load_input
results = await loader(config, progress, storage)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\input\text.py", line 55, in load
return pd.DataFrame([await load_file(file, group) for file, group in files])
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\input\text.py", line 55, in <listcomp>
return pd.DataFrame([await load_file(file, group) for file, group in files])
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\input\text.py", line 37, in load_file
text = await storage.get(path, encoding="utf-8")
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\storage\file_pipeline_storage.py", line 86, in get
return await self._read_file(file_path, as_bytes, encoding)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\graphrag\index\storage\file_pipeline_storage.py", line 109, in _read_file
return await f.read()
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\site-packages\aiofiles\threadpool\utils.py", line 43, in method
return await self._loop.run_in_executor(self._executor, cb)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\asyncio\futures.py", line 285, in __await__
yield self  # This tells Task to wait for completion.
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\asyncio\tasks.py", line 304, in __wakeup
future.result()
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\asyncio\futures.py", line 201, in result
raise self._exception.with_traceback(self._exception_tb)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\concurrent\futures\thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa8 in position 431: invalid start byte
Ikaros-521 commented 1 month ago

The error above came from the installed graphrag package version. After switching to the graphrag version bundled with the repository, a new issue appears:

🚀 Reading settings from ragtest\settings.yaml
Traceback (most recent call last):
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\__main__.py", line 76, in <module>
index_cli(
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\cli.py", line 97, in index_cli
pipeline_config: str | PipelineConfig = config or _create_default_config(
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\cli.py", line 243, in _create_default_config
result = create_pipeline_config(parameters, verbose)
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\create_pipeline_config.py", line 132, in create_pipeline_config
*_graph_workflows(settings, embedded_fields),
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\create_pipeline_config.py", line 291, in _graph_workflows
"strategy": settings.entity_extraction.resolved_strategy(
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\config\models\entity_extraction_config.py", line 41, in resolved_strategy
"extraction_prompt": (Path(root_dir) / self.prompt).read_text()
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\pathlib.py", line 1135, in read_text
return f.read()
UnicodeDecodeError: 'gbk' codec can't decode byte 0x9d in position 2386: illegal multibyte sequence
Ikaros-521 commented 1 month ago

The error above came from the installed graphrag package version. After switching to the graphrag version bundled with the repository, a new issue appears:

🚀 Reading settings from ragtest\settings.yaml
Traceback (most recent call last):
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\__main__.py", line 76, in <module>
index_cli(
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\cli.py", line 97, in index_cli
pipeline_config: str | PipelineConfig = config or _create_default_config(
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\cli.py", line 243, in _create_default_config
result = create_pipeline_config(parameters, verbose)
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\create_pipeline_config.py", line 132, in create_pipeline_config
*_graph_workflows(settings, embedded_fields),
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\index\create_pipeline_config.py", line 291, in _graph_workflows
"strategy": settings.entity_extraction.resolved_strategy(
File "F:\GraphRAG-Ollama-UI\origin\GraphRAG-Local-UI\graphrag\config\models\entity_extraction_config.py", line 41, in resolved_strategy
"extraction_prompt": (Path(root_dir) / self.prompt).read_text()
File "f:\GraphRAG-Ollama-UI\Miniconda3\lib\pathlib.py", line 1135, in read_text
return f.read()
UnicodeDecodeError: 'gbk' codec can't decode byte 0x9d in position 2386: illegal multibyte sequence

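Fix: do a global replacement across the code base so that prompt files are read as UTF-8 instead of the Windows default locale encoding (GBK here). Replace every call of the form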

"extraction_prompt": (Path(root_dir) / self.prompt).read_text()

->

"extraction_prompt": (Path(root_dir) / self.prompt).read_text(encoding="utf-8")