Closed · ayanjiushishuai closed this issue 2 months ago
When I use llama3 (80k input), I get similar error messages in the global search part; when I use qwen2:7b (320k input), it is solved.
However, local search still does not work: ZeroDivisionError: Weights sum to zero, can't be normalized
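For context, this error originates in graphrag's `embed()` path, where chunk embeddings are combined with `np.average`: if every chunk fails to embed, both the values and the weights are empty lists, and numpy raises exactly this error. A minimal reproduction:

```python
import numpy as np

# When every chunk embedding fails, graphrag ends up averaging an
# empty list with empty weights, which raises the reported error.
chunk_embeddings: list[list[float]] = []
chunk_lens: list[int] = []

try:
    np.average(chunk_embeddings, axis=0, weights=chunk_lens)
except ZeroDivisionError as e:
    print(e)  # -> Weights sum to zero, can't be normalized
```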
Consolidating alternate model issues here: #657
@yurochang I think you mean 8k input for llama3. Are you also using ollama for embeddings? I was able to run a local query without error when I modified the code in `graphrag\query\llm\oai\embedding.py` as follows (you need to `pip install ollama` first), but it yields completely out-of-context results.
I also tried other solutions from the GitHub issues; they either give out-of-context results or raise the same error. I also printed the `context_text` after context building (before the local search happens): it was somewhat related to the input, but it did not include anything related to my question.
I suspect this is caused by the errors that occurred when creating the community reports, and that our models were too small. Today ollama added the newest llama 3.1 model with a 128k context window; I'm going to give it a try.
```python
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""OpenAI Embedding model implementation."""

import asyncio
import json
from collections.abc import Callable
from typing import Any

import numpy as np
import ollama
import tiktoken
from tenacity import (
    AsyncRetrying,
    RetryError,
    Retrying,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential_jitter,
)

from graphrag.query.llm.base import BaseTextEmbedding
from graphrag.query.llm.oai.base import OpenAILLMImpl
from graphrag.query.llm.oai.typing import (
    OPENAI_RETRY_ERROR_TYPES,
    OpenaiApiType,
)
from graphrag.query.llm.text_utils import chunk_text
from graphrag.query.progress import StatusReporter


class OpenAIEmbedding(BaseTextEmbedding, OpenAILLMImpl):
    """Wrapper for OpenAI Embedding models."""

    def __init__(
        self,
        api_key: str | None = None,
        azure_ad_token_provider: Callable | None = None,
        model: str = "text-embedding-3-small",
        deployment_name: str | None = None,
        api_base: str | None = None,
        api_version: str | None = None,
        api_type: OpenaiApiType = OpenaiApiType.OpenAI,
        organization: str | None = None,
        encoding_name: str = "cl100k_base",
        max_tokens: int = 8191,
        max_retries: int = 10,
        request_timeout: float = 180.0,
        retry_error_types: tuple[type[BaseException]] = OPENAI_RETRY_ERROR_TYPES,  # type: ignore
        reporter: StatusReporter | None = None,
    ):
        OpenAILLMImpl.__init__(
            self=self,
            api_key=api_key,
            azure_ad_token_provider=azure_ad_token_provider,
            deployment_name=deployment_name,
            api_base=api_base,
            api_version=api_version,
            api_type=api_type,  # type: ignore
            organization=organization,
            max_retries=max_retries,
            request_timeout=request_timeout,
            reporter=reporter,
        )

        self.model = model
        self.encoding_name = encoding_name
        self.max_tokens = max_tokens
        self.token_encoder = tiktoken.get_encoding(self.encoding_name)
        self.retry_error_types = retry_error_types

    def embed(self, text: str, **kwargs: Any) -> list[float]:
        """
        Embed text using OpenAI Embedding's sync function.

        For text longer than max_tokens, chunk texts into max_tokens,
        embed each chunk, then combine using weighted average.
        Please refer to: https://github.com/openai/openai-cookbook/blob/main/examples/Embedding_long_inputs.ipynb
        """
        token_chunks = chunk_text(
            text=text, token_encoder=self.token_encoder, max_tokens=self.max_tokens
        )
        chunk_embeddings = []
        chunk_lens = []
        for chunk in token_chunks:
            try:
                embedding, chunk_len = self._embed_with_retry(chunk, **kwargs)
                chunk_embeddings.append(embedding)
                chunk_lens.append(chunk_len)
            # TODO: catch a more specific exception
            except Exception as e:  # noqa BLE001
                self._reporter.error(
                    message="Error embedding chunk",
                    details={self.__class__.__name__: str(e)},
                )
                continue
        # NOTE: if every chunk fails, chunk_lens is empty and this is where
        # "ZeroDivisionError: Weights sum to zero, can't be normalized" is raised.
        chunk_embeddings = np.average(chunk_embeddings, axis=0, weights=chunk_lens)
        chunk_embeddings = chunk_embeddings / np.linalg.norm(chunk_embeddings)
        return chunk_embeddings.tolist()

    async def aembed(self, text: str, **kwargs: Any) -> list[float]:
        """
        Embed text using OpenAI Embedding's async function.

        For text longer than max_tokens, chunk texts into max_tokens,
        embed each chunk, then combine using weighted average.
        """
        token_chunks = chunk_text(
            text=text, token_encoder=self.token_encoder, max_tokens=self.max_tokens
        )
        chunk_embeddings = []
        chunk_lens = []
        embedding_results = await asyncio.gather(*[
            self._aembed_with_retry(chunk, **kwargs) for chunk in token_chunks
        ])
        # Drop chunks whose embedding failed; if all of them failed,
        # np.average below raises the same ZeroDivisionError.
        embedding_results = [result for result in embedding_results if result[0]]
        chunk_embeddings = [result[0] for result in embedding_results]
        chunk_lens = [result[1] for result in embedding_results]
        chunk_embeddings = np.average(chunk_embeddings, axis=0, weights=chunk_lens)  # type: ignore
        chunk_embeddings = chunk_embeddings / np.linalg.norm(chunk_embeddings)
        return chunk_embeddings.tolist()

    def _embed_with_retry(
        self, text: str | tuple, **kwargs: Any
    ) -> tuple[list[float], int]:
        try:
            retryer = Retrying(
                stop=stop_after_attempt(self.max_retries),
                wait=wait_exponential_jitter(max=10),
                reraise=True,
                retry=retry_if_exception_type(self.retry_error_types),
            )
            for attempt in retryer:
                with attempt:
                    # Original OpenAI call, replaced by the ollama call below:
                    # embedding = (
                    #     self.sync_client.embeddings.create(  # type: ignore
                    #         input=text,
                    #         model=self.model,
                    #         **kwargs,  # type: ignore
                    #     )
                    #     .data[0]
                    #     .embedding
                    #     or []
                    # )
                    if isinstance(text, tuple):
                        text = json.dumps(text)
                    embedding = ollama.embeddings(model="nomic-embed-text", prompt=text)
                    embedding = list(embedding["embedding"])
                    return (embedding, len(text))
        except RetryError as e:
            self._reporter.error(
                message="Error at embed_with_retry()",
                details={self.__class__.__name__: str(e)},
            )
            return ([], 0)
        else:
            # TODO: why not just throw in this case?
            return ([], 0)

    async def _aembed_with_retry(
        self, text: str | tuple, **kwargs: Any
    ) -> tuple[list[float], int]:
        try:
            retryer = AsyncRetrying(
                stop=stop_after_attempt(self.max_retries),
                wait=wait_exponential_jitter(max=10),
                reraise=True,
                retry=retry_if_exception_type(self.retry_error_types),
            )
            async for attempt in retryer:
                with attempt:
                    # Original OpenAI call, replaced by the ollama call below:
                    # embedding = (
                    #     await self.async_client.embeddings.create(  # type: ignore
                    #         input=text,
                    #         model=self.model,
                    #         **kwargs,  # type: ignore
                    #     )
                    # ).data[0].embedding or []
                    if isinstance(text, tuple):
                        text = json.dumps(text)
                    # NOTE: ollama.embeddings is synchronous, so this blocks the
                    # event loop; ollama.AsyncClient would be the non-blocking option.
                    embedding = ollama.embeddings(model="nomic-embed-text", prompt=text)
                    embedding = list(embedding["embedding"])
                    return (embedding, len(text))
        except RetryError as e:
            self._reporter.error(
                message="Error at embed_with_retry()",
                details={self.__class__.__name__: str(e)},
            )
            return ([], 0)
        else:
            # TODO: why not just throw in this case?
            return ([], 0)
```
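If you try this patch, note that the ollama model name is hardcoded, so the model has to be pulled first (`ollama pull nomic-embed-text`). A quick smoke test against a default local ollama install:

```python
import ollama

# Assumes ollama is serving locally and nomic-embed-text has been pulled.
resp = ollama.embeddings(model="nomic-embed-text", prompt="hello world")
print(len(resp["embedding"]))  # nomic-embed-text produces 768-dimensional vectors
```

If this snippet fails, every chunk embedding in local search fails as well, which is one path to the ZeroDivisionError above. And if the index itself was built with a different embedding model, the query vectors from nomic-embed-text live in a different vector space than the stored ones, which could explain the out-of-context results.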
> When I use llama3 (80k input), I have similar error information in the global search part; when I use qwen2:7b (320k input), it is solved. While local search still does not work: ZeroDivisionError: Weights sum to zero, can't be normalized

I used qwen2:7b again, but the problem still exists.
Describe the bug
When I run `python -m graphrag.index --root ./ragtest`, errors occurred in the create_final_entities part.

Steps to reproduce
1. Install ollama: `curl -fsSL https://ollama.com/install.sh | sh`
2. Configure `.env` and `settings.yaml` as shown below.

Expected Behavior
The pipeline should work well.
GraphRAG Config Used
The `settings.yaml` config is like that:
The `.env` config is like:
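For reference, community setups that point graphrag at ollama generally follow the shape below. The values are illustrative assumptions, not the reporter's actual settings, and whether the embeddings endpoint works through ollama's OpenAI-compatible API depends on the ollama version, which is exactly why the embedding.py patch above exists.

```yaml
# Illustrative settings.yaml fragment for a local ollama server (assumed values).
llm:
  api_key: ${GRAPHRAG_API_KEY}   # placeholder; ollama does not check it
  type: openai_chat
  model: qwen2:7b
  api_base: http://localhost:11434/v1

embeddings:
  llm:
    api_key: ${GRAPHRAG_API_KEY}
    type: openai_embedding
    model: nomic-embed-text
    api_base: http://localhost:11434/v1
```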
Logs and screenshots
log file:
However, when I copy this JSON string out to test it, the format seems correct.
I have also tried some solutions, like manually fixing the JSON format and changing the format of the prompt. That does help when the output is not a standard JSON string, but my output now looks OK and there are still errors. What's more, I tried different models like qwen2:1.5b and phi3, but they are all small models. Does this mean GraphRAG does not support these small models? A quick way to check a pasted string is sketched below.
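A minimal checker for pasting suspect model output (an illustrative helper, not part of graphrag; the fence-stripping mirrors a common failure mode of small models that wrap JSON in markdown):

```python
import json

def try_parse_json(raw: str) -> dict | None:
    """Parse a model response as JSON, trimming markdown code fences first."""
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.strip("`")
        if cleaned.startswith("json"):  # drop a leftover language tag
            cleaned = cleaned[len("json"):]
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as e:
        print(f"Invalid JSON at position {e.pos}: {e.msg}")
        return None

# Paste a JSON string from the indexing log here to verify it parses:
print(try_parse_json('{"title": "Community report", "findings": []}'))
```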
Additional Information