gusye1234 / nano-graphrag

A simple, easy-to-hack GraphRAG implementation
MIT License

JSONDecodeError while generating communities. #57

Closed ink7-sudo closed 5 days ago

ink7-sudo commented 1 week ago

Error

⠏ Processed 9 communities
⠋ Processed 10 communities
INFO:nano-graphrag:Writing graph with 568 nodes, 72 edges
Traceback (most recent call last):
  File "/home/djj/nano-graphrag/nano_graphrag/graphrag.py", line 314, in ainsert
    await generate_community_report(
  File "/home/djj/nano-graphrag/nano_graphrag/_op.py", line 583, in generate_community_report
    this_level_communities_reports = await asyncio.gather(
  File "/home/djj/nano-graphrag/nano_graphrag/_op.py", line 560, in _form_single_community_report
    data = use_string_json_convert_func(response)
  File "/home/djj/nano-graphrag/nano_graphrag/_utils.py", line 37, in convert_response_to_json
    raise e from None
  File "/home/djj/nano-graphrag/nano_graphrag/_utils.py", line 33, in convert_response_to_json
    data = json.loads(json_str)
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 22 column 10 (char 2617)

Issue

I'm using no_openai_key_at_all.py with the Ollama call replaced by a call to a local LLM. I know the error comes from a malformed response from the LLM, but since you have already implemented exception handling for that case, I don't understand why the program still raises an error here and stops.

How to reproduce


import os
import logging

import numpy as np
from nano_graphrag import GraphRAG, QueryParam
from nano_graphrag.base import BaseKVStorage
from nano_graphrag._utils import compute_args_hash, wrap_embedding_func_with_attrs
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
logging.basicConfig(level=logging.WARNING)
logging.getLogger("nano-graphrag").setLevel(logging.INFO)

# !!! qwen2-7B may produce unparsable results and cause the graph extraction to fail.
WORKING_DIR = "./nano_graphrag_cache_QWEN_TEST"

model_name = "Qwen/Qwen2-7B-Instruct"
device = "cuda" # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

EMBED_MODEL = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2", cache_folder=WORKING_DIR, device="cpu"
)

# We're using Sentence Transformers (all-MiniLM-L6-v2) to generate embeddings
@wrap_embedding_func_with_attrs(
    embedding_dim=EMBED_MODEL.get_sentence_embedding_dimension(),
    max_token_size=EMBED_MODEL.max_seq_length,
)
async def local_embedding(texts: list[str]) -> np.ndarray:
    return EMBED_MODEL.encode(texts, normalize_embeddings=True)

async def ollama_model_if_cache(
    prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
    #ollama_client = ollama.AsyncClient()
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    # Return the cached response if one exists -------------------
    hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
    messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})
    if hashing_kv is not None:
        args_hash = compute_args_hash(model, messages)
        if_cache_return = await hashing_kv.get_by_id(args_hash)
        if if_cache_return is not None:
            return if_cache_return["return"]
    # -----------------------------------------------------
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    result = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Cache the response if caching is enabled -------------------
    if hashing_kv is not None:
        await hashing_kv.upsert({args_hash: {"return": result, "model": model}})
    # -----------------------------------------------------
    return result

def remove_if_exist(file):
    if os.path.exists(file):
        os.remove(file)

def query():
    rag = GraphRAG(
        working_dir=WORKING_DIR,
        best_model_func=ollama_model_if_cache,
        cheap_model_func=ollama_model_if_cache,
        embedding_func=local_embedding,
    )
    print(
        rag.query(
            "What are the top themes in this story?", param=QueryParam(mode="global")
        )
    )

def insert():
    from time import time

    with open("/home/djj/nano-graphrag/tests/mock_data.txt", encoding="utf-8-sig") as f:
        FAKE_TEXT = f.read()

    remove_if_exist(f"{WORKING_DIR}/vdb_entities.json")
    remove_if_exist(f"{WORKING_DIR}/kv_store_full_docs.json")
    remove_if_exist(f"{WORKING_DIR}/kv_store_text_chunks.json")
    remove_if_exist(f"{WORKING_DIR}/kv_store_community_reports.json")
    remove_if_exist(f"{WORKING_DIR}/graph_chunk_entity_relation.graphml")

    rag = GraphRAG(
        working_dir=WORKING_DIR,
        enable_llm_cache=True,
        best_model_func=ollama_model_if_cache,
        cheap_model_func=ollama_model_if_cache,
        embedding_func=local_embedding,
    )
    start = time()
    rag.insert(FAKE_TEXT)
    print("indexing time:", time() - start)
    # rag = GraphRAG(working_dir=WORKING_DIR, enable_llm_cache=True)
    # rag.insert(FAKE_TEXT[half_len:])

if __name__ == "__main__":
    insert()
    query()

rangehow commented 1 week ago

See the FAQ.

ink7-sudo commented 1 week ago

See the FAQ.

But I'm not using Ollama; how do I update the config file?

rangehow commented 1 week ago

See the FAQ.

But I'm not using Ollama; how do I update the config file?

You can find the model files under the Hugging Face cache path. By modifying config.json and the way you load the model in your example, you can support a larger context length.

Add this to the config.json:

{
  ...,
  "rope_scaling": {
    "factor": 4.0,
    "original_max_position_embeddings": 32768,
    "type": "yarn"
  }
}

At the same time, set max_new_tokens in the generate call to 8192.
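
If you prefer not to edit config.json on disk, the same override can be applied when loading the model. A minimal sketch, assuming the transformers AutoConfig route and the variable names from the original script; it mirrors the JSON snippet above rather than being the only supported way:

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2-7B-Instruct"

# Apply the YaRN rope_scaling override programmatically instead of editing
# config.json by hand (same content as the JSON snippet above).
config = AutoConfig.from_pretrained(model_name)
config.rope_scaling = {
    "factor": 4.0,
    "original_max_position_embeddings": 32768,
    "type": "yarn",
}

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Then, inside the model function, allow longer completions:
# generated_ids = model.generate(**model_inputs, max_new_tokens=8192)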

ink7-sudo commented 6 days ago

Thank you very much for your help! But i met a new problem.

Traceback (most recent call last):
  File "/home/djj/nano-graphrag/local_llm_test/Qwen.py", line 130, in <module>
    insert()
  File "/home/djj/nano-graphrag/local_llm_test/Qwen.py", line 123, in insert
    rag.insert(FAKE_TEXT)
  File "/home/djj/nano-graphrag/nano_graphrag/graphrag.py", line 204, in insert
    return loop.run_until_complete(self.ainsert(string_or_strings))
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/home/djj/nano-graphrag/nano_graphrag/graphrag.py", line 322, in ainsert
    await self._insert_done()
  File "/home/djj/nano-graphrag/nano_graphrag/_storage.py", line 37, in index_done_callback
    write_json(self._data, self._file_name)
  File "/home/djj/nano-graphrag/nano_graphrag/_utils.py", line 74, in write_json
    json.dump(json_obj, f, indent=2, ensure_ascii=False)
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/__init__.py", line 179, in dump
    for chunk in iterable:
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/encoder.py", line 431, in _iterencode
    yield from _iterencode_dict(o, _current_indent_level)
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
    yield from chunks
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
    yield from chunks
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/encoder.py", line 438, in _iterencode
    o = _default(o)
  File "/home/djj/anaconda3/envs/nano/lib/python3.10/json/encoder.py", line 179, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type Qwen2ForCausalLM is not JSON serializable

I'm kind of confused about what this is for and how to solve it. I really appreciate your kind help and patience.

rangehow commented 6 days ago

We cache LLM responses by model name. In your case you are trying to put the Qwen model object into a JSON file instead of just the string 'qwen'.

Replace every use of model below with the 'qwen' string instead.

args_hash = compute_args_hash(model, messages) ...

hashing_kv.upsert({args_hash: {"return": result, "model": model}})
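
Concretely, the two lines inside your model function would change roughly like this (a sketch; "qwen2-7b-instruct" is just an illustrative label, any stable string works as the cache key):

MODEL_NAME = "qwen2-7b-instruct"  # plain string used as the cache key

# hash on the model name, not on the Qwen2ForCausalLM object
args_hash = compute_args_hash(MODEL_NAME, messages)

# store only JSON-serializable values in the cache
await hashing_kv.upsert({args_hash: {"return": result, "model": MODEL_NAME}})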

dangyuuki123 commented 3 days ago

How do I run Qwen 2 with Together AI? @rangehow

rangehow commented 3 days ago

How do I run Qwen 2 with Together AI? @rangehow

You can access models hosted on Together via their API. Check out our custom model example to give it a try.
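
For reference, here is a minimal sketch of such a custom model function, assuming Together's OpenAI-compatible endpoint; the model name, base URL, and environment variable are placeholders you will need to adapt:

import os
from openai import AsyncOpenAI

# Together exposes an OpenAI-compatible chat completions API.
together_client = AsyncOpenAI(
    api_key=os.environ["TOGETHER_API_KEY"],
    base_url="https://api.together.xyz/v1",
)

async def together_qwen2_model(
    prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})

    response = await together_client.chat.completions.create(
        model="Qwen/Qwen2-72B-Instruct",  # placeholder: pick the Qwen2 model you use
        messages=messages,
    )
    return response.choices[0].message.content

# Then pass it to GraphRAG, e.g.
# rag = GraphRAG(best_model_func=together_qwen2_model,
#                cheap_model_func=together_qwen2_model, ...)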

dangyuuki123 commented 3 days ago

When I run Qwen 2 via Together, I get this error:

File ~/test/nano_graphrag/graphrag.py:221, in GraphRAG.query(self, query, param)
--> 221 return loop.run_until_complete(self.aquery(query, param))

File ~/anaconda3/lib/python3.11/site-packages/nest_asyncio.py:98, in _patch_loop.<locals>.run_until_complete(self, future)
-->  98 return f.result()

File ~/anaconda3/lib/python3.11/asyncio/futures.py:203, in Future.result(self)
--> 203 raise self._exception.with_traceback(self._exception_tb)

File ~/anaconda3/lib/python3.11/asyncio/tasks.py:269, in Task.__step(failed resolving arguments)
--> 269 result = coro.throw(exc)

File ~/test/nano_graphrag/graphrag.py:239, in GraphRAG.aquery(self, query, param)
--> 239 response = await global_query(query, self.chunk_entity_relation_graph, self.entities_vdb, self.community_reports, self.text_chunks, param, asdict(self))

File ~/test/nano_graphrag/_op.py:1028, in global_query(query, knowledge_graph_inst, entities_vdb, community_reports, text_chunks_db, query_param, global_config)
--> 1028 map_communities_points = await _map_global_communities(query, community_datas, query_param, global_config)

File ~/test/nano_graphrag/_op.py:983, in _map_global_communities(query, communities_data, query_param, global_config)
--> 983 responses = await asyncio.gather(*[_process(c) for c in community_groups])

File ~/anaconda3/lib/python3.11/asyncio/tasks.py:339, in Task.__wakeup(self, future)
--> 339 future.result()

File ~/anaconda3/lib/python3.11/asyncio/tasks.py:267, in Task.__step(failed resolving arguments)
--> 267 result = coro.send(None)

File ~/test/nano_graphrag/_op.py:979, in _map_global_communities.<locals>._process(community_truncated_datas)
--> 979 data = use_string_json_convert_func(response)

File ~/test/nano_graphrag/_utils.py:31, in convert_response_to_json(response)
-->  31 assert json_str is not None, f"Unable to parse JSON from response: {response}"

AssertionError: Unable to parse JSON from response:

ink7-sudo commented 2 days ago

We cache LLM responses by model name. In your case you are trying to put the Qwen model object into a JSON file instead of just the string 'qwen'.

Replace every use of model below with the 'qwen' string instead.

args_hash = compute_args_hash(model, messages) ...

hashing_kv.upsert({args_hash: {"return": result, "model": model}})

Sorry, I want to ask another question: when are best_model_func, cheap_model_func, and embedding_func each used? I can't figure it out from reading the code.

rangehow commented 2 days ago

We cache LLM responses by model name. In your case you are trying to put the Qwen model object into a JSON file instead of just the string 'qwen'. Replace every use of model below with the 'qwen' string instead. args_hash = compute_args_hash(model, messages) ... hashing_kv.upsert({args_hash: {"return": result, "model": model}})

Sorry, I want to ask another question: when are best_model_func, cheap_model_func, and embedding_func each used? I can't figure it out from reading the code.

To make it easier for the community to retrieve relevant issues, please open a new issue. Thank you.