Closed fantastic-413 closed 8 months ago
I've solved the above problem but ran into a new one. When I try to use `meta-llama/Llama-2-13b-chat-hf` and run `python -m kg_rag.rag_based_generation.Llama.text_generation`, the following error is reported:
```
PS D:\project\github\KG_RAG> python -m kg_rag.rag_based_generation.Llama.text_generation
Enter your question : What is the gene associated with hypochondrogenesis?
Loading checkpoint shards: 100%|████████████████████| 2/2 [00:00<00:00, 3.85it/s]
Retrieving context from SPOKE graph...
Here is the KG-RAG based answer using Llama:
[INST]<<SYS>>
You are an expert biomedical researcher. For answering the Question at the end with brevity, you need to first read the Context provided. Then give your final answer briefly, by citing the Provenance information from the context. You can find Provenance from the Context statement 'Provenance of this association is '. Do not forget to cite the Provenance information. <</SYS>>
Context:
Disease hypochondrogenesis associates Gene COL2A1 and Provenance of this association is DISEASES.
Question: What is the gene associated with
Traceback (most recent call last):
  File "C:\Users\19011\.conda\envs\kg_rag\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\19011\.conda\envs\kg_rag\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "D:\project\github\KG_RAG\kg_rag\rag_based_generation\Llama\text_generation.py", line 58, in <module>
    main()
  File "D:\project\github\KG_RAG\kg_rag\rag_based_generation\Llama\text_generation.py", line 47, in main
    output = llm_chain.run(context=context, question=question)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\chains\base.py", line 492, in run
    return self(kwargs, callbacks=callbacks, tags=tags, metadata=metadata)[
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\chains\base.py", line 292, in __call__
    raise e
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\chains\base.py", line 286, in __call__
    self._call(inputs, run_manager=run_manager)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\chains\llm.py", line 93, in _call
    response = self.generate([inputs], run_manager=run_manager)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\chains\llm.py", line 103, in generate
    return self.llm.generate_prompt(
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\llms\base.py", line 504, in generate_prompt
    return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\llms\base.py", line 653, in generate
    output = self._generate_helper(
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\llms\base.py", line 541, in _generate_helper
    raise e
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\llms\base.py", line 528, in _generate_helper
    self._generate(
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\llms\base.py", line 1048, in _generate
    self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\langchain\llms\huggingface_pipeline.py", line 167, in _call
    response = self.pipeline(prompt)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\pipelines\text_generation.py", line 205, in __call__
    return super().__call__(text_inputs, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\pipelines\base.py", line 1140, in __call__
    return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\pipelines\base.py", line 1147, in run_single
    model_outputs = self.forward(model_inputs, **forward_params)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\pipelines\base.py", line 1046, in forward
    model_outputs = self._forward(model_inputs, **forward_params)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\pipelines\text_generation.py", line 268, in _forward
    generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\generation\utils.py", line 1648, in generate
    return self.sample(
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\generation\utils.py", line 2730, in sample
    outputs = self(
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\models\llama\modeling_llama.py", line 820, in forward
    outputs = self.model(
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\models\llama\modeling_llama.py", line 708, in forward
    layer_outputs = decoder_layer(
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\models\llama\modeling_llama.py", line 424, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\transformers\models\llama\modeling_llama.py", line 321, in forward
    query_states = self.q_proj(hidden_states)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\19011\.conda\envs\kg_rag\lib\site-packages\torch\nn\modules\linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: "addmm_impl_cpu_" not implemented for 'Half'
```
Also, my Llama loads only 2/2 checkpoint shards, while in your demo there are 3/3 checkpoint shards.
@fantastic-413 Great to know that you were able to solve the first issue! Regarding the second one, may I know if you are using a GPU with CUDA to run the Llama model? If not, the error you're observing is probably due to attempting to use float16 on a CPU, which is not supported (as per the references below).
Ref:
https://github.com/huggingface/transformers/issues/25891
https://huggingface.co/openlm-research/open_llama_7b_v2/discussions/2
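To illustrate, here is a minimal repro of that failure mode (my own snippet, not KG_RAG code): on the PyTorch build shown in your traceback, a float16 linear layer on CPU fails the same way the `q_proj` call does.

```python
# Illustration only (not from the KG_RAG repo): float16 ("Half") matmuls
# have no CPU kernel on this PyTorch build, so any Linear layer in float16
# fails on CPU exactly like the q_proj call in the traceback above.
import torch

layer = torch.nn.Linear(8, 8).to(torch.float16)   # weights stay on CPU
x = torch.randn(1, 8, dtype=torch.float16)

try:
    layer(x)  # F.linear -> addmm, which is unimplemented for Half on CPU
except RuntimeError as err:
    print(err)  # "addmm_impl_cpu_" not implemented for 'Half'
```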
Thank you, that's exactly the reason. I don't have CUDA on my laptop. I replaced `torch.float16` with `torch.bfloat16` in the `llama_model()` function in `KG_RAG\kg_rag\utility.py`, and it now runs normally without CUDA.
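For anyone else hitting this, a minimal sketch of the change (a simplification, not the repo's exact `llama_model()` code; the real function takes more parameters, so treat the names here as illustrative):

```python
# Simplified sketch of the fix: pick a dtype that matches the hardware
# instead of hard-coding float16, which has no CPU matmul kernel here.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"

# float16 is fine on CUDA; on CPU, fall back to bfloat16 (or float32).
dtype = torch.float16 if torch.cuda.is_available() else torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=dtype)
llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
```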
But I have another question: why do I see 2 checkpoint shards when loading, while your demonstration video shows 3? Did you use a different version of the Llama model?
@fantastic-413 Glad to know that it worked for you :) No, I used the same model, i.e. Llama-2-13b-chat-hf. My best guess is that the difference in the number of shards is due to the difference in the machines we run the model on. Sharding is mainly used to split the model parameters, gradients, checkpoints, etc. across the worker nodes of a machine, so different machines (with different hardware resources) could end up with a different number of shards.
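If you want to verify, the shard layout of a downloaded checkpoint is recorded in its index file, which you can inspect directly. A quick sketch (the directory path is an assumption; point it at your local snapshot, and the index file name may be `model.safetensors.index.json` for safetensors downloads):

```python
# Sketch: list the shard files recorded in a checkpoint's index.
# SNAPSHOT_DIR is hypothetical; use your local Llama-2-13b-chat-hf folder.
import json
from pathlib import Path

SNAPSHOT_DIR = Path("path/to/Llama-2-13b-chat-hf")

index = json.loads((SNAPSHOT_DIR / "pytorch_model.bin.index.json").read_text())
shards = sorted(set(index["weight_map"].values()))
print(f"{len(shards)} shard file(s):", *shards, sep="\n  ")
```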
@karthiksoman Maybe you are right. Thank you again; I have no problems for now.
Since your issue is resolved, I am closing this ticket.
When I try to run `python -m kg_rag.rag_based_generation.GPT.text_generation -g "gpt-4"`, it gets stuck at Step 1 and reports the following error:

```
Enter your question : Are there any genes that are commonly shared by parkinsons disease and rem sleep disorder?
Press enter for Step 1 - Disease entity extraction using GPT-3.5-Turbo
Processing ...
Traceback (most recent call last):
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connection.py", line 203, in _new_conn
    sock = connection.create_connection(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
OSError: [Errno 101] Network is unreachable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connectionpool.py", line 790, in urlopen
    response = self._make_request(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connectionpool.py", line 491, in _make_request
    raise new_e
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connectionpool.py", line 467, in _make_request
    self._validate_conn(conn)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1092, in _validate_conn
    conn.connect()
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connection.py", line 611, in connect
    self.sock = sock = self._new_conn()
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connection.py", line 218, in _new_conn
    raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fc6a4a419c0>: Failed to establish a new connection: [Errno 101] Network is unreachable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/requests/adapters.py", line 486, in send
    resp = conn.urlopen(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connectionpool.py", line 874, in urlopen
    return self.urlopen(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connectionpool.py", line 874, in urlopen
    return self.urlopen(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/connectionpool.py", line 844, in urlopen
    retries = retries.increment(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/urllib3/util/retry.py", line 515, in increment
    raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.openai.com', port=443): Max retries exceeded with url: /v1/chat/completions (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fc6a4a419c0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/openai/api_requestor.py", line 606, in request_raw
    result = _thread_context.session.request(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/requests/sessions.py", line 589, in request
    resp = self.send(prep, **send_kwargs)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/requests/sessions.py", line 703, in send
    r = adapter.send(request, **kwargs)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/requests/adapters.py", line 519, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='api.openai.com', port=443): Max retries exceeded with url: /v1/chat/completions (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fc6a4a419c0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/tenacity/__init__.py", line 382, in __call__
    result = fn(*args, **kwargs)
  File "/storeDisk2/lsp/KG_RAG/kg_rag/utility.py", line 183, in fetch_GPT_response
    response = openai.ChatCompletion.create(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 155, in create
    response, _, api_key = requestor.request(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/openai/api_requestor.py", line 289, in request
    result = self.request_raw(
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/openai/api_requestor.py", line 619, in request_raw
    raise error.APIConnectionError(
openai.error.APIConnectionError: Error communicating with OpenAI: HTTPSConnectionPool(host='api.openai.com', port=443): Max retries exceeded with url: /v1/chat/completions (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fc6a4a419c0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/storeDisk2/lsp/KG_RAG/kg_rag/rag_based_generation/GPT/text_generation.py", line 56, in <module>
    main()
  File "/storeDisk2/lsp/KG_RAG/kg_rag/rag_based_generation/GPT/text_generation.py", line 51, in main
    interactive(question, vectorstore, node_context_df, embedding_function_for_context_retrieval, CHAT_MODEL_ID)
  File "/storeDisk2/lsp/KG_RAG/kg_rag/utility.py", line 303, in interactive
    entities = disease_entity_extractor_v2(question)
  File "/storeDisk2/lsp/KG_RAG/kg_rag/utility.py", line 232, in disease_entity_extractor_v2
    resp = get_GPT_response(prompt_updated, system_prompts["DISEASE_ENTITY_EXTRACTION"], chat_model_id, chat_deployment_id, temperature=0)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/joblib/memory.py", line 655, in __call__
    return self._cached_call(args, kwargs)[0]
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/joblib/memory.py", line 598, in _cached_call
    out, metadata = self.call(*args, **kwargs)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/joblib/memory.py", line 856, in call
    output = self.func(*args, **kwargs)
  File "/storeDisk2/lsp/KG_RAG/kg_rag/utility.py", line 203, in get_GPT_response
    return fetch_GPT_response(instruction, system_prompt, chat_model_id, chat_deployment_id, temperature)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/tenacity/__init__.py", line 289, in wrapped_f
    return self(f, *args, **kw)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/tenacity/__init__.py", line 379, in __call__
    do = self.iter(retry_state=retry_state)
  File "/storeDisk2/lsp/miniconda3/envs/kg_rag/lib/python3.10/site-packages/tenacity/__init__.py", line 326, in iter
    raise retry_exc from fut.exception()
tenacity.RetryError: RetryError[<Future at 0x7fc6a55d7670 state=finished raised APIConnectionError>]
```
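The root cause at the bottom of that chain is `OSError: [Errno 101] Network is unreachable`: the machine cannot open a TCP connection to api.openai.com at all, and tenacity then retries and wraps the failure in a `RetryError`. A quick standalone check, independent of KG_RAG (my own snippet, not repo code):

```python
# Standalone connectivity check (illustration, not part of KG_RAG):
# if this fails, the problem is the network/proxy/firewall, not the code.
import socket

try:
    socket.create_connection(("api.openai.com", 443), timeout=5).close()
    print("api.openai.com:443 is reachable")
except OSError as err:
    print(f"unreachable: {err}")  # e.g. [Errno 101] Network is unreachable
```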