HKUDS / LightRAG

"LightRAG: Simple and Fast Retrieval-Augmented Generation"
https://arxiv.org/abs/2410.05779
MIT License
9.22k stars 1.13k forks

openai ratelimit error and ollama too slow on cpu #293

Open Arslan-Mehmood1 opened 4 days ago

Arslan-Mehmood1 commented 4 days ago

code:

import os
import time
from tqdm.auto import tqdm

os.environ['OPENAI_API_KEY']='abc'

import pdfplumber
from lightrag import LightRAG, QueryParam
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc

from lightrag.llm import gpt_4o_mini_complete

WORKING_DIR = "../run_2"

if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

# ========================================  OpenAI  ======================================== #
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=gpt_4o_mini_complete,
    # llm_model_func=gpt_4o_complete
)
# ========================================  Ollama  ======================================== #
# rag = LightRAG(
#    working_dir=WORKING_DIR,
#    chunk_token_size=1200,
#    llm_model_func=ollama_model_complete,
#    llm_model_name="llama3.2:1b",
#    llm_model_max_async=1,
#    llm_model_max_token_size=32768,
#    llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
#    embedding_func=EmbeddingFunc(
#        embedding_dim=768,
#        max_token_size=8192,
#        func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text", host="http://localhost:11434"),
#    ),
# )

pdf_path = "../CompaniesAct2013.pdf" #Constitution_of_India.pdf
pdf_text = ""
with pdfplumber.open(pdf_path) as pdf:
    # Note: pages[14:20] yields 6 pages, so total=2 was wrong; tqdm infers
    # the total from the list slice on its own.
    for page in tqdm(pdf.pages[14:20], desc='Extract Text from pages'):
        # extract_text() can return None on empty pages
        pdf_text += (page.extract_text() or "") + "\n"

rag.insert(pdf_text)

INFO:lightrag:Logger initialized for working directory: ../run_2
INFO:lightrag:Load KV llm_response_cache with 0 data
INFO:lightrag:Load KV full_docs with 0 data
INFO:lightrag:Load KV text_chunks with 0 data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': '../run_2/vdb_entities.json'} 0 data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': '../run_2/vdb_relationships.json'} 0 data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': '../run_2/vdb_chunks.json'} 0 data
Extract Text from pages: 6it [00:02, 2.67it/s]
INFO:lightrag:[New Docs] inserting 1 docs
INFO:lightrag:[New Chunks] inserting 5 chunks
INFO:lightrag:Inserting 5 vectors to chunks
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /embeddings in 0.449477 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /embeddings in 0.878241 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /embeddings in 0.389104 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /embeddings in 0.882345 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /embeddings in 0.469158 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /embeddings in 0.888185 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
INFO:lightrag:Writing graph with 0 nodes, 0 edges

Traceback (most recent call last):
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/tenacity/asyncio/__init__.py", line 114, in __call__
    result = await fn(*args, **kwargs)
  File "/home/arslan/Data/Learning/side_projects/LighRAG_legal_doc/LightRAG/lightrag/llm.py", line 550, in openai_embedding
    response = await openai_async_client.embeddings.create(
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/openai/resources/embeddings.py", line 236, in create
    return await self._post(
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/openai/_base_client.py", line 1839, in post
    return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/openai/_base_client.py", line 1533, in request
    return await self._request(
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/openai/_base_client.py", line 1619, in _request
    return await self._retry_request(
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/openai/_base_client.py", line 1666, in _retry_request
    return await self._request(
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/openai/_base_client.py", line 1619, in _request
    return await self._retry_request(
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/openai/_base_client.py", line 1666, in _retry_request
    return await self._request(
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/openai/_base_client.py", line 1634, in _request
    raise self._make_status_error_from_response(err.response) from None
openai.RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/arslan/Data/Learning/side_projects/LighRAG_legal_doc/LightRAG/1_run_ollama.py", line 63, in <module>
    rag.insert(pdf_text)
  File "/home/arslan/Data/Learning/side_projects/LighRAG_legal_doc/LightRAG/lightrag/lightrag.py", line 227, in insert
    return loop.run_until_complete(self.ainsert(string_or_strings))
  File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/home/arslan/Data/Learning/side_projects/LighRAG_legal_doc/LightRAG/lightrag/lightrag.py", line 271, in ainsert
    await self.chunks_vdb.upsert(inserting_chunks)
  File "/home/arslan/Data/Learning/side_projects/LighRAG_legal_doc/LightRAG/lightrag/storage.py", line 98, in upsert
    embeddings_list = await asyncio.gather(
  File "/home/arslan/Data/Learning/side_projects/LighRAG_legal_doc/LightRAG/lightrag/utils.py", line 89, in wait_func
    result = await func(*args, **kwargs)
  File "/home/arslan/Data/Learning/side_projects/LighRAG_legal_doc/LightRAG/lightrag/utils.py", line 45, in __call__
    return await self.func(*args, **kwargs)
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/tenacity/asyncio/__init__.py", line 189, in async_wrapped
    return await copy(fn, *args, **kwargs)
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/tenacity/asyncio/__init__.py", line 111, in __call__
    do = await self.iter(retry_state=retry_state)
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/tenacity/asyncio/__init__.py", line 153, in iter
    result = await action(retry_state)
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/tenacity/_utils.py", line 99, in inner
    return call(*args, **kwargs)
  File "/home/arslan/.virtualenvs/lightrag/lib/python3.10/site-packages/tenacity/__init__.py", line 419, in exc_check
    raise retry_exc from fut.exception()
tenacity.RetryError: RetryError[<Future at 0x73ece6a8e8c0 state=finished raised RateLimitError>]

LarFii commented 2 days ago

This is indeed a challenging issue to avoid. However, you can add a delay and then retry the insertion after hitting a rate-limit error from OpenAI; since already-extracted content is cached, it won't be reprocessed on the next attempt. Unfortunately, for local models we currently don't have an effective way to significantly improve speed on CPU.
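The delay-and-retry suggestion could be sketched as a small wrapper like the one below. This is a minimal sketch, not LightRAG API: `insert_with_retry` and its parameters are hypothetical names, and `retryable` is deliberately generic — in practice you would pass the exceptions seen in the traceback, e.g. `(openai.RateLimitError, tenacity.RetryError)`. Also note the traceback's error code is `insufficient_quota`, which retrying alone cannot fix; a backoff loop like this only helps with transient 429s.

```python
import time

def insert_with_retry(rag, text, retryable=(Exception,),
                      max_retries=5, base_delay=10.0):
    """Retry rag.insert(text) with exponential backoff on retryable errors.

    Because LightRAG caches already-processed content, a retried insert
    should only redo the work that failed, not the whole document.
    """
    for attempt in range(max_retries):
        try:
            rag.insert(text)
            return
        except retryable:
            delay = base_delay * (2 ** attempt)  # 10s, 20s, 40s, ...
            print(f"Rate limited; retrying in {delay:.0f}s "
                  f"(attempt {attempt + 1}/{max_retries})")
            time.sleep(delay)
    raise RuntimeError("insert failed after repeated rate-limit errors")
```

With the snippet from the issue, this might be called as `insert_with_retry(rag, pdf_text, retryable=(openai.RateLimitError, tenacity.RetryError))`.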