I'll add a little notebook showing how that works, but the basic idea is that you can just replace the API URL like this:
import openai
openai.api_key = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # can be anything
openai.api_base = "http://100.64.159.73:8000/v1"
openai.Completion.create(
model="text-davinci-003", # currently can be anything
prompt="The quick brown fox jumps",
max_tokens=5,
)
Notebook with a LangChain example as well -> https://github.com/abetlen/llama-cpp-python/blob/main/examples/notebooks/Clients.ipynb
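For reference, a minimal sketch of capturing and printing the returned text from the example above; the response follows the standard OpenAI completion schema, and the server address is the same placeholder used above:

import openai

openai.api_key = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"  # can be anything
openai.api_base = "http://100.64.159.73:8000/v1"  # address of the local llama-cpp-python server

response = openai.Completion.create(
    model="text-davinci-003",  # currently can be anything
    prompt="The quick brown fox jumps",
    max_tokens=5,
)
# The completion text lives in choices[0].text, just as with the hosted API.
print(response["choices"][0]["text"])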
Thank you so much for your quick reply and help!! Unfortunately, though, I can't get this to run…
In the shell where I execute your example:
(venv_py3) user@mbp21 llama.cpp % /Users/myuser/Desktop/llamaccp/venv_py3/bin/python /Users/myuser/Desktop/llamaccp/test.py
Traceback (most recent call last):
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/openai/api_requestor.py", line 331, in handle_error_response
error_data = resp["error"]
TypeError: string indices must be integers
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/myuser/Desktop/llamaccp/test.py", line 6, in <module>
openai.Completion.create(
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/openai/api_resources/completion.py", line 25, in create
return super().create(*args, **kwargs)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
response, _, api_key = requestor.request(
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/openai/api_requestor.py", line 226, in request
resp, got_stream = self._interpret_response(result, stream)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/openai/api_requestor.py", line 620, in _interpret_response
self._interpret_response_line(
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/openai/api_requestor.py", line 683, in _interpret_response_line
raise self.handle_error_response(
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/openai/api_requestor.py", line 333, in handle_error_response
raise error.APIError(
openai.error.APIError: Invalid response object from API: 'Internal Server Error' (HTTP response code was 500)
And in the terminal where I run the server:
INFO: 127.0.0.1:56992 - "POST /v1/completions HTTP/1.1" 500 Internal Server Error
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/uvicorn/protocols/http/h11_impl.py", line 429, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 78, in __call__
return await self.app(scope, receive, send)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/fastapi/applications.py", line 276, in __call__
await super().__call__(scope, receive, send)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/applications.py", line 122, in __call__
await self.middleware_stack(scope, receive, send)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/middleware/errors.py", line 184, in __call__
raise exc
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/middleware/errors.py", line 162, in __call__
await self.app(scope, receive, _send)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/middleware/cors.py", line 84, in __call__
await self.app(scope, receive, send)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 79, in __call__
raise exc
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 68, in __call__
await self.app(scope, receive, sender)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 21, in __call__
raise e
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
await self.app(scope, receive, send)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/routing.py", line 718, in __call__
await route.handle(scope, receive, send)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/routing.py", line 276, in handle
await self.app(scope, receive, send)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/routing.py", line 66, in app
response = await func(request)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/fastapi/routing.py", line 237, in app
raw_response = await run_endpoint_function(
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/fastapi/routing.py", line 165, in run_endpoint_function
return await run_in_threadpool(dependant.call, **values)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/starlette/concurrency.py", line 41, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/anyio/to_thread.py", line 31, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
return await future
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 867, in run
result = context.run(func, *args)
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/llama_cpp/server/__main__.py", line 106, in create_completion
completion_or_chunks = llama(
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/llama_cpp/llama.py", line 527, in __call__
return self.create_completion(
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/llama_cpp/llama.py", line 488, in create_completion
completion: Completion = next(completion_or_chunks) # type: ignore
File "/Users/myuser/Desktop/llamaccp/venv_py3/lib/python3.10/site-packages/llama_cpp/llama.py", line 305, in _create_completion
assert self.ctx is not None
AssertionError
I wonder whether I understand the whole idea correctly, but it's hard to tell without documentation... am I holding it wrong? Again, thanks a lot for your help!
That error indicates the model did not load correctly; either the MODEL path is incorrect or the model version is wrong. The actual loading error will appear slightly higher in the server log.
YES, that did it! Again, thanks a lot!!
If I may suggest this, maybe change this line in the readme: export MODEL=./models/7B
to something like export MODEL=./models/7B/ggml-model-q4_0.bin
or …/7B/myModel.bin
or something.
Because I actually do remember thinking "…huh, weird that it wants a folder. How is it going to know which model it's supposed to use?", but since that's how it was written… ; )
Good suggestion, I'll update that!
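For anyone hitting the same AssertionError, a small sanity-check sketch (the filename below is only an example) that verifies MODEL points at a model file rather than at a directory before starting the server:

# Sanity check: MODEL should point at the model file itself, not the folder
# that contains it. The default path below is only an example.
import os

model_path = os.environ.get("MODEL", "./models/7B/ggml-model-q4_0.bin")
if not os.path.isfile(model_path):
    raise SystemExit(f"MODEL must be a model file, got: {model_path!r}")
print(f"Model file found: {model_path}")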
Hi, I am trying to use this method but it's giving me an error.
import os
os.environ["OPENAI_API_KEY"] = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # can be anything
os.environ["OPENAI_API_BASE"] = "http://localhost:8000"
from langchain.llms import OpenAI
llms = OpenAI()
llms(
prompt="The quick brown fox jumps",
stop=[".", "\n"],
)
The error:
(.venv_llamaCPU) D:\personal\local-Llama>python function_call.py
D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_core\_api\deprecation.py:117: LangChainDeprecationWarning: The class `langchain_community.llms.openai.OpenAI` was deprecated in langchain-community 0.0.10 and will be removed in 0.2.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import OpenAI`.
warn_deprecated(
D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_core\_api\deprecation.py:117: LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.7 and will be removed in 0.2.0. Use invoke instead.
warn_deprecated(
Retrying langchain_community.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: Invalid response object from API: '{"detail":"Not Found"}' (HTTP response code was 404).
Traceback (most recent call last):
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\openai\api_requestor.py", line 403, in handle_error_response
error_data = resp["error"]
~~~~^^^^^^^^^
KeyError: 'error'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\personal\local-Llama\function_call.py", line 72, in <module>
llms(
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_core\_api\deprecation.py", line 145, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_core\language_models\llms.py", line 991, in __call__
self.generate(
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_core\language_models\llms.py", line 741, in generate
output = self._generate_helper(
^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_core\language_models\llms.py", line 605, in _generate_helper
raise e
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_core\language_models\llms.py", line 592, in _generate_helper
self._generate(
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_community\llms\openai.py", line 460, in _generate
response = completion_with_retry(
^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_community\llms\openai.py", line 123, in completion_with_retry
return _completion_with_retry(**kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\tenacity\__init__.py", line 289, in wrapped_f
return self(f, *args, **kw)
^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\tenacity\__init__.py", line 379, in __call__
do = self.iter(retry_state=retry_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\tenacity\__init__.py", line 325, in iter
raise retry_exc.reraise()
^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\tenacity\__init__.py", line 158, in reraise
raise self.last_attempt.result()
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\MOHIT\miniconda3\Lib\concurrent\futures\_base.py", line 449, in result
return self.__get_result()
^^^^^^^^^^^^^^^^^^^
File "C:\Users\MOHIT\miniconda3\Lib\concurrent\futures\_base.py", line 401, in __get_result
raise self._exception
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\tenacity\__init__.py", line 382, in __call__
result = fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\langchain_community\llms\openai.py", line 121, in _completion_with_retry
return llm.client.create(**kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\openai\api_resources\completion.py", line 25, in create
return super().create(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py", line 153, in create
response, _, api_key = requestor.request(
^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\openai\api_requestor.py", line 298, in request
resp, got_stream = self._interpret_response(result, stream)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\openai\api_requestor.py", line 700, in _interpret_response
self._interpret_response_line(
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\openai\api_requestor.py", line 765, in _interpret_response_line
raise self.handle_error_response(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\personal\local-Llama\.venv_llamaCPU\Lib\site-packages\openai\api_requestor.py", line 405, in handle_error_response
raise error.APIError(
openai.error.APIError: Invalid response object from API: '{"detail":"Not Found"}' (HTTP response code was 404)
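One difference from the working example earlier in this thread: there the base URL ends in /v1, while here OPENAI_API_BASE is set without it, which would make the client request /completions instead of /v1/completions and could explain the 404. A minimal sketch with the suffix added, assuming everything else stays the same:

import os

os.environ["OPENAI_API_KEY"] = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"  # can be anything
# Assumption: the /v1 suffix matches the server's route prefix, as in the earlier example.
os.environ["OPENAI_API_BASE"] = "http://localhost:8000/v1"

from langchain.llms import OpenAI

llm = OpenAI()
print(llm(prompt="The quick brown fox jumps", stop=[".", "\n"]))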
I have the server running and everything, but I really fail to understand the documentation at http://localhost:8000/docs. Is there a simple code example of how I would interact with this from Python (Flask)?
For example, my code for querying OpenAI (for which this should be a "drop-in" replacement) is the following; what would be the equivalent when using llama-cpp-python?
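For illustration only, a hedged sketch of what such a drop-in call might look like inside a small Flask app; the route name, ports, and model string are assumptions, and it uses the same pre-1.0 openai interface as the examples above:

import openai
from flask import Flask, jsonify, request

openai.api_key = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"  # can be anything
openai.api_base = "http://localhost:8000/v1"  # local llama-cpp-python server

app = Flask(__name__)

@app.route("/complete", methods=["POST"])
def complete():
    # Forward the prompt from the incoming JSON body to the local server.
    prompt = request.json.get("prompt", "")
    completion = openai.Completion.create(
        model="text-davinci-003",  # placeholder; the local server uses whatever model it loaded
        prompt=prompt,
        max_tokens=64,
    )
    return jsonify(text=completion["choices"][0]["text"])

if __name__ == "__main__":
    app.run(port=5000)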