I am trying to add a custom model: microsoft/Phi-3-mini-128k-instruct. It loads successfully, but when I try to use the model in the chat, I get the error below. I am also wondering why the chat UI is connecting to OpenAI at all (I have put a sketch of what I think is happening after the traceback).
Traceback (most recent call last):
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/queueing.py", line 527, in process_events
response = await route_utils.call_process_api(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/route_utils.py", line 261, in call_process_api
output = await app.get_blocks().process_api(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/blocks.py", line 1786, in process_api
result = await self.call_function(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/blocks.py", line 1350, in call_function
prediction = await utils.async_iteration(iterator)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/utils.py", line 583, in async_iteration
return await iterator.__anext__()
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/utils.py", line 709, in asyncgen_wrapper
response = await iterator.__anext__()
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/chat_interface.py", line 545, in _stream_fn
first_response = await async_iteration(generator)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/utils.py", line 583, in async_iteration
return await iterator.__anext__()
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/utils.py", line 576, in anext
return await anyio.to_thread.run_sync(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 851, in run
result = context.run(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/gradio/utils.py", line 559, in run_sync_iterator_async
return next(iterator)
^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/chat_with_mlx/app.py", line 203, in chatbot
response = client.chat.completions.create(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/openai/_utils/_utils.py", line 277, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/openai/resources/chat/completions.py", line 581, in create
return self._post(
^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/openai/_base_client.py", line 1232, in post
return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/openai/_base_client.py", line 921, in request
return self._request(
^^^^^^^^^^^^^^
File "/Users/arjun/LLM/MLX-LM/chat-with-mlx/.venv_mlx/lib/python3.12/site-packages/openai/_base_client.py", line 1012, in _request
raise self._make_status_error_from_response(err.response) from None
openai.NotFoundError:
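From the bottom of the trace, my best guess is that chat-with-mlx drives the local MLX model through an OpenAI-compatible HTTP server, and the openai package is only being used as a client pointed at a local base_url, not at api.openai.com. If that is right, the NotFoundError (an HTTP 404) would mean either that the local server is not actually running at the expected address, or that the model id in the request does not match what the server loaded. A minimal sketch of that pattern, where the port and model id are my assumptions for illustration rather than chat-with-mlx's actual values:

# Minimal sketch of the suspected setup, NOT chat-with-mlx's actual code.
# Assumes a local OpenAI-compatible server is running, started with
# something like: python -m mlx_lm.server --model microsoft/Phi-3-mini-128k-instruct
# The port (8080) and model id below are assumptions for illustration.
from openai import OpenAI

client = OpenAI(
    base_url="http://127.0.0.1:8080/v1",  # local server, not api.openai.com
    api_key="not-needed",                 # a local server typically ignores the key
)

# A 404 NotFoundError here means the route or model was not found locally,
# e.g. the server never started or was given a different model id.
response = client.chat.completions.create(
    model="microsoft/Phi-3-mini-128k-instruct",  # must match what the server loaded
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
)

# Stream the reply chunk by chunk, as the chat UI would.
for chunk in response:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)

If that is the mechanism, the fix would be making sure the local server is actually serving Phi-3 before the chat callback runs, rather than anything to do with an OpenAI API key.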