When trying out inference against a locally deployed NIM model named ai-mixtral-8x7b-instruct using the LangChain NVIDIA AI Endpoints connector, I am unable to get a response from the LLM.
On inspection it turned out that if you host the model under the name ai-mixtral-8x7b-instruct (i.e. the name we use for api-catalog inference), the call to the NIM fails because the connector rewrites the name to mistral/mixtral-8x7b-instruct via its static mapping: https://github.com/langchain-ai/langchain-nvidia/blob/main/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py#L107
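A minimal sketch of how the connector is being invoked (the base URL, port, and prompt are placeholders for my setup; in practice the call happens inside the RAG server chain shown in the traceback below):

```python
# Minimal reproduction sketch. The base_url and prompt are assumptions;
# the actual call is made inside the RAG server chain (see traceback below).
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(
    model="ai-mixtral-8x7b-instruct",     # the api-catalog alias the NIM was hosted under
    base_url="http://localhost:8000/v1",  # assumed local NIM endpoint
)

# The failure surfaces during streaming: the connector rewrites the model name
# to mistral/mixtral-8x7b-instruct before calling the NIM, so no response
# ever comes back from the locally hosted model.
for chunk in llm.stream("Hello, who are you?"):
    print(chunk.content, end="")
```

The full traceback from the RAG server follows.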
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 264, in __call__
await wrap(partial(self.listen_for_disconnect, receive))
File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 260, in wrap
await func()
File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 237, in listen_for_disconnect
message = await receive()
File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 580, in receive
await self.message_event.wait()
File "/usr/lib/python3.10/asyncio/locks.py", line 214, in wait
await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 795b98517190
During handling of the above exception, another exception occurred:
+ Exception Group Traceback (most recent call last):
| File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 419, in run_asgi
| result = await app( # type: ignore[func-returns-value]
| File "/usr/local/lib/python3.10/dist-packages/uvicorn/middleware/proxy_headers.py", line 84, in __call__
| return await self.app(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/fastapi/applications.py", line 1054, in __call__
| await super().__call__(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/applications.py", line 123, in __call__
| await self.middleware_stack(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 186, in __call__
| raise exc
| File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 164, in __call__
| await self.app(scope, receive, _send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/cors.py", line 83, in __call__
| await self.app(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/exceptions.py", line 62, in __call__
| await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 64, in wrapped_app
| raise exc
| File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
| await app(scope, receive, sender)
| File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 758, in __call__
| await self.middleware_stack(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 778, in app
| await route.handle(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 299, in handle
| await self.app(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 79, in app
| await wrap_app_handling_exceptions(app, request)(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 64, in wrapped_app
| raise exc
| File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
| await app(scope, receive, sender)
| File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 77, in app
| await response(scope, receive, send)
| File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 257, in __call__
| async with anyio.create_task_group() as task_group:
| File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 678, in __aexit__
| raise BaseExceptionGroup(
| exceptiongroup.ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)
+-+---------------- 1 ----------------
| Traceback (most recent call last):
| File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 260, in wrap
| await func()
| File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 249, in stream_response
| async for chunk in self.body_iterator:
| File "/usr/local/lib/python3.10/dist-packages/starlette/concurrency.py", line 65, in iterate_in_threadpool
| yield await anyio.to_thread.run_sync(_next, as_iterator)
| File "/usr/local/lib/python3.10/dist-packages/anyio/to_thread.py", line 56, in run_sync
| return await get_async_backend().run_sync_in_worker_thread(
| File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
| return await future
| File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
| result = context.run(func, *args)
| File "/usr/local/lib/python3.10/dist-packages/starlette/concurrency.py", line 54, in _next
| return next(iterator)
| File "/opt/RetrievalAugmentedGeneration/common/server.py", line 288, in response_generator
| for chunk in generator:
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 2446, in stream
| yield from self.transform(iter([input]), config, **kwargs)
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 2433, in transform
| yield from self._transform_stream_with_config(
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 1513, in _transform_stream_with_config
| chunk: Output = context.run(next, iterator) # type: ignore
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 2397, in _transform
| for output in final_pipeline:
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/output_parsers/transform.py", line 50, in transform
| yield from self._transform_stream_with_config(
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 1489, in _transform_stream_with_config
| final_input: Optional[Input] = next(input_for_tracing, None)
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 1061, in transform
| yield from self.stream(final, config, **kwargs)
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/chat_models.py", line 250, in stream
| raise e
| File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/chat_models.py", line 234, in stream
| for chunk in self._stream(
| File "/usr/local/lib/python3.10/dist-packages/langchain_nvidia_ai_endpoints/chat_models.py", line
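To confirm the mismatch, you can list what the local NIM actually registered and compare it with the name the connector sends after the alias rewrite. This assumes the NIM exposes the standard OpenAI-compatible /v1/models route; the URL below is a placeholder for my deployment:

```python
# Sanity check: list the model id(s) the local NIM actually serves.
import requests

resp = requests.get("http://localhost:8000/v1/models")  # assumed local NIM endpoint
resp.raise_for_status()
for model in resp.json().get("data", []):
    print(model["id"])  # the name(s) the server was started with
```

In my case the server only knows the name it was hosted with (ai-mixtral-8x7b-instruct), while the connector calls it as mistral/mixtral-8x7b-instruct because of the mapping in _statics.py linked above, so the request never reaches the model.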