langchain-ai / langchain-nvidia

MIT License
49 stars 15 forks source link

LLM Inference fails when inferencing with locally deployed model having same name as API Catalog model #31

Closed nv-pranjald closed 3 months ago

nv-pranjald commented 4 months ago

When trying out inference with a locally deployed model named ai-mixtral-8x7b-instruct on NIM using the LangChain NVIDIA AI connector, I am unable to get a response from the LLM.

On inspection, it turned out that if you host the model under the name ai-mixtral-8x7b-instruct (i.e. the name used for API Catalog inference), the call to NIM fails because the connector instead requests mistralai/mixtral-8x7b-instruct-v0.1, which is the model name this alias maps to:

    "ai-mixtral-8x7b-instruct": {
        "model_type": "chat",
        "model_name": "mistralai/mixtral-8x7b-instruct-v0.1",

Error Logs

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/starlette/", line 264, in __call__
    await wrap(partial(self.listen_for_disconnect, receive))
  File "/usr/local/lib/python3.10/dist-packages/starlette/", line 260, in wrap
    await func()
  File "/usr/local/lib/python3.10/dist-packages/starlette/", line 237, in listen_for_disconnect
    message = await receive()
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/", line 580, in receive
    await self.message_event.wait()
  File "/usr/lib/python3.10/asyncio/", line 214, in wait
    await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 795b98517190
During handling of the above exception, another exception occurred:
  + Exception Group Traceback (most recent call last):
  |   File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/", line 419, in run_asgi
  |     result = await app(  # type: ignore[func-returns-value]
  |   File "/usr/local/lib/python3.10/dist-packages/uvicorn/middleware/", line 84, in __call__
  |     return await, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/fastapi/", line 1054, in __call__
  |     await super().__call__(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 123, in __call__
  |     await self.middleware_stack(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/", line 186, in __call__
  |     raise exc
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/", line 164, in __call__
  |     await, receive, _send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/", line 83, in __call__
  |     await, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/", line 62, in __call__
  |     await wrap_app_handling_exceptions(, conn)(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 64, in wrapped_app
  |     raise exc
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 53, in wrapped_app
  |     await app(scope, receive, sender)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 758, in __call__
  |     await self.middleware_stack(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 778, in app
  |     await route.handle(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 299, in handle
  |     await, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 79, in app
  |     await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 64, in wrapped_app
  |     raise exc
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 53, in wrapped_app
  |     await app(scope, receive, sender)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 77, in app
  |     await response(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 257, in __call__
  |     async with anyio.create_task_group() as task_group:
  |   File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/", line 678, in __aexit__
  |     raise BaseExceptionGroup(
  | exceptiongroup.ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)
  +-+---------------- 1 ----------------
    | Traceback (most recent call last):
    |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 260, in wrap
    |     await func()
    |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 249, in stream_response
    |     async for chunk in self.body_iterator:
    |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 65, in iterate_in_threadpool
    |     yield await anyio.to_thread.run_sync(_next, as_iterator)
    |   File "/usr/local/lib/python3.10/dist-packages/anyio/", line 56, in run_sync
    |     return await get_async_backend().run_sync_in_worker_thread(
    |   File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/", line 2144, in run_sync_in_worker_thread
    |     return await future
    |   File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/", line 851, in run
    |     result =, *args)
    |   File "/usr/local/lib/python3.10/dist-packages/starlette/", line 54, in _next
    |     return next(iterator)
    |   File "/opt/RetrievalAugmentedGeneration/common/", line 288, in response_generator
    |     for chunk in generator:
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/", line 2446, in stream
    |     yield from self.transform(iter([input]), config, **kwargs)
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/", line 2433, in transform
    |     yield from self._transform_stream_with_config(
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/", line 1513, in _transform_stream_with_config
    |     chunk: Output =, iterator)  # type: ignore
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/", line 2397, in _transform
    |     for output in final_pipeline:
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/output_parsers/", line 50, in transform
    |     yield from self._transform_stream_with_config(
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/", line 1489, in _transform_stream_with_config
    |     final_input: Optional[Input] = next(input_for_tracing, None)
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/", line 1061, in transform
    |     yield from, config, **kwargs)
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/", line 250, in stream
    |     raise e
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/", line 234, in stream
    |     for chunk in self._stream(
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_nvidia_ai_endpoints/", line