langchain-ai / langchain-nvidia


LLM inference fails when running inference with a locally deployed model that has the same name as an API Catalog model #31

Closed nv-pranjald closed 3 months ago

nv-pranjald commented 4 months ago

When trying out inference with a locally deployed model named ai-mixtral-8x7b-instruct on NIM using the LangChain NVIDIA AI Connector, I am unable to get a response from the LLM.
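Roughly how I am calling it (the local NIM URL is illustrative, and this sketch assumes a connector version where ChatNVIDIA accepts base_url directly):

    from langchain_nvidia_ai_endpoints import ChatNVIDIA

    # Point the connector at the locally deployed NIM instead of the API Catalog.
    llm = ChatNVIDIA(
        base_url="http://localhost:8000/v1",  # illustrative local NIM endpoint
        model="ai-mixtral-8x7b-instruct",     # same name as the API Catalog alias
    )

    # Fails: the connector rewrites the model name via its alias table
    # before the request reaches the local NIM.
    print(llm.invoke("Hello!").content)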

On inspection it turned out that if you host the model under the name ai-mixtral-8x7b-instruct (i.e. the name used for API Catalog inference), the call to NIM fails, because the connector rewrites the request to mistralai/mixtral-8x7b-instruct-v0.1, which is the mapped name:

https://github.com/langchain-ai/langchain-nvidia/blob/main/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py#L107

    "ai-mixtral-8x7b-instruct": {
        "model_type": "chat",
        "model_name": "mistralai/mixtral-8x7b-instruct-v0.1",
    },
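To illustrate the shadowing, here is a minimal sketch of the lookup behavior. MODEL_TABLE mirrors the entry above; resolve_model is a hypothetical helper for illustration, not the connector's actual function:

    # MODEL_TABLE mirrors the entry from _statics.py shown above.
    MODEL_TABLE = {
        "ai-mixtral-8x7b-instruct": {
            "model_type": "chat",
            "model_name": "mistralai/mixtral-8x7b-instruct-v0.1",
        },
    }

    def resolve_model(requested: str) -> str:
        # The alias wins unconditionally: a locally deployed model that
        # happens to share the alias name is rewritten to the API Catalog
        # name, so the request sent to the local NIM no longer matches
        # any model it serves.
        entry = MODEL_TABLE.get(requested)
        return entry["model_name"] if entry else requested

    assert resolve_model("ai-mixtral-8x7b-instruct") == "mistralai/mixtral-8x7b-instruct-v0.1"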

Error Logs

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 264, in __call__
    await wrap(partial(self.listen_for_disconnect, receive))
  File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 260, in wrap
    await func()
  File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 237, in listen_for_disconnect
    message = await receive()
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 580, in receive
    await self.message_event.wait()
  File "/usr/lib/python3.10/asyncio/locks.py", line 214, in wait
    await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 795b98517190
During handling of the above exception, another exception occurred:
  + Exception Group Traceback (most recent call last):
  |   File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 419, in run_asgi
  |     result = await app(  # type: ignore[func-returns-value]
  |   File "/usr/local/lib/python3.10/dist-packages/uvicorn/middleware/proxy_headers.py", line 84, in __call__
  |     return await self.app(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/fastapi/applications.py", line 1054, in __call__
  |     await super().__call__(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/applications.py", line 123, in __call__
  |     await self.middleware_stack(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 186, in __call__
  |     raise exc
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 164, in __call__
  |     await self.app(scope, receive, _send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/cors.py", line 83, in __call__
  |     await self.app(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/exceptions.py", line 62, in __call__
  |     await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 64, in wrapped_app
  |     raise exc
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  |     await app(scope, receive, sender)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 758, in __call__
  |     await self.middleware_stack(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 778, in app
  |     await route.handle(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 299, in handle
  |     await self.app(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 79, in app
  |     await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 64, in wrapped_app
  |     raise exc
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  |     await app(scope, receive, sender)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 77, in app
  |     await response(scope, receive, send)
  |   File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 257, in __call__
  |     async with anyio.create_task_group() as task_group:
  |   File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 678, in __aexit__
  |     raise BaseExceptionGroup(
  | exceptiongroup.ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)
  +-+---------------- 1 ----------------
    | Traceback (most recent call last):
    |   File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 260, in wrap
    |     await func()
    |   File "/usr/local/lib/python3.10/dist-packages/starlette/responses.py", line 249, in stream_response
    |     async for chunk in self.body_iterator:
    |   File "/usr/local/lib/python3.10/dist-packages/starlette/concurrency.py", line 65, in iterate_in_threadpool
    |     yield await anyio.to_thread.run_sync(_next, as_iterator)
    |   File "/usr/local/lib/python3.10/dist-packages/anyio/to_thread.py", line 56, in run_sync
    |     return await get_async_backend().run_sync_in_worker_thread(
    |   File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
    |     return await future
    |   File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
    |     result = context.run(func, *args)
    |   File "/usr/local/lib/python3.10/dist-packages/starlette/concurrency.py", line 54, in _next
    |     return next(iterator)
    |   File "/opt/RetrievalAugmentedGeneration/common/server.py", line 288, in response_generator
    |     for chunk in generator:
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 2446, in stream
    |     yield from self.transform(iter([input]), config, **kwargs)
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 2433, in transform
    |     yield from self._transform_stream_with_config(
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 1513, in _transform_stream_with_config
    |     chunk: Output = context.run(next, iterator)  # type: ignore
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 2397, in _transform
    |     for output in final_pipeline:
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/output_parsers/transform.py", line 50, in transform
    |     yield from self._transform_stream_with_config(
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 1489, in _transform_stream_with_config
    |     final_input: Optional[Input] = next(input_for_tracing, None)
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py", line 1061, in transform
    |     yield from self.stream(final, config, **kwargs)
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/chat_models.py", line 250, in stream
    |     raise e
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/chat_models.py", line 234, in stream
    |     for chunk in self._stream(
    |   File "/usr/local/lib/python3.10/dist-packages/langchain_nvidia_ai_endpoints/chat_models.py", line