danswer-ai / danswer

Gen-AI Chat for Teams - Think ChatGPT if it had access to your team's unique knowledge.
https://danswer.ai
Other
10.57k stars 1.32k forks source link

Indexing Failure: Empty strings are not allowed for embedding #2455

Closed emerzon closed 2 weeks ago

emerzon commented 1 month ago

We're seeing some indexing attempts fail with this exception. Apparently, in some cases we try to embed an empty string, which makes the whole indexing process fail. Maybe we should add a check somewhere?

Traceback (most recent call last):
  File "/app/danswer/natural_language_processing/search_nlp_models.py", line 120, in _make_request
    response.raise_for_status()
  File "/usr/local/lib/python3.11/site-packages/requests/models.py", line 1024, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 500 Server Error: Internal Server Error for url: http://inference-model-server-service:9000/encoder/bi-encoder-embed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/app/danswer/background/indexing/run_indexing.py", line 219, in _run_indexing
    new_docs, total_batch_chunks = indexing_pipeline(
                                   ^^^^^^^^^^^^^^^^^^
  File "/app/danswer/indexing/indexing_pipeline.py", line 142, in index_doc_batch_with_handler
    r = index_doc_batch(
        ^^^^^^^^^^^^^^^^
  File "/app/danswer/utils/timing.py", line 31, in wrapped_func
    result = func(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^
  File "/app/danswer/indexing/indexing_pipeline.py", line 286, in index_doc_batch
    embedder.embed_chunks(
  File "/app/danswer/utils/timing.py", line 31, in wrapped_func
    result = func(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^
  File "/app/danswer/indexing/embedder.py", line 113, in embed_chunks
    embeddings = self.embedding_model.encode(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/danswer/natural_language_processing/search_nlp_models.py", line 210, in encode
    return self._batch_encode_texts(
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/danswer/natural_language_processing/search_nlp_models.py", line 167, in _batch_encode_texts
    response = self._make_model_server_request(embed_request)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/danswer/natural_language_processing/search_nlp_models.py", line 134, in _make_model_server_request
    return retry(tries=3, delay=5)(_make_request)()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/decorator.py", line 232, in fun
    return caller(func, *(extras + args), **kw)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/retry/api.py", line 73, in retry_decorator
    return __retry_internal(partial(f, *args, **kwargs), exceptions, tries, delay, max_delay, backoff, jitter,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/retry/api.py", line 33, in __retry_internal
    return f()
           ^^^
  File "/app/danswer/natural_language_processing/search_nlp_models.py", line 126, in _make_request
    raise HTTPError(f"HTTP error occurred: {error_detail}") from e
httpx.HTTPError: HTTP error occurred: Internal Server Error

And the corresponding exception in the inference model server:

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/uvicorn/protocols/http/h11_impl.py", line 429, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 78, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/applications.py", line 123, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 164, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
    raise exc
  File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 758, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 778, in app
    await route.handle(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 299, in handle
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 79, in app
    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
    raise exc
  File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 74, in app
    response = await func(request)
               ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/fastapi/routing.py", line 299, in app
    raise e
  File "/usr/local/lib/python3.11/site-packages/fastapi/routing.py", line 294, in app
    raw_response = await run_endpoint_function(
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/fastapi/routing.py", line 191, in run_endpoint_function
    return await dependant.call(**values)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/model_server/encoders.py", line 394, in process_embed_request
    raise ValueError("Empty strings are not allowed for embedding.")
ValueError: Empty strings are not allowed for embedding.
emerzon commented 2 weeks ago

Fixed by #2853