qdrant / vector-db-benchmark

Framework for benchmarking vector search engines
https://qdrant.tech/benchmarks/
Apache License 2.0
270 stars 77 forks source link

Elastic client timeout should be configurable. #112

Open filipecosta90 opened 6 months ago

filipecosta90 commented 6 months ago

Here's a sample traceback for the 504 Gateway Timeout server errors raised by the Elasticsearch client when the config/vector size leads to longer merge operations.

#103 adds a way to fix or avoid this issue.

Experiment elasticsearch-m-32-ef-256 - dbpedia-openai-1M-1536-angular interrupted
Traceback (most recent call last):
  File "/root/vector-db-benchmark/run.py", line 54, in run
    client.run_experiment(
  File "/root/vector-db-benchmark/engine/base_client/client.py", line 109, in run_experiment
    upload_stats = self.uploader.upload(
  File "/root/vector-db-benchmark/engine/base_client/upload.py", line 70, in upload
    post_upload_stats = self.post_upload(distance)
  File "/root/vector-db-benchmark/engine/clients/elasticsearch/upload.py", line 55, in post_upload
    cls.client.indices.forcemerge(
  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/utils.py", line 446, in wrapped
    return api(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/indices.py", line 1572, in forcemerge
    return self.perform_request(  # type: ignore[return-value]
  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/_base.py", line 389, in perform_request
    return self._client.perform_request(
  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/_base.py", line 320, in perform_request
    raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(
elasticsearch.ApiError: ApiError(504, '{\'ok\': False, \'message\': \'Post "https://172.18.128.211:18270/bench/_forcemerge?max_num_segments=1&wait_for_completion=true": net/http: timeout awaiting response headers\'}')
Traceback (most recent call last):

  File "/root/vector-db-benchmark/run.py", line 84, in <module>
    app()

  File "/root/vector-db-benchmark/run.py", line 79, in run
    raise e

  File "/root/vector-db-benchmark/run.py", line 54, in run
    client.run_experiment(

  File "/root/vector-db-benchmark/engine/base_client/client.py", line 109, in run_experiment
    upload_stats = self.uploader.upload(

  File "/root/vector-db-benchmark/engine/base_client/upload.py", line 70, in upload
    post_upload_stats = self.post_upload(distance)

  File "/root/vector-db-benchmark/engine/clients/elasticsearch/upload.py", line 55, in post_upload
    cls.client.indices.forcemerge(

  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/utils.py", line 446, in wrapped
    return api(*args, **kwargs)

  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/indices.py", line 1572, in forcemerge
    return self.perform_request(  # type: ignore[return-value]

  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/_base.py", line 389, in perform_request
    return self._client.perform_request(

  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/_base.py", line 320, in perform_request
    raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(

elasticsearch.ApiError: ApiError(504, '{\'ok\': False, \'message\': \'Post "https://172.18.128.211:18270/bench/_forcemerge?max_num_segments=1&wait_for_completion=true": net/http: timeout awaiting response headers\'}')

Here's another example, this time from a control-plane operation (index creation):

Experiment stage: Configure
Experiment elasticsearch-m-32-ef-128 - deep-image-96-angular interrupted
Traceback (most recent call last):
  File "/root/vector-db-benchmark/run.py", line 54, in run
    client.run_experiment(
  File "/root/vector-db-benchmark/engine/base_client/client.py", line 106, in run_experiment
    self.configurator.configure(dataset)
  File "/root/vector-db-benchmark/engine/base_client/configure.py", line 22, in configure
    return self.recreate(dataset, self.collection_params) or {}
  File "/root/vector-db-benchmark/engine/clients/elasticsearch/configure.py", line 40, in recreate
    self.client.indices.create(
  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/utils.py", line 446, in wrapped
    return api(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/indices.py", line 509, in create
    return self.perform_request(  # type: ignore[return-value]
  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/_base.py", line 389, in perform_request
    return self._client.perform_request(
  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/_base.py", line 320, in perform_request
    raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(
elasticsearch.ApiError: ApiError(503, 'process_cluster_event_timeout_exception', 'failed to process cluster event (create-index [bench], cause [api]) within 30s')
Traceback (most recent call last):

  File "/root/vector-db-benchmark/run.py", line 84, in <module>
    app()

  File "/root/vector-db-benchmark/run.py", line 79, in run
    raise e

  File "/root/vector-db-benchmark/run.py", line 54, in run
    client.run_experiment(

  File "/root/vector-db-benchmark/engine/base_client/client.py", line 106, in run_experiment
    self.configurator.configure(dataset)

  File "/root/vector-db-benchmark/engine/base_client/configure.py", line 22, in configure
    return self.recreate(dataset, self.collection_params) or {}

  File "/root/vector-db-benchmark/engine/clients/elasticsearch/configure.py", line 40, in recreate
    self.client.indices.create(

  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/utils.py", line 446, in wrapped
    return api(*args, **kwargs)

  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/indices.py", line 509, in create
    return self.perform_request(  # type: ignore[return-value]

  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/_base.py", line 389, in perform_request
    return self._client.perform_request(

  File "/usr/local/lib/python3.10/dist-packages/elasticsearch/_sync/client/_base.py", line 320, in perform_request
    raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(

elasticsearch.ApiError: ApiError(503, 'process_cluster_event_timeout_exception', 'failed to process cluster event (create-index [bench], cause [api]) within 30s')