Hi, I'm trying to test the benchmark code and am running into the following error.
=====================================
Code:
from vectordb_bench.backend.dataset import Dataset, DatasetManager
import numpy as np
size = 500000
data_name = 'openai'
data = Dataset.OPENAI.manager(size)
data.prepare(check=False)
====================================
data.prepare(check=False) raises the following error:
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\test.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\test.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\neighbors.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\neighbors.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\neighbors_tail_1p.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\neighbors_tail_1p.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\neighbors_head_1p.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\neighbors_head_1p.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\shuffle_train.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\shuffle_train.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: Start to downloading files, total count: 5 (data_source.py:142) (4644)
0%| | 0/5 [00:00<?, ?it/s]
Traceback (most recent call last):
File ~\anaconda3\envs\env1\Lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
exec(code, globals, locals)
File ~\anaconda3\envs\env1\Lib\site-packages\vectordb_bench\backend\dataset.py:202 in prepare
source.reader().read(
File ~\anaconda3\envs\env1\Lib\site-packages\vectordb_bench\backend\data_source.py:145 in read
self.fs.download(s3_file, local_ds_root.as_posix())
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\spec.py:1534 in download
return self.get(rpath, lpath, recursive=recursive, **kwargs)
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:118 in wrapper
return sync(self.loop, func, *args, **kwargs)
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:103 in sync
raise return_result
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:56 in _runner
result[0] = await coro
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:650 in _get
return await _run_coros_in_chunks(
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:254 in _run_coros_in_chunks
await asyncio.gather(*chunk, return_exceptions=return_exceptions),
File ~\anaconda3\envs\env1\Lib\asyncio\tasks.py:452 in wait_for
return await fut
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\callbacks.py:81 in func
return await fn(path1, path2, callback=child, **kwargs)
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:1224 in _get_file
body, content_length = await _open_file(range=0)
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:1215 in _open_file
resp = await self._call_s3(
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:348 in _call_s3
return await _error_wrapper(
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:140 in _error_wrapper
raise err
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:113 in _error_wrapper
return await func(*args, **kwargs)
File ~\anaconda3\envs\env1\Lib\site-packages\aiobotocore\client.py:345 in _make_api_call
api_params = await self._emit_api_params(
File ~\anaconda3\envs\env1\Lib\site-packages\aiobotocore\client.py:475 in _emit_api_params
await self.meta.events.emit(
File ~\anaconda3\envs\env1\Lib\site-packages\aiobotocore\hooks.py:66 in _emit
response = await resolve_awaitable(handler(**kwargs))
File ~\anaconda3\envs\env1\Lib\site-packages\botocore\handlers.py:288 in validate_bucket_name
raise ParamValidationError(report=error_msg)
ParamValidationError: Parameter validation failed:
Invalid bucket name "assets.zilliz.com\benchmark\openai_medium_500k\test.parquet": Bucket name must match the regex "^[a-zA-Z0-9.\-_]{1,255}$" or be an ARN matching the regex "^arn:(aws).*:(s3|s3-object-lambda):[a-z\-0-9]*:[0-9]{12}:accesspoint[/:][a-zA-Z0-9\-.]{1,63}$|^arn:(aws).*:s3-outposts:[a-z\-0-9]+:[0-9]{12}:outpost[/:][a-zA-Z0-9\-]{1,63}[/:]accesspoint[/:][a-zA-Z0-9\-]{1,63}$"
===========================
Operating system: Windows 11, Anaconda env with Python 3.11
Hi, I'm trying to test the benchmark code and am running into the following error.
=====================================
Code:
from vectordb_bench.backend.dataset import Dataset, DatasetManager
import numpy as np
size = 500000
data_name = 'openai'
data = Dataset.OPENAI.manager(size)
data.prepare(check=False)
====================================
data.prepare(check=False) raises the following error:
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\test.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\test.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\neighbors.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\neighbors.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\neighbors_tail_1p.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\neighbors_tail_1p.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\neighbors_head_1p.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\neighbors_head_1p.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: local file: \tmp\vectordb_bench\dataset\openai\openai_medium_500k\shuffle_train.parquet not match with remote: assets.zilliz.com\benchmark\openai_medium_500k\shuffle_train.parquet; add to downloading list (data_source.py:136) (4644)
2024-03-01 00:37:05,316 | INFO: Start to downloading files, total count: 5 (data_source.py:142) (4644)
0%| | 0/5 [00:00<?, ?it/s]
Traceback (most recent call last):
File ~\anaconda3\envs\env1\Lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
    exec(code, globals, locals)
File c:\users\bdfzl\desktop\env1\vdb_bench.py:9
    data.prepare(check=False)
File ~\anaconda3\envs\env1\Lib\site-packages\vectordb_bench\backend\dataset.py:202 in prepare
    source.reader().read(
File ~\anaconda3\envs\env1\Lib\site-packages\vectordb_bench\backend\data_source.py:145 in read
    self.fs.download(s3_file, local_ds_root.as_posix())
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\spec.py:1534 in download
    return self.get(rpath, lpath, recursive=recursive, **kwargs)
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:118 in wrapper
    return sync(self.loop, func, *args, **kwargs)
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:103 in sync
    raise return_result
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:56 in _runner
    result[0] = await coro
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:650 in _get
    return await _run_coros_in_chunks(
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\asyn.py:254 in _run_coros_in_chunks
    await asyncio.gather(*chunk, return_exceptions=return_exceptions),
File ~\anaconda3\envs\env1\Lib\asyncio\tasks.py:452 in wait_for
    return await fut
File ~\anaconda3\envs\env1\Lib\site-packages\fsspec\callbacks.py:81 in func
    return await fn(path1, path2, callback=child, **kwargs)
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:1224 in _get_file
    body, content_length = await _open_file(range=0)
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:1215 in _open_file
    resp = await self._call_s3(
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:348 in _call_s3
    return await _error_wrapper(
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:140 in _error_wrapper
    raise err
File ~\anaconda3\envs\env1\Lib\site-packages\s3fs\core.py:113 in _error_wrapper
    return await func(*args, **kwargs)
File ~\anaconda3\envs\env1\Lib\site-packages\aiobotocore\client.py:345 in _make_api_call
    api_params = await self._emit_api_params(
File ~\anaconda3\envs\env1\Lib\site-packages\aiobotocore\client.py:475 in _emit_api_params
    await self.meta.events.emit(
File ~\anaconda3\envs\env1\Lib\site-packages\aiobotocore\hooks.py:66 in _emit
    response = await resolve_awaitable(handler(**kwargs))
File ~\anaconda3\envs\env1\Lib\site-packages\botocore\handlers.py:288 in validate_bucket_name
    raise ParamValidationError(report=error_msg)
ParamValidationError: Parameter validation failed:
Invalid bucket name "assets.zilliz.com\benchmark\openai_medium_500k\test.parquet": Bucket name must match the regex "^[a-zA-Z0-9.\-_]{1,255}$" or be an ARN matching the regex "^arn:(aws).*:(s3|s3-object-lambda):[a-z\-0-9]*:[0-9]{12}:accesspoint[/:][a-zA-Z0-9\-.]{1,63}$|^arn:(aws).*:s3-outposts:[a-z\-0-9]+:[0-9]{12}:outpost[/:][a-zA-Z0-9\-]{1,63}[/:]accesspoint[/:][a-zA-Z0-9\-]{1,63}$"
===========================
Operating system: Windows 11, Anaconda env with Python 3.11
===========================
Installation: pip install vectordb-bench[all]
Thank you for the help.