Closed: latishab closed this issue 12 months ago
Hi @latishab, it may have to do with your environment. Could you try running it from the Google Colab environment here? https://colab.research.google.com/github/bentoml/OpenLLM/blob/main/examples/openllm-llama2-demo/openllm_llama2_demo.ipynb
I cannot run it on Colab either; it gave me this:
WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:
PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.1.0+cu118)
Python 3.10.13 (you have 3.10.12)
Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
Memory-efficient attention, SwiGLU, sparse and more won't be available.
Set XFORMERS_MORE_DETAILS=1 for more details
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/openllm/_assign.py", line 55, in inner
return vllm.AsyncLLMEngine.from_engine_args(
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 487, in from_engine_args
engine = cls(engine_args.worker_use_ray,
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 270, in init
self.engine = self._init_engine(*args, kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 306, in _init_engine
return engine_class(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py", line 113, in init
self._init_cache()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py", line 193, in _init_cache
num_blocks = self._run_workers(
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py", line 700, in _run_workers
output = executor(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker.py", line 111, in profile_num_available_blocks
self.model(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
if result is not None:
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
result = hook(self, args)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/llama.py", line 302, in forward
hidden_states = self.model(input_ids, positions, kv_caches,
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
if result is not None:
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
result = hook(self, args)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/llama.py", line 263, in forward
hidden_states = layer(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
if result is not None:
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
result = hook(self, args)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/llama.py", line 208, in forward
hidden_states = self.self_attn(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
if result is not None:
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
result = hook(self, args)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/llama.py", line 157, in forward
attn_output = self.attn(positions, q, k, v, k_cache, v_cache,
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
if result is not None:
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
result = hook(self, args)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/attention.py", line 389, in forward
return super().forward(
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/attention.py", line 266, in forward
self.multi_query_kv_attention(
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/attention.py", line 123, in multi_query_kv_attention
out = xops.memory_efficient_attention_forward(
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/init.py", line 244, in memory_efficient_attention_forward
return _memory_efficient_attention_forward(
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/init.py", line 337, in _memory_efficient_attention_forward
op = _dispatch_fw(inp, False)
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/dispatch.py", line 120, in _dispatch_fw
return _run_priority_list(
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/dispatch.py", line 63, in _run_priority_list
raise NotImplementedError(msg)
NotImplementedError: No operator found for memory_efficient_attention_forward
with inputs:
query : shape=(1, 4096, 32, 128) (torch.float16)
key : shape=(1, 4096, 32, 128) (torch.float16)
value : shape=(1, 4096, 32, 128) (torch.float16)
attn_bias : <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalMask'>
p : 0.0
decoderF
is not supported because:
xFormers wasn't build with CUDA support
attn_bias type is <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalMask'>
operator wasn't built - see python -m xformers.info
for more info
flshattF@0.0.0
is not supported because:
xFormers wasn't build with CUDA support
requires device with capability > (8, 0) but your GPU has capability (7, 5) (too old)
operator wasn't built - see python -m xformers.info
for more info
tritonflashattF
is not supported because:
xFormers wasn't build with CUDA support
requires device with capability > (8, 0) but your GPU has capability (7, 5) (too old)
attn_bias type is <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalMask'>
operator wasn't built - see python -m xformers.info
for more info
triton is not available
requires GPU with sm80 minimum compute capacity, e.g., A100/H100/L4
cutlassF
is not supported because:
xFormers wasn't build with CUDA support
operator wasn't built - see python -m xformers.info
for more info
smallkF
is not supported because:
max(query.shape[-1] != value.shape[-1]) > 32
xFormers wasn't build with CUDA support
dtype=torch.float16 (supported: {torch.float32})
attn_bias type is <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalMask'>
has custom scale
operator wasn't built - see python -m xformers.info
for more info
unsupported embed per head: 128
An exception occurred while instantiating runner 'llm-llama-runner', see details below:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/bentoml/_internal/runner/runner.py", line 307, in init_local
self._set_handle(LocalRunnerRef)
File "/usr/local/lib/python3.10/dist-packages/bentoml/_internal/runner/runner.py", line 150, in _set_handle
runner_handle = handle_class(self, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/bentoml/_internal/runner/runner_handle/local.py", line 27, in __init__
self._runnable = runner.runnable_class(**runner.runnable_init_params) # type: ignore
File "/usr/local/lib/python3.10/dist-packages/openllm/_llm.py", line 1132, in __init__
if not self.model: raise RuntimeError('Failed to load the model correctly (See traceback above)')
File "/usr/local/lib/python3.10/dist-packages/openllm/_llm.py", line 710, in model
model = self.load_model(*self._model_decls, **self._model_attrs)
File "/usr/local/lib/python3.10/dist-packages/openllm/_assign.py", line 66, in inner
raise OpenLLMException(f'Failed to initialise vLLMEngine due to the following error:\n{err}') from None
openllm_core.exceptions.OpenLLMException: Failed to initialise vLLMEngine due to the following error:
No operator found for memory_efficient_attention_forward
with inputs:
query : shape=(1, 4096, 32, 128) (torch.float16)
key : shape=(1, 4096, 32, 128) (torch.float16)
value : shape=(1, 4096, 32, 128) (torch.float16)
attn_bias : <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalMask'>
p : 0.0
decoderF
is not supported because:
xFormers wasn't build with CUDA support
attn_bias type is <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalMask'>
operator wasn't built - see python -m xformers.info
for more info
flshattF@0.0.0
is not supported because:
xFormers wasn't build with CUDA support
requires device with capability > (8, 0) but your GPU has capability (7, 5) (too old)
operator wasn't built - see python -m xformers.info
for more info
tritonflashattF
is not supported because:
xFormers wasn't build with CUDA support
requires device with capability > (8, 0) but your GPU has capability (7, 5) (too old)
attn_bias type is <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalMask'>
operator wasn't built - see python -m xformers.info
for more info
triton is not available
requires GPU with sm80 minimum compute capacity, e.g., A100/H100/L4
cutlassF
is not supported because:
xFormers wasn't build with CUDA support
operator wasn't built - see python -m xformers.info
for more info
smallkF
is not supported because:
max(query.shape[-1] != value.shape[-1]) > 32
xFormers wasn't build with CUDA support
dtype=torch.float16 (supported: {torch.float32})
attn_bias type is <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalMask'>
has custom scale
operator wasn't built - see python -m xformers.info
for more info
unsupported embed per head: 128
Sorry, but this seems to be purely an environment setup issue; I can run Falcon models successfully.
Feel free to reopen if a different issue arises.
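For reference, the Colab failure above boils down to the version mismatch reported in the xFormers warning at the top of the log: the preinstalled wheel was built against PyTorch 2.0.1+cu118 while the runtime has 2.1.0+cu118, so none of the compiled attention operators load (hence every backend reporting "xFormers wasn't build with CUDA support"). A minimal sketch of the usual remedy, assuming the exact pin is checked against the xFormers release notes for your torch build:

```
# Inspect which fused-attention operators are actually usable
python -m xformers.info

# Reinstall an xFormers wheel compiled against the installed torch (2.1.0+cu118 here).
# The pin below is an assumption; pick the release that matches your torch version.
pip uninstall -y xformers
pip install xformers==0.0.22.post7
```

Note that the free-tier Colab T4 has compute capability (7, 5), so the flash-attention backends (flshattF, tritonflashattF) stay unavailable regardless; a matching build should still expose cutlassF, which should be enough for vLLM's memory_efficient_attention_forward call.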
(ol) C:\Users\SNS>openllm start falcon --model-id tiiuae/falcon-7b
Downloading (…)lve/main/config.json: 100%|████████████████████████████████████████████████| 1.05k/1.05k [00:00<?, ?B/s]
C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\huggingface_hub\file_download.py:137: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\SNS\.cache\huggingface\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations. To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
warnings.warn(message)
Downloading (…)figuration_falcon.py: 100%|████████████████████████████████████████| 7.16k/7.16k [00:00<00:00, 6.88MB/s]
WARNING: You are currently loading Falcon using legacy code contained in the model repository. Falcon has now been fully ported into the Hugging Face transformers library. For the most up-to-date and high-performance version of the Falcon model code, please update to the latest version of transformers and then load the model without the trust_remote_code=True argument.
Downloading (…)okenizer_config.json: 100%|████████████████████████████████████████████████████| 287/287 [00:00<?, ?B/s]
Downloading (…)/main/tokenizer.json: 100%|████████████████████████████████████████| 2.73M/2.73M [00:01<00:00, 1.80MB/s]
Downloading (…)cial_tokens_map.json: 100%|████████████████████████████████████████████████████| 281/281 [00:00<?, ?B/s]
Downloading (…)6/modeling_falcon.py: 100%|████████████████████████████████████████| 56.9k/56.9k [00:00<00:00, 2.25MB/s]
Downloading (…)neration_config.json: 100%|████████████████████████████████████████████| 117/117 [00:00<00:00, 34.7kB/s]
Downloading (…)model.bin.index.json: 100%|████████████████████████████████████████| 16.9k/16.9k [00:00<00:00, 8.46MB/s]
Fetching 10 files: 40%|█████████████████████████▏ | 4/10 [17:43<26:35, 265.84s/it]
Downloading (…)l-00002-of-00002.bin: 100%|████████████████████████████████████████| 4.48G/4.48G [50:47<00:00, 1.47MB/s]
Traceback (most recent call last):
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\serialisation\transformers\__init__.py", line 147, in get
model = bentoml.models.get(llm.tag)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\simple_di\__init__.py", line 139, in _
return func(*_inject_args(bind.args), **_inject_kwargs(bind.kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\bentoml\models.py", line 45, in get
return _model_store.get(tag)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\bentoml\_internal\store.py", line 158, in get
raise NotFound(
bentoml.exceptions.NotFound: Model 'pt-tiiuae--falcon-7b:898df1396f35e447d5fe44e0a3ccaaaa69f30d36' is not found in BentoML store <osfs 'C:\Users\SNS\bentoml\models'>, you may need to run `bentoml models pull` first

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\cli\entrypoint.py", line 416, in import_command
_ref = openllm.serialisation.get(llm)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\serialisation\__init__.py", line 75, in caller
return getattr(importlib.import_module(f'.{serde}', __name__), fn)(llm, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\serialisation\transformers\__init__.py", line 155, in get
raise openllm.exceptions.OpenLLMException(f'Failed while getting stored artefact (lookup for traceback):\n{err}') from err
openllm_core.exceptions.OpenLLMException: Failed while getting stored artefact (lookup for traceback):
Model 'pt-tiiuae--falcon-7b:898df1396f35e447d5fe44e0a3ccaaaa69f30d36' is not found in BentoML store <osfs 'C:\Users\SNS\bentoml\models'>, you may need to run `bentoml models pull` first

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\serialisation\transformers\__init__.py", line 147, in get
model = bentoml.models.get(llm.tag)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\simple_di\__init__.py", line 139, in _
return func(*_inject_args(bind.args), **_inject_kwargs(bind.kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\bentoml\models.py", line 45, in get
return _model_store.get(tag)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\bentoml\_internal\store.py", line 158, in get
raise NotFound(
bentoml.exceptions.NotFound: Model 'pt-tiiuae--falcon-7b:898df1396f35e447d5fe44e0a3ccaaaa69f30d36' is not found in BentoML store <osfs 'C:\Users\SNS\bentoml\models'>, you may need to run `bentoml models pull` first

During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "", line 88, in _run_code
File "C:\Users\SNS\anaconda3\envs\ol\Scripts\openllm.exe__main.py", line 7, in
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 1157, in call
return self.main(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 1078, in main
rv = self.invoke(ctx)
^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 783, in invoke
return __callback(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\cli\entrypoint.py", line 196, in wrapper
return_value = func(*args, **attrs)
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\decorators.py", line 33, in new_func
return f(get_current_context(), *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\cli\entrypoint.py", line 178, in wrapper
return f(*args, **attrs)
^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\decorators.py", line 33, in new_func
return f(get_current_context(), *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\cli_factory.py", line 179, in start_cmd
llm = openllm.utils.infer_auto_class(env['backend_value']).for_model(model,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\models\auto\factory.py", line 52, in for_model
if ensure_available: llm.save_pretrained()
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm_llm.py", line 672, in save_pretrained
return openllm.import_model(self.config['start_name'],
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\cli_sdk.py", line 262, in _import_model
return import_command.main(args=args, standalone_mode=False)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 1078, in main
rv = self.invoke(ctx)
^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\core.py", line 783, in invoke
return __callback(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\cli\entrypoint.py", line 196, in wrapper
return_value = func(*args, **attrs)
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\click\decorators.py", line 33, in new_func
return f(get_current_context(), *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\cli\entrypoint.py", line 178, in wrapper
return f(*args, **attrs)
^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\cli\entrypoint.py", line 422, in import_command
_ref = openllm.serialisation.get(llm, auto_import=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\serialisation__init.py", line 75, in caller
return getattr(importlib.import_module(f'.{serde}', name), fn)(llm, *args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\serialisation\transformers__init.py", line 154, in get
if auto_import: return import_model(llm, trust_remote_code=llm.trust_remote_code)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\simpledi__init__.py", line 139, in
return func(*_inject_args(bind.args), **_inject_kwargs(bind.kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\openllm\serialisation\transformers\init__.py", line 124, in import_model
snapshot_download(llm.model_id, local_dir=bentomodel.path, local_dir_use_symlinks=False, ignore_patterns=HfIgnore.ignore_patterns(llm))
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\huggingface_hub\utils_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\huggingface_hub_snapshot_download.py", line 239, in snapshot_download
thread_map(
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\tqdm\contrib\concurrent.py", line 69, in thread_map
return _executor_map(ThreadPoolExecutor, fn, *iterables, **tqdm_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\tqdm\contrib\concurrent.py", line 51, in _executor_map
return list(tqdm_class(ex.map(fn, *iterables, chunksize=chunksize), **kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\tqdm\std.py", line 1182, in iter
for obj in iterable:
File "C:\Users\SNS\anaconda3\envs\ol\Lib\concurrent\futures_base.py", line 619, in result_iterator
yield _result_or_cancel(fs.pop())
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\concurrent\futures_base.py", line 317, in _result_or_cancel
return fut.result(timeout)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\concurrent\futures_base.py", line 456, in result
return self.__get_result()
^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\concurrent\futures_base.py", line 401, in __get_result
raise self._exception
File "C:\Users\SNS\anaconda3\envs\ol\Lib\concurrent\futures\thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\huggingface_hub_snapshot_download.py", line 214, in _inner_hf_hub_download
return hf_hub_download(
^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\huggingface_hub\utils_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\huggingface_hub\file_download.py", line 1431, in hf_hub_download
http_get(
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\huggingface_hub\file_download.py", line 551, in http_get
for chunk in r.iter_content(chunk_size=10 * 1024 * 1024):
File "C:\Users\SNS\anaconda3\envs\ol\Lib\site-packages\requests\models.py", line 822, in generate
raise ConnectionError(e)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='cdn-lfs.huggingface.co', port=443): Read timed out.
Downloading (…)l-00001-of-00002.bin: 10%|███▊ | 954M/9.95G [50:47<7:58:56, 313kB/s]
(ol) C:\Users\SNS>
Whenever I try to run a specific model ID, it runs into connection errors or timeouts. What should I do? `openllm start opt` did work well, but I do want to run a specific model.
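One workaround for the timeouts is to pre-fetch the weights with huggingface_hub's resumable snapshot download before running `openllm start`, so an interrupted multi-gigabyte shard resumes instead of restarting. A hedged sketch (`resume_download` and `max_workers` are real `snapshot_download` parameters, but whether your OpenLLM version reuses this cache rather than re-downloading is an assumption):

```python
# Hedged sketch: resumable pre-fetch of the Falcon weights over a flaky connection.
from huggingface_hub import snapshot_download

snapshot_download(
    "tiiuae/falcon-7b",
    resume_download=True,  # re-running continues interrupted files instead of restarting
    max_workers=1,         # fewer parallel connections is gentler on a slow/flaky link
)
```

Re-running the same call after a ConnectionError picks up where it left off; once the snapshot completes, retry `openllm start falcon --model-id tiiuae/falcon-7b`.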