bentoml / OpenLLM

Run any open-source LLMs, such as Llama and Gemma, as OpenAI-compatible API endpoints in the cloud.
https://bentoml.com
Apache License 2.0

Error installing and running Falcon Models #83

Closed pedrognsmartins closed 1 year ago

pedrognsmartins commented 1 year ago

Dear community,

When trying to download and run a Falcon model, I'm getting the error below.
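For reference, the command I ran was (reconstructed from the traceback below, so the exact invocation may have differed slightly):

    openllm start falcon --model-id tiiuae/falcon-7b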

Traceback (most recent call last):
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py", line 1395, in download_models
    _ref = bentoml.transformers.get(model.tag)
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\_internal\frameworks\transformers.py", line 292, in get
    model = bentoml.models.get(tag_like)
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\simple_di\__init__.py", line 139, in _
    return func(*_inject_args(bind.args), **_inject_kwargs(bind.kwargs))
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\models.py", line 42, in get
    return _model_store.get(tag)
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\_internal\store.py", line 146, in get
    raise NotFound(
NotFound: Model 'pt-tiiuae-falcon-7b:2f5c3cd4eace6be6c0f12981f377fb35e5bf6ee5' is not found in BentoML store <osfs 'C:\Users\pedro\bentoml\models'>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\__main__.py", line 26, in <module>
    cli()
  [... click dispatch frames (click\core.py) and openllm\cli.py wrappers elided ...]
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py", line 1422, in download_models
    _ref = model.import_model(
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\models\falcon\modeling_falcon.py", line 56, in import_model
    model = transformers.AutoModelForCausalLM.from_pretrained(
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\models\auto\auto_factory.py", line 479, in from_pretrained
    return model_class.from_pretrained(
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\modeling_utils.py", line 2881, in from_pretrained
    ) = cls._load_pretrained_model(
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\modeling_utils.py", line 2980, in _load_pretrained_model
    raise ValueError(
ValueError: The current `device_map` had weights offloaded to the disk. Please provide an `offload_folder` for them. Alternatively, make sure you have `safetensors` installed if the model you are using offers the weights in this format.

That failure happens inside the `openllm download` subprocess; the parent `openllm start` process then reports:

Traceback (most recent call last):
  File "C:\Users\pedro\anaconda3\envs\powerai\Scripts\openllm.exe\__main__.py", line 7, in <module>
  [... click dispatch frames and openllm\cli.py wrappers elided ...]
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py", line 797, in model_start
    llm = t.cast(
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\models\auto\factory.py", line 135, in for_model
    llm.ensure_model_id_exists()
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\_llm.py", line 900, in ensure_model_id_exists
    output = subprocess.check_output(
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\subprocess.py", line 466, in check_output
    return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
  File "C:\Users\pedro\anaconda3\envs\powerai\Lib\subprocess.py", line 571, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['C:\Users\pedro\anaconda3\envs\powerai\python.exe', '-m', 'openllm', 'download', 'falcon', '--model-id', 'tiiuae/falcon-7b', '--machine', '--implementation', 'pt']' returned non-zero exit status 1.

Can someone help me with this?

Thank you.

kenleejr commented 1 year ago

I got the same issue trying to use Falcon with openllm==0.1.17.

aarnphm commented 1 year ago

It seems like your machine doesn't have enough resources, so the weights are being offloaded to disk. I will need more bandwidth to investigate how to run Falcon on smaller machines.
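In the meantime, a possible workaround is to give accelerate somewhere to put the offloaded weights. This is an untested sketch that loads Falcon directly with transformers rather than through OpenLLM; the "offload" directory name is just an example:

import torch
import transformers

# Sketch: load Falcon with accelerate's automatic placement, and give it a
# folder to spill layers that don't fit in GPU/CPU memory.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",
    trust_remote_code=True,      # Falcon ships custom modeling code
    torch_dtype=torch.bfloat16,  # halves memory use vs. float32
    device_map="auto",           # accelerate decides GPU/CPU/disk placement
    offload_folder="offload",    # required once weights spill to disk
)
tokenizer = transformers.AutoTokenizer.from_pretrained("tiiuae/falcon-7b")

Installing safetensors (pip install safetensors) may also avoid this error for checkpoints that publish safetensors weights, as the ValueError message suggests.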

cmazzoni87 commented 1 year ago

I have the same issue on a server with massive resources:

CalledProcessError                        Traceback (most recent call last)
<ipython-input> in <module>
      3 import os
      4
----> 5 llm = OpenLLM(model_name='falcon', model_id='tiiuae/falcon-40b-instruct', temperature=0.0)
      6
      7 llm("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")

~/.local/lib/python3.8/site-packages/langchain/llms/openllm.py in __init__(self, model_name, model_id, server_url, server_type, embedded, **llm_kwargs)
    168         # in-process. Wrt to BentoML users, setting embedded=False is the expected
    169         # behaviour to invoke the runners remotely
--> 170         runner = openllm.Runner(
    171             model_name=model_name,
    172             model_id=model_id,

~/.local/lib/python3.8/site-packages/openllm/_llm.py in Runner(model_name, ensure_available, init_local, implementation, **attrs)
-> 1406     runner = t.cast(
   1407         "_BaseAutoLLMClass",
   1408         openllm[implementation if implementation is not None else EnvVarMixin(model_name)["framework_value"]],

~/.local/lib/python3.8/site-packages/openllm/models/auto/factory.py in create_runner(cls, model_name, model_id, **attrs)
--> 157         llm, runner_attrs = cls.for_model(model_name, model_id, return_runner_kwargs=True, **attrs)
    158         return llm.to_runner(**runner_attrs)

~/.local/lib/python3.8/site-packages/openllm/models/auto/factory.py in for_model(cls, model_name, model_id, return_runner_kwargs, llm_config, ensure_available, **attrs)
--> 135         llm.ensure_model_id_exists()
    136         if not return_runner_kwargs:
    137             return llm

~/.local/lib/python3.8/site-packages/openllm/_llm.py in ensure_model_id_exists(self)
--> 900     output = subprocess.check_output(
    901         [
    902             sys.executable,

/usr/lib/python3.8/subprocess.py in check_output(timeout, *popenargs, **kwargs)
--> 415     return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
    416                **kwargs).stdout

/usr/lib/python3.8/subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs)
--> 516         raise CalledProcessError(retcode, process.args,
    517                                  output=stdout, stderr=stderr)

CalledProcessError: Command '['/usr/bin/python3', '-m', 'openllm', 'download', 'falcon', '--model-id', 'tiiuae/falcon-40b-instruct', '--machine', '--implementation', 'pt']' returned non-zero exit status 1.
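In case it helps others hitting this from LangChain: the __init__ signature visible above also accepts a server_url, so one possible way to sidestep the in-process download is to start the server in its own process and connect to it. An untested sketch; http://localhost:3000 assumes OpenLLM's default port:

# First, in a shell (this is where the model gets downloaded):
#   openllm start falcon --model-id tiiuae/falcon-40b-instruct

from langchain.llms import OpenLLM

# Connect to the already-running server instead of spawning runners in-process.
llm = OpenLLM(server_url="http://localhost:3000", server_type="http")
print(llm("What is the difference between a duck and a goose?"))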
cmazzoni87 commented 1 year ago

Same issue as the one on ticket: https://github.com/bentoml/OpenLLM/issues/121

aarnphm commented 1 year ago

Please reopen if you still see this error on 0.3.0.
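To get the latest release (standard pip usage, nothing OpenLLM-specific):

    pip install --upgrade openllm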