System Information

ZENML_LOCAL_VERSION: 0.57.1 ZENML_SERVER_VERSION: 0.57.1 ZENML_SERVER_DATABASE: sqlite ZENML_SERVER_DEPLOYMENT_TYPE: other ZENML_CONFIG_DIR: /root/.config/zenml ZENML_LOCAL_STORE_DIR: /root/.config/zenml/local_stores ZENML_SERVER_URL: sqlite:////root/.config/zenml/local_stores/default_zen_store/zenml.db ZENML_ACTIVE_REPOSITORY_ROOT: None PYTHON_VERSION: 3.10.0 ENVIRONMENT: native SYSTEM_INFO: {'os': 'linux', 'linux_distro': 'ubuntu', 'linux_distro_like': 'debian', 'linux_distro_version': '22.04'} ACTIVE_WORKSPACE: default ACTIVE_STACK: default ACTIVE_USER: default TELEMETRY_STATUS: enabled ANALYTICS_CLIENT_ID: c78fc8ea-738d-4d37-8ba9-aec7ebe46f5c ANALYTICS_USER_ID: 55e2f3f9-af52-4126-82a6-1c083fcda7c5 ANALYTICS_SERVER_ID: c78fc8ea-738d-4d37-8ba9-aec7ebe46f5c INTEGRATIONS: ['aws', 'bentoml', 'bitbucket', 'huggingface', 'kaniko', 'mlflow', 'pillow', 'pytorch', 's3', 'scipy', 'sklearn', 'slack'] PACKAGES: {'argon2-cffi': '21.3.0', 'argon2-cffi-bindings': '21.2.0', 'awscli': '1.32.3', 'bamboolib': '1.30.16', 'defusedxml': '0.7.1', 'docutils': '0.16', 'ipyslickgrid': '0.0.3', 'ipython-genutils': '0.2.0', 'jupyterlab-pygments': '0.2.2', 'mypy': '1.3.0', 'mypy-extensions': '1.0.0', 'pandocfilters': '1.5.0', 'plotly': '5.10.0', 'ppscore': '1.2.0', 'rsa': '4.7.2', 'xlrd': '2.0.1', 'brotli': '1.1.0', 'deprecated': '1.2.14', 'gitpython': '3.1.43', 'mako': '1.3.5', 'markdown': '3.6', 'markupsafe': '2.1.5', 'pyjwt': '2.7.0', 'pymysql': '1.0.3', 'pyyaml': '6.0.1', 'sqlalchemy': '1.4.41', 'sqlalchemy-utils': '0.38.3', 'absl-py': '2.1.0', 'accelerate': '0.30.1', 'aiobotocore': '2.7.0', 'aiofiles': '23.2.1', 'aiohttp': '3.9.5', 'aiohttp-cors': '0.7.0', 'aioitertools': '0.11.0', 'aiokafka': '0.10.0', 'aiosignal': '1.3.1', 'alembic': '1.8.1', 'altair': '5.3.0', 'aniso8601': '9.0.1', 'annotated-types': '0.7.0', 'anyio': '4.3.0', 'appdirs': '1.4.4', 'argparse': '1.4.0', 'asgiref': '3.8.1', 'asttokens': '2.4.1', 'async-timeout': '4.0.3', 'attrs': '22.2.0', 'aws-profile-manager': 
'0.7.3', 'azure-common': '1.1.28', 'azure-core': '1.30.1', 'azure-mgmt-core': '1.4.0', 'azure-mgmt-resource': '23.1.1', 'bcrypt': '4.0.1', 'bentoml': '1.2.16', 'bert-score': '0.3.13', 'blinker': '1.8.2', 'boto3': '1.28.64', 'botocore': '1.31.64', 'build': '1.2.1', 'cachetools': '5.3.3', 'cattrs': '23.1.2', 'certifi': '2024.2.2', 'cffi': '1.16.0', 'charset-normalizer': '3.3.2', 'circus': '0.18.0', 'click': '8.1.3', 'click-option-group': '0.5.6', 'click-params': '0.3.0', 'cloudpickle': '2.2.1', 'colorama': '0.4.6', 'coloredlogs': '15.0.1', 'colorful': '0.5.6', 'comm': '0.2.2', 'configparser': '7.0.0', 'contourpy': '1.2.1', 'cryptography': '42.0.7', 'cycler': '0.12.1', 'dataclasses-json': '0.6.6', 'datasets': '2.19.1', 'debugpy': '1.8.1', 'decorator': '5.1.1', 'deepmerge': '1.1.1', 'dill': '0.3.8', 'distlib': '0.3.8', 'distro': '1.9.0', 'dnspython': '2.6.1', 'docker': '6.1.3', 'email-validator': '2.1.1', 'entrypoints': '0.4', 'evaluate': '0.4.2', 'exceptiongroup': '1.2.0', 'executing': '2.0.1', 'fastapi': '0.89.1', 'fastapi-cli': '0.0.4', 'fastapi-utils': '0.2.1', 'fastt5': '0.0.5', 'ffmpy': '0.3.2', 'filelock': '3.14.0', 'flask': '3.0.3', 'flatbuffers': '24.3.25', 'fonttools': '4.51.0', 'frozenlist': '1.4.1', 'fs': '2.4.16', 'fsspec': '2023.10.0', 'gevent': '24.2.1', 'geventhttpclient': '2.0.2', 'gitdb': '4.0.11', 'google-api-core': '2.19.0', 'google-auth': '2.29.0', 'google-pasta': '0.2.0', 'googleapis-common-protos': '1.63.0', 'gradio': '3.50.2', 'gradio-client': '0.6.1', 'graphene': '3.3', 'graphql-core': '3.2.3', 'graphql-relay': '3.2.0', 'greenlet': '3.0.3', 'grpcio': '1.64.0', 'gunicorn': '21.2.0', 'h11': '0.14.0', 'httpcore': '1.0.5', 'httplib2': '0.19.1', 'httptools': '0.6.1', 'httpx': '0.27.0', 'huggingface': '0.0.1', 'huggingface-hub': '0.23.0', 'humanfriendly': '10.0', 'icecream': '2.1.3', 'idna': '3.7', 'importlib-metadata': '4.13.0', 'importlib-resources': '6.4.0', 'inflection': '0.5.1', 'ipinfo': '5.0.1', 'ipykernel': '6.29.3', 'ipython': '8.24.0', 
'ipywidgets': '8.1.2', 'isodate': '0.6.1', 'itsdangerous': '2.2.0', 'jedi': '0.19.1', 'jinja2': '3.1.4', 'jmespath': '1.0.1', 'joblib': '1.4.2', 'jsonpatch': '1.33', 'jsonpointer': '2.4', 'jsonschema': '4.22.0', 'jsonschema-specifications': '2023.12.1', 'jupyter-client': '8.6.1', 'jupyter-core': '5.7.2', 'jupyterlab-widgets': '3.0.10', 'kiwisolver': '1.4.5', 'kubernetes': '29.0.0', 'langchain': '0.2.0', 'langchain-community': '0.2.0', 'langchain-core': '0.2.1', 'langchain-text-splitters': '0.2.0', 'langsmith': '0.1.61', 'linkify-it-py': '2.0.3', 'lxml': '5.2.2', 'markdown-it-py': '3.0.0', 'marshmallow': '3.21.2', 'matplotlib': '3.9.0', 'matplotlib-inline': '0.1.7', 'mdit-py-plugins': '0.4.1', 'mdurl': '0.1.2', 'memray': '1.12.0', 'mlflow': '2.12.1', 'mlserver': '1.3.5', 'mlserver-mlflow': '1.5.0', 'mpmath': '1.3.0', 'msgpack': '1.0.8', 'multidict': '6.0.5', 'multiprocess': '0.70.16', 'nest-asyncio': '1.6.0', 'networkx': '3.3', 'nltk': '3.8.1', 'numpy': '1.26.4', 'nvidia-cublas-cu12': '12.1.3.1', 'nvidia-cuda-cupti-cu12': '12.1.105', 'nvidia-cuda-nvrtc-cu12': '12.1.105', 'nvidia-cuda-runtime-cu12': '12.1.105', 'nvidia-cudnn-cu12': '8.9.2.26', 'nvidia-cufft-cu12': '11.0.2.54', 'nvidia-curand-cu12': '10.3.2.106', 'nvidia-cusolver-cu12': '11.4.5.107', 'nvidia-cusparse-cu12': '12.1.0.106', 'nvidia-ml-py': '11.525.150', 'nvidia-nccl-cu12': '2.20.5', 'nvidia-nvjitlink-cu12': '12.4.127', 'nvidia-nvtx-cu12': '12.1.105', 'oauthlib': '3.2.2', 'onnx': '1.16.1', 'onnxruntime': '1.18.0', 'opencensus': '0.11.4', 'opencensus-context': '0.1.3', 'opentelemetry-api': '1.20.0', 'opentelemetry-instrumentation': '0.41b0', 'opentelemetry-instrumentation-aiohttp-client': '0.41b0', 'opentelemetry-instrumentation-asgi': '0.41b0', 'opentelemetry-sdk': '1.20.0', 'opentelemetry-semantic-conventions': '0.41b0', 'opentelemetry-util-http': '0.41b0', 'optimum': '1.20.0.dev0', 'orjson': '3.10.3', 'packaging': '23.2', 'pandas': '2.2.2', 'parso': '0.8.4', 'passlib': '1.7.4', 'pathos': '0.3.2', 
'pathspec': '0.12.1', 'pexpect': '4.9.0', 'pickleshare': '0.7.5', 'pillow': '10.3.0', 'pip': '24.0', 'pip-requirements-parser': '32.0.1', 'pip-tools': '7.4.1', 'platformdirs': '4.2.2', 'portalocker': '2.8.2', 'pox': '0.3.4', 'ppft': '1.7.6.8', 'progress': '1.6', 'prometheus-client': '0.20.0', 'prompt-toolkit': '3.0.42', 'proto-plus': '1.23.0', 'protobuf': '3.20.3', 'protobuf3-to-dict': '0.1.5', 'psutil': '5.9.8', 'ptyprocess': '0.7.0', 'pure-eval': '0.2.2', 'py-grpc-prometheus': '0.8.0', 'py-spy': '0.3.14', 'pyarrow': '15.0.2', 'pyarrow-hotfix': '0.6', 'pyasn1': '0.6.0', 'pyasn1-modules': '0.4.0', 'pycparser': '2.22', 'pydantic': '1.10.15', 'pydantic-core': '2.18.2', 'pydub': '0.25.1', 'pygments': '2.18.0', 'pyparsing': '2.4.7', 'pyproject-hooks': '1.1.0', 'python-dateutil': '2.9.0', 'python-dotenv': '1.0.1', 'python-json-logger': '2.0.7', 'python-multipart': '0.0.9', 'python-rapidjson': '1.14', 'pytz': '2024.1', 'pyzmq': '26.0.3', 'querystring-parser': '1.2.4', 'ray': '2.23.0', 'referencing': '0.35.1', 'regex': '2024.5.15', 'requests': '2.32.1', 'requests-oauthlib': '2.0.0', 'rich': '13.7.1', 'rouge-score': '0.1.2', 'rpds-py': '0.18.1', 'ruff': '0.4.4', 's3fs': '2023.10.0', 's3transfer': '0.7.0', 'sacrebleu': '2.4.2', 'safetensors': '0.4.3', 'sagemaker': '2.117.0', 'schema': '0.7.7', 'scikit-learn': '1.4.2', 'scipy': '1.13.0', 'secure': '0.3.0', 'semantic-version': '2.10.0', 'sentencepiece': '0.2.0', 'setuptools': '69.5.1', 'shellingham': '1.5.4', 'simple-di': '0.1.5', 'six': '1.16.0', 'slack-sdk': '3.27.2', 'smart-open': '7.0.4', 'smdebug-rulesconfig': '1.0.1', 'smmap': '5.0.1', 'sniffio': '1.3.1', 'sqlalchemy2-stubs': '0.0.2a38', 'sqlmodel': '0.0.8', 'sqlparse': '0.5.0', 'stack-data': '0.6.2', 'starlette': '0.37.2', 'starlette-exporter': '0.17.1', 'sympy': '1.12', 'tabulate': '0.9.0', 'tenacity': '8.3.0', 'textual': '0.63.4', 'threadpoolctl': '3.5.0', 'tokenizers': '0.13.3', 'tomli': '2.0.1', 'tomli-w': '1.0.0', 'tomlkit': '0.12.0', 'toolz': '0.12.1', 'torch': 
'2.3.0', 'torchvision': '0.18.0', 'tornado': '6.4', 'tqdm': '4.66.4', 'traitlets': '5.14.3', 'transformers': '4.31.0', 'triton': '2.3.0', 'tritonclient': '2.45.0', 'typer': '0.12.3', 'typing-extensions': '4.11.0', 'typing-inspect': '0.9.0', 'tzdata': '2024.1', 'uc-micro-py': '1.0.3', 'ujson': '5.10.0', 'urllib3': '2.0.7', 'uvicorn': '0.29.0', 'uvloop': '0.19.0', 'validators': '0.18.2', 'virtualenv': '20.26.2', 'watchfiles': '0.21.0', 'wcwidth': '0.2.13', 'websocket-client': '1.8.0', 'websockets': '11.0.3', 'werkzeug': '3.0.3', 'wheel': '0.43.0', 'widgetsnbextension': '4.0.10', 'wrapt': '1.16.0', 'xmltodict': '0.13.0', 'xxhash': '3.4.1', 'yarl': '1.9.4', 'zenml': '0.57.1', 'zipp': '3.17.0', 'zope.event': '5.0', 'zope.interface': '6.4'}

CURRENT STACK

Name: default ID: 80400df8-f083-451c-adb8-c428c1dac07f Workspace: default / dc072673-926e-4729-afc9-a1674661c86b

ORCHESTRATOR: default

Name: default ID: 88a7725b-e56d-4312-8011-2282281f93f8 Type: orchestrator Flavor: local Configuration: {} Workspace: default / dc072673-926e-4729-afc9-a1674661c86b

ARTIFACT_STORE: default

Name: default ID: ece5edbf-255e-4390-93ca-30465cface73 Type: artifact_store Flavor: local Configuration: {'path': ''} Workspace: default / dc072673-926e-4729-afc9-a1674661c86b

What happened?

There is an issue with mt5 and umt5 when sentencepiece is installed. When sentencepiece is uninstalled, this works fine for umt5.

[Errno 2] No such file or directory: '/tmp/tmpc33r27ji/spiece.model'
Traceback (most recent call last):
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py", line 230, in launch
    self._run_step(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py", line 442, in _run_step
    self._run_step_without_step_operator(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py", line 517, in _run_step_without_step_operator
    runner.run(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_runner.py", line 250, in run
    output_artifact_ids = self._store_output_artifacts(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_runner.py", line 624, in _store_output_artifacts
    artifact = save_artifact(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/artifacts/utils.py", line 183, in save_artifact
    materializer_object.save(data)
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py", line 83, in save
    tokenizer.save_pretrained(temp_dir.name)
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2221, in save_pretrained
    save_files = self._save_pretrained(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py", line 595, in _save_pretrained
    vocab_files = self.save_vocabulary(save_directory, filename_prefix=filename_prefix)
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/models/t5/tokenization_t5_fast.py", line 192, in save_vocabulary
    copyfile(self.vocab_file, out_vocab_file)
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/shutil.py", line 254, in copyfile
    with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpc33r27ji/spiece.model'

Reproduction steps

Code Snippet
```
from typing import Tuple
```

from transformers import ( AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer, PreTrainedModel, PreTrainedTokenizerBase, ) from typing_extensions import Annotated from zenml import ArtifactConfig, pipeline, step

MODEL_NAME = "google/umt5-small"

@step def tokenizer_loader() -> ( Annotated[ PreTrainedTokenizerBase, ArtifactConfig(name="tokenizer", is_model_artifact=True), ] ): return AutoTokenizer.from_pretrained(MODEL_NAME)

@step(enable_cache=False) def model_trainer( tokenizer: PreTrainedTokenizerBase, model_name: str = "google/umt5-small", ) -> Tuple[ Annotated[ PreTrainedModel, ArtifactConfig(name="model", is_model_artifact=True), ], Annotated[ PreTrainedTokenizerBase, ArtifactConfig(name="tokenizer", is_model_artifact=True), ], ]: model_config = AutoConfig.from_pretrained(model_name) model = AutoModelForSeq2SeqLM.from_pretrained(model_name, config=model_config) print("Now I am returning the model and tokenizer") print("---------------------------")

print(tokenizer)

print("---------------------------")

return model, tokenizer

@pipeline(enable_cache=False) def training_pipeline(): tokenizer = tokenizer_loader() model, tokenizer = model_trainer(tokenizer=tokenizer, model_name=MODEL_NAME)

if __name__ == "__main__": training_pipeline()


2. Installed libraries 
`pip install zenml["server"] transformers["torch"] sentencepiece `

4. Integration and Init
`zenml integration install pytorch mlflow huggingface aws s3 kubeflow slack github -y`
`zenml init`

5. Run
`python filename.py`

...

### Relevant log output

```shell
Traceback (most recent call last):
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py", line 230, in launch
    self._run_step(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py", line 442, in _run_step
    self._run_step_without_step_operator(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py", line 517, in _run_step_without_step_operator
    runner.run(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_runner.py", line 250, in run
    output_artifact_ids = self._store_output_artifacts(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_runner.py", line 624, in _store_output_artifacts
    artifact = save_artifact(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/artifacts/utils.py", line 183, in save_artifact
    materializer_object.save(data)
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py", line 83, in save
    tokenizer.save_pretrained(temp_dir.name)
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2221, in save_pretrained
    save_files = self._save_pretrained(
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py", line 595, in _save_pretrained
    vocab_files = self.save_vocabulary(save_directory, filename_prefix=filename_prefix)
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/models/t5/tokenization_t5_fast.py", line 192, in save_vocabulary
    copyfile(self.vocab_file, out_vocab_file)
  File "/mnt/ssd/mamba/envs/x/lib/python3.10/shutil.py", line 254, in copyfile
    with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpc33r27ji/spiece.model'
```

Code of Conduct

[X] I agree to follow this project's Code of Conduct

@safoinme I think this has now introduced another bug when using S3 as the ARTIFACT_STORE:


╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /mnt/ssd/projects/events/src/debug_train.py:101 in <module>                                      │
│                                                                                                  │
│    98                                                                                            │
│    99                                                                                            │
│   100 if __name__ == "__main__":                                                                 │
│ ❱ 101 │   training_pipeline()                                                                    │
│   102                                                                                            │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/new/pipelines/pipeline.py:1397 in       │
│ __call__                                                                                         │
│                                                                                                  │
│   1394 │   │   │   return self.entrypoint(*args, **kwargs)                                       │
│   1395 │   │                                                                                     │
│   1396 │   │   self.prepare(*args, **kwargs)                                                     │
│ ❱ 1397 │   │   return self._run(**self._run_args)                                                │
│   1398 │                                                                                         │
│   1399 │   def _call_entrypoint(self, *args: Any, **kwargs: Any) -> None:                        │
│   1400 │   │   """Calls the pipeline entrypoint function with the given arguments.               │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/new/pipelines/pipeline.py:758 in _run   │
│                                                                                                  │
│    755 │   │   │   │   │   │   "`zenml up`."                                                     │
│    756 │   │   │   │   │   )                                                                     │
│    757 │   │   │                                                                                 │
│ ❱  758 │   │   │   deploy_pipeline(                                                              │
│    759 │   │   │   │   deployment=deployment_model, stack=stack, placeholder_run=run             │
│    760 │   │   │   )                                                                             │
│    761 │   │   │   if run:                                                                       │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/new/pipelines/run_utils.py:148 in       │
│ deploy_pipeline                                                                                  │
│                                                                                                  │
│   145 │   │   │   # placeholder run to stay in the database                                      │
│   146 │   │   │   Client().delete_pipeline_run(placeholder_run.id)                               │
│   147 │   │                                                                                      │
│ ❱ 148 │   │   raise e                                                                            │
│   149 │   finally:                                                                               │
│   150 │   │   constants.SHOULD_PREVENT_PIPELINE_EXECUTION = previous_value                       │
│   151                                                                                            │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/new/pipelines/run_utils.py:136 in       │
│ deploy_pipeline                                                                                  │
│                                                                                                  │
│   133 │   previous_value = constants.SHOULD_PREVENT_PIPELINE_EXECUTION                           │
│   134 │   constants.SHOULD_PREVENT_PIPELINE_EXECUTION = True                                     │
│   135 │   try:                                                                                   │
│ ❱ 136 │   │   stack.deploy_pipeline(deployment=deployment)                                       │
│   137 │   except Exception as e:                                                                 │
│   138 │   │   if (                                                                               │
│   139 │   │   │   placeholder_run                                                                │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/stack/stack.py:853 in deploy_pipeline   │
│                                                                                                  │
│    850 │   │   Returns:                                                                          │
│    851 │   │   │   The return value of the call to `orchestrator.run_pipeline(...)`.             │
│    852 │   │   """                                                                               │
│ ❱  853 │   │   return self.orchestrator.run(deployment=deployment, stack=self)                   │
│    854 │                                                                                         │
│    855 │   def _get_active_components_for_step(                                                  │
│    856 │   │   self, step_config: "StepConfiguration"                                            │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/base_orchestrator.py:175  │
│ in run                                                                                           │
│                                                                                                  │
│   172 │   │   environment = get_config_environment_vars(deployment=deployment)                   │
│   173 │   │                                                                                      │
│   174 │   │   try:                                                                               │
│ ❱ 175 │   │   │   result = self.prepare_or_run_pipeline(                                         │
│   176 │   │   │   │   deployment=deployment, stack=stack, environment=environment                │
│   177 │   │   │   )                                                                              │
│   178 │   │   finally:                                                                           │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/local/local_orchestrator. │
│ py:78 in prepare_or_run_pipeline                                                                 │
│                                                                                                  │
│    75 │   │   │   │   │   step_name,                                                             │
│    76 │   │   │   │   )                                                                          │
│    77 │   │   │                                                                                  │
│ ❱  78 │   │   │   self.run_step(                                                                 │
│    79 │   │   │   │   step=step,                                                                 │
│    80 │   │   │   )                                                                              │
│    81                                                                                            │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/base_orchestrator.py:195  │
│ in run_step                                                                                      │
│                                                                                                  │
│   192 │   │   │   step=step,                                                                     │
│   193 │   │   │   orchestrator_run_id=self.get_orchestrator_run_id(),                            │
│   194 │   │   )                                                                                  │
│ ❱ 195 │   │   launcher.launch()                                                                  │
│   196 │                                                                                          │
│   197 │   @staticmethod                                                                          │
│   198 │   def requires_resources_in_orchestration_environment(                                   │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py:250 in   │
│ launch                                                                                           │
│                                                                                                  │
│   247 │   │   │   │   │   while retries < max_retries:                                           │
│   248 │   │   │   │   │   │   last_retry = retries == max_retries - 1                            │
│   249 │   │   │   │   │   │   try:                                                               │
│ ❱ 250 │   │   │   │   │   │   │   self._run_step(                                                │
│   251 │   │   │   │   │   │   │   │   pipeline_run=pipeline_run,                                 │
│   252 │   │   │   │   │   │   │   │   step_run=step_run_response,                                │
│   253 │   │   │   │   │   │   │   │   last_retry=last_retry,                                     │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py:451 in   │
│ _run_step                                                                                        │
│                                                                                                  │
│   448 │   │   │   │   │   last_retry=last_retry,                                                 │
│   449 │   │   │   │   )                                                                          │
│   450 │   │   │   else:                                                                          │
│ ❱ 451 │   │   │   │   self._run_step_without_step_operator(                                      │
│   452 │   │   │   │   │   pipeline_run=pipeline_run,                                             │
│   453 │   │   │   │   │   step_run=step_run,                                                     │
│   454 │   │   │   │   │   step_run_info=step_run_info,                                           │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_launcher.py:535 in   │
│ _run_step_without_step_operator                                                                  │
│                                                                                                  │
│   532 │   │   if last_retry:                                                                     │
│   533 │   │   │   os.environ[ENV_ZENML_IGNORE_FAILURE_HOOK] = "false"                            │
│   534 │   │   runner = StepRunner(step=self._step, stack=self._stack)                            │
│ ❱ 535 │   │   runner.run(                                                                        │
│   536 │   │   │   pipeline_run=pipeline_run,                                                     │
│   537 │   │   │   step_run=step_run,                                                             │
│   538 │   │   │   input_artifacts=input_artifacts,                                               │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_runner.py:189 in run │
│                                                                                                  │
│   186 │   │   │   │   self._prepare_model_context_for_step()                                     │
│   187 │   │   │   │                                                                              │
│   188 │   │   │   │   # Parse the inputs for the entrypoint function.                            │
│ ❱ 189 │   │   │   │   function_params = self._parse_inputs(                                      │
│   190 │   │   │   │   │   args=spec.args,                                                        │
│   191 │   │   │   │   │   annotations=spec.annotations,                                          │
│   192 │   │   │   │   │   input_artifacts=input_artifacts,                                       │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_runner.py:355 in     │
│ _parse_inputs                                                                                    │
│                                                                                                  │
│   352 │   │   │   │   )                                                                          │
│   353 │   │   │   │   function_params[arg] = get_step_context()                                  │
│   354 │   │   │   elif arg in input_artifacts:                                                   │
│ ❱ 355 │   │   │   │   function_params[arg] = self._load_input_artifact(                          │
│   356 │   │   │   │   │   input_artifacts[arg], arg_type                                         │
│   357 │   │   │   │   )                                                                          │
│   358 │   │   │   elif arg in self.configuration.parameters:                                     │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/orchestrators/step_runner.py:458 in     │
│ _load_input_artifact                                                                             │
│                                                                                                  │
│   455 │   │   )                                                                                  │
│   456 │   │   materializer: BaseMaterializer = materializer_class(artifact.uri)                  │
│   457 │   │   materializer.validate_type_compatibility(data_type)                                │
│ ❱ 458 │   │   return materializer.load(data_type=data_type)                                      │
│   459 │                                                                                          │
│   460 │   def _validate_outputs(                                                                 │
│   461 │   │   self,                                                                              │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/zenml/integrations/huggingface/materializers/ │
│ huggingface_tokenizer_materializer.py:58 in load                                                 │
│                                                                                                  │
│   55 │   │                                                                                       │
│   56 │   │   print(os.path.join(self.uri, DEFAULT_TOKENIZER_DIR))                                │
│   57 │   │                                                                                       │
│ ❱ 58 │   │   return AutoTokenizer.from_pretrained(                                               │
│   59 │   │   │   os.path.join(self.uri, DEFAULT_TOKENIZER_DIR),                                  │
│   60 │   │   )                                                                                   │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py │
│ :652 in from_pretrained                                                                          │
│                                                                                                  │
│   649 │   │   │   return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *input   │
│   650 │   │                                                                                      │
│   651 │   │   # Next, let's try to use the tokenizer_config file to get the tokenizer class.     │
│ ❱ 652 │   │   tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)   │
│   653 │   │   if "_commit_hash" in tokenizer_config:                                             │
│   654 │   │   │   kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]                      │
│   655 │   │   config_tokenizer_class = tokenizer_config.get("tokenizer_class")                   │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py │
│ :496 in get_tokenizer_config                                                                     │
│                                                                                                  │
│   493 │   tokenizer_config = get_tokenizer_config("tokenizer-test")                              │
│   494 │   ```"""                                                                                 │
│   495 │   commit_hash = kwargs.get("_commit_hash", None)                                         │
│ ❱ 496 │   resolved_config_file = cached_file(                                                    │
│   497 │   │   pretrained_model_name_or_path,                                                     │
│   498 │   │   TOKENIZER_CONFIG_FILE,                                                             │
│   499 │   │   cache_dir=cache_dir,                                                               │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/transformers/utils/hub.py:417 in cached_file  │
│                                                                                                  │
│    414 │   user_agent = http_user_agent(user_agent)                                              │
│    415 │   try:                                                                                  │
│    416 │   │   # Load from URL or cache if already cached                                        │
│ ❱  417 │   │   resolved_file = hf_hub_download(                                                  │
│    418 │   │   │   path_or_repo_id,                                                              │
│    419 │   │   │   filename,                                                                     │
│    420 │   │   │   subfolder=None if len(subfolder) == 0 else subfolder,                         │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:106 in   │
│ _inner_fn                                                                                        │
│                                                                                                  │
│   103 │   │   │   kwargs.items(),  # Kwargs values                                               │
│   104 │   │   ):                                                                                 │
│   105 │   │   │   if arg_name in ["repo_id", "from_id", "to_id"]:                                │
│ ❱ 106 │   │   │   │   validate_repo_id(arg_value)                                                │
│   107 │   │   │                                                                                  │
│   108 │   │   │   elif arg_name == "token" and arg_value is not None:                            │
│   109 │   │   │   │   has_token = True                                                           │
│                                                                                                  │
│ /mnt/ssd/mamba/envs/x/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:154 in   │
│ validate_repo_id                                                                                 │
│                                                                                                  │
│   151 │   │   raise HFValidationError(f"Repo id must be a string, not {type(repo_id)}: '{repo_   │
│   152 │                                                                                          │
│   153 │   if repo_id.count("/") > 1:                                                             │
│ ❱ 154 │   │   raise HFValidationError(                                                           │
│   155 │   │   │   "Repo id must be in the form 'repo_name' or 'namespace/repo_name':"            │
│   156 │   │   │   f" '{repo_id}'. Use `repo_type` argument if needed."                           │
│   157 │   │   )                                                                                  │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': 
's3://xxx-xxx-xxx/tokenizer_loader/tokenizer/xxac9ce6-xxxx-xxxx-xxxx-00xxxxx/xxxxxx/hf_tokenizer'. Use `repo_type` argument if needed.

zenml-io / zenml

[BUG]: mt5 tokenizer spiece.model saving issue #2739

System Information

What happened?

Reproduction steps

print(tokenizer)

Code of Conduct