-- Create (or overwrite) the named connection object that the UDF calls
-- below pass by name as 'BUCKETFS_CONNECTION'.
-- NOTE(review): address, user and password are the literal values of this
-- reproduction; replace them with the values of your own environment.
CREATE OR REPLACE CONNECTION BUCKETFS_CONNECTION
    TO 'http://localhost:6583/default/models;bfsdefault'
    USER 'w'
    IDENTIFIED BY 'write';
-- Step 1 of the reproduction: run the model downloader UDF for the model
-- 'sschet/biomedical-ner-all'.
-- NOTE(review): the per-argument roles noted below are inferred from the
-- values and the extension's documented signature — confirm before relying
-- on them.
SELECT TE_MODEL_DOWNLOADER_UDF(
    'sschet/biomedical-ner-all',  -- presumably the model name
    'token_classification',       -- presumably the sub-directory for the model
    'BUCKETFS_CONNECTION',        -- name of the connection object to use
    NULL                          -- presumably an optional token connection; none given
);
-- Step 2 of the reproduction: run the token classification UDF on a sample
-- sentence, using the same connection, sub-directory and model name as the
-- download step. This is the call whose error message is reported in this file.
-- NOTE(review): the per-argument roles noted below are inferred from the
-- values and the extension's documented signature — confirm before relying
-- on them.
SELECT TE_TOKEN_CLASSIFICATION_UDF(
    NULL,                         -- presumably the device id; none given
    'BUCKETFS_CONNECTION',        -- name of the connection object to use
    'token_classification',       -- presumably the sub-directory of the model
    'sschet/biomedical-ner-all',  -- presumably the model name
    'Influenza, commonly known as "the flu" or just "flu", is an infectious disease caused by influenza viruses. Symptoms range from mild to severe and often include fever, runny nose, sore throat, muscle pain, headache, coughing, and fatigue.',
    NULL                          -- presumably the aggregation strategy; none given
);
Error Message of TE_TOKEN_CLASSIFICATION_UDF
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/transformers/utils/hub.py", line 398, in cached_file
    resolved_file = hf_hub_download(
  File "/usr/local/lib/python3.8/dist-packages/huggingface_hub/utils/_validators.py", line 110, in _inner_fn
    validate_repo_id(arg_value)
  File "/usr/local/lib/python3.8/dist-packages/huggingface_hub/utils/_validators.py", line 158, in validate_repo_id
    raise HFValidationError(
huggingface_hub.utils._validators.HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '/buckets/bfsdefault/default/models/token_classification/sschet/biomedical-ner-all/pretrained/sschet/biomedical-ner-all'. Use `repo_type` argument if needed.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/exasol_transformers_extension/udfs/models/base_model_udf.py", line 95, in get_predictions_from_batch
    self.check_cache(model_df)
  File "/usr/local/lib/python3.8/dist-packages/exasol_transformers_extension/udfs/models/base_model_udf.py", line 192, in check_cache
    self.last_created_pipeline = self.model_loader.load_models(self.cache_dir,
  File "/usr/local/lib/python3.8/dist-packages/exasol_transformers_extension/utils/load_local_model.py", line 49, in load_models
    loaded_model = self._base_model_factory.from_pretrained(str(model_path))
  File "/usr/local/lib/python3.8/dist-packages/transformers/models/auto/auto_factory.py", line 482, in from_pretrained
    resolved_config_file = cached_file(
  File "/usr/local/lib/python3.8/dist-packages/transformers/utils/hub.py", line 462, in cached_file
    raise EnvironmentError(
OSError: Incorrect path_or_model_id: '/buckets/bfsdefault/default/models/token_classification/sschet/biomedical-ner-all/pretrained/sschet/biomedical-ner-all'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
Investigation
Directory Structure created by TE_MODEL_DOWNLOADER_UDF
[root@n11 /]# ls /exa/data/bucketfs/bfsdefault/.dest/default/models/token_classification/sschet/biomedical-ner-all/
config.json model.safetensors special_tokens_map.json tokenizer.json tokenizer_config.json vocab.txt
[root@n11 /]# ls /exa/data/bucketfs/bfsdefault/.dest/default/models/token_classification/sschet/biomedical-ner-all/
Reproduction:
Error Message of TE_TOKEN_CLASSIFICATION_UDF
Investigation
Directory Structure created by TE_MODEL_DOWNLOADER_UDF
Directory Structure expected by PredictionUDFs
/buckets/bfsdefault/default/models/token_classification/sschet/biomedical-ner-all/pretrained/sschet/biomedical-ner-all
Acceptance Criteria