cardiffnlp / xlm-t

Repository for XLM-T, a framework for evaluating multilingual language models on Twitter data

AttributeError: module 'google.protobuf.descriptor' has no attribute '_internal_create_key' #12

[Open] k3ybladewielder opened this issue 1 year ago

k3ybladewielder commented 1 year ago

Hi, I'm using transformers version '4.26.1' on Databricks. The code below returns the following error.

Code:

from transformers import pipeline  # import not shown in the original snippet

model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)

Error:

AttributeError: module 'google.protobuf.descriptor' has no attribute '_internal_create_key'
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<command-798773577171854> in <module>
     15 
     16 model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
---> 17 sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)
     18 
     19 df_group_notna['sentiment'] = df_group_notna['Feedback'].apply(lambda x: sentiment_task(x)[0]['label'])

/databricks/python/lib/python3.7/site-packages/transformers/pipelines/__init__.py in pipeline(task, model, config, tokenizer, feature_extractor, framework, revision, use_fast, use_auth_token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)
    827 
    828             tokenizer = AutoTokenizer.from_pretrained(
--> 829                 tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
    830             )
    831 
/databricks/python/lib/python3.7/site-packages/transformers/models/auto/tokenization_auto.py in from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    674             tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]
    675             if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
--> 676                 return tokenizer_class_fast.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
    677             else:
    678                 if tokenizer_class_py is not None:

/databricks/python/lib/python3.7/site-packages/transformers/tokenization_utils_base.py in from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
   1811             local_files_only=local_files_only,
   1812             _commit_hash=commit_hash,
-> 1813             **kwargs,
   1814         )
   1815 

/databricks/python/lib/python3.7/site-packages/transformers/tokenization_utils_base.py in _from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
   1957         # Instantiate tokenizer.
   1958         try:
-> 1959             tokenizer = cls(*init_inputs, **init_kwargs)
   1960         except OSError:
   1961             raise OSError(

/databricks/python/lib/python3.7/site-packages/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py in __init__(self, vocab_file, tokenizer_file, bos_token, eos_token, sep_token, cls_token, unk_token, pad_token, mask_token, **kwargs)
    163             pad_token=pad_token,
    164             mask_token=mask_token,
--> 165             **kwargs,
    166         )
    167 

/databricks/python/lib/python3.7/site-packages/transformers/tokenization_utils_fast.py in __init__(self, *args, **kwargs)
    112         elif slow_tokenizer is not None:
    113             # We need to convert a slow tokenizer to build the backend
--> 114             fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
    115         elif self.slow_tokenizer_class is not None:
    116             # We need to create and convert a slow tokenizer to build the backend

/databricks/python/lib/python3.7/site-packages/transformers/convert_slow_tokenizer.py in convert_slow_tokenizer(transformer_tokenizer)
   1160     converter_class = SLOW_TO_FAST_CONVERTERS[tokenizer_class_name]
   1161 
-> 1162     return converter_class(transformer_tokenizer).converted()

/databricks/python/lib/python3.7/site-packages/transformers/convert_slow_tokenizer.py in __init__(self, *args)
    436         super().__init__(*args)
    437 
--> 438         from .utils import sentencepiece_model_pb2 as model_pb2
    439 
    440         m = model_pb2.ModelProto()

/databricks/python/lib/python3.7/site-packages/transformers/utils/sentencepiece_model_pb2.py in <module>
     32     syntax="proto2",
     33     serialized_options=b"H\003",
---> 34     create_key=_descriptor._internal_create_key,
     35     serialized_pb=(
     36         b'\n\x19sentencepiece_model.proto\x12\rsentencepiece"\xa1\n\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01'

AttributeError: module 'google.protobuf.descriptor' has no attribute '_internal_create_key'
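
Not part of the original report, but for context on what the traceback shows: the failure happens while transformers converts the slow sentencepiece tokenizer to a fast one, and the generated transformers/utils/sentencepiece_model_pb2.py it imports requires google.protobuf.descriptor._internal_create_key, which only exists in newer protobuf releases (around 3.12 and later). A minimal sketch of two possible workarounds follows; the use_fast flag is taken from the pipeline() signature visible in the traceback, while the upgrade command and the sample input are assumptions, not a fix confirmed in this thread.

# Option 1 (assumption): upgrade protobuf so that
# google.protobuf.descriptor._internal_create_key exists.
# In a Databricks notebook:
#   %pip install --upgrade protobuf

# Option 2: avoid the slow-to-fast conversion entirely by requesting the
# slow, sentencepiece-based tokenizer. This path never imports
# transformers/utils/sentencepiece_model_pb2.py, but it does require the
# sentencepiece package to be installed.
from transformers import pipeline

model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
sentiment_task = pipeline(
    "sentiment-analysis",
    model=model_path,
    tokenizer=model_path,
    use_fast=False,  # skip convert_slow_tokenizer and its protobuf dependency
)
print(sentiment_task("I love this!"))  # e.g. [{'label': 'positive', 'score': ...}]

If protobuf is upgraded, the Python process has to be restarted afterwards so the new version is actually imported.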