Closed · Magic-Gj closed this issue 3 months ago
It looks like a library version mismatch.
Check your current environment configuration and reinstall the conda environment following the requirements in the README.
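For example, a quick way to confirm what is actually installed in the environment is a minimal check like the one below (it only prints the versions of the packages the README pins; adjust the imports if your setup differs):

```python
# Minimal version check for the environment created from the README.
import torch
import transformers
import packaging

print("torch:", torch.__version__)
print("transformers:", transformers.__version__)
print("packaging:", packaging.__version__)
```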
I installed it exactly by following the steps in the README, and the same problem still appears.
Hello, I am a graduate student at Donghua University. It was a pleasure to read such an excellent paper and to try to reproduce it. While running your example code I have run into some problems, and I hope you can find time to take a look. Thank you very much!
It is a transformers library issue; just modify the __init__.py file under /LiLT/LiLTfinetune as follows:
```python
from collections import OrderedDict
import types

from transformers import CONFIG_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, MODEL_NAMES_MAPPING, TOKENIZER_MAPPING
from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS, BertConverter, RobertaConverter, XLMRobertaConverter

# Older transformers releases (e.g. 4.5.1) expose auto_class_factory; newer ones
# (e.g. 4.25.1) replaced it with _BaseAutoModelClass + auto_class_update.
try:
    from transformers.models.auto.modeling_auto import auto_class_factory
except ImportError:
    from transformers.models.auto.modeling_auto import _BaseAutoModelClass, auto_class_update

from .models.LiLTRobertaLike import (
    LiLTRobertaLikeConfig,
    LiLTRobertaLikeForRelationExtraction,
    LiLTRobertaLikeForTokenClassification,
    LiLTRobertaLikeTokenizer,
    LiLTRobertaLikeTokenizerFast,
)

# Register the custom config/tokenizer under model_type "liltrobertalike".
CONFIG_MAPPING.update([("liltrobertalike", LiLTRobertaLikeConfig)])
MODEL_NAMES_MAPPING.update([("liltrobertalike", "LiLTRobertaLike")])
TOKENIZER_MAPPING.update(
    [
        (LiLTRobertaLikeConfig, (LiLTRobertaLikeTokenizer, LiLTRobertaLikeTokenizerFast)),
    ]
)

# tag.txt decides which slow->fast converter the tokenizer uses.
with open('tag.txt', 'r') as tagf:
    TAG = tagf.read().lower()
assert TAG == 'monolingual' or TAG == 'multilingual', 'TAG is wrong. It should be monolingual or multilingual.'
if TAG == 'monolingual':
    SLOW_TO_FAST_CONVERTERS.update({"LiLTRobertaLikeTokenizer": RobertaConverter})
elif TAG == 'multilingual':
    SLOW_TO_FAST_CONVERTERS.update({"LiLTRobertaLikeTokenizer": XLMRobertaConverter})

MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.update(
    [(LiLTRobertaLikeConfig, LiLTRobertaLikeForTokenClassification)]
)
MODEL_FOR_RELATION_EXTRACTION_MAPPING = OrderedDict(
    [(LiLTRobertaLikeConfig, LiLTRobertaLikeForRelationExtraction)]
)

# Build the Auto* classes with whichever helper the installed transformers provides:
# auto_class_factory on old versions, _BaseAutoModelClass + auto_class_update on new ones.
try:
    AutoModelForTokenClassification = auto_class_factory(
        "AutoModelForTokenClassification", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="token classification"
    )
except NameError:
    cls = types.new_class("AutoModelForTokenClassification", (_BaseAutoModelClass,))
    cls._model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
    cls.__name__ = "AutoModelForTokenClassification"
    AutoModelForTokenClassification = auto_class_update(cls, head_doc="token classification")

try:
    AutoModelForRelationExtraction = auto_class_factory(
        "AutoModelForRelationExtraction", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="relation extraction"
    )
except NameError:
    cls = types.new_class("AutoModelForRelationExtraction", (_BaseAutoModelClass,))
    cls._model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
    cls.__name__ = "AutoModelForRelationExtraction"
    AutoModelForRelationExtraction = auto_class_update(cls, head_doc="relation extraction")

# Original single-path definitions, kept commented out for reference:
# AutoModelForTokenClassification = auto_class_factory(
#     "AutoModelForTokenClassification", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="token classification"
# )
# AutoModelForRelationExtraction = auto_class_factory(
#     "AutoModelForRelationExtraction", MODEL_FOR_RELATION_EXTRACTION_MAPPING, head_doc="relation extraction"
# )
```
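Assuming the registrations above actually take effect in your installed transformers version, the Auto classes used by examples/run_funsd.py can then resolve the custom model type. A minimal sketch of the intended usage (the checkpoint path is a placeholder for a local LiLT checkpoint directory, not something shipped with the repo):

```python
from transformers import AutoConfig, AutoTokenizer

# Importing the package runs the __init__.py above and registers the mappings.
from LiLTfinetune import AutoModelForTokenClassification

model_path = "lilt-roberta-en-base"  # placeholder: local LiLT checkpoint directory

# AutoConfig looks up config.json's model_type in CONFIG_MAPPING,
# which is why the registration above is needed.
config = AutoConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained("roberta-base")  # --tokenizer_name used by the SER script
model = AutoModelForTokenClassification.from_pretrained(model_path, config=config)
```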
My transformers library is version 4.25.1.
After modifying the __init__.py file and updating the transformers library to 4.25.1,
I re-ran the shell script for Semantic Entity Recognition on FUNSD:
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 examples/run_funsd.py \
        --model_name_or_path lilt-roberta-en-base \
        --tokenizer_name roberta-base \
        --output_dir ser_funsd_lilt-roberta-en-base \
        --do_train \
        --do_predict \
        --max_steps 2000 \
        --per_device_train_batch_size 8 \
        --warmup_ratio 0.1 \
        --fp16
```
and got the following error; it looks like a KeyError (a missing key) aborted the run.
```
*****
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True
INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=True,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=
File "examples/run_funsd.py", line 135, in main
File "examples/run_funsd.py", line 135, in main
use_auth_token=True if model_args.use_auth_token else None,
use_auth_token=True if model_args.use_auth_token else None, File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 829, in from_pretrained
File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 829, in from_pretrained
config_class = CONFIG_MAPPING[config_dict["model_type"]]
config_class = CONFIG_MAPPING[config_dict["model_type"]]
File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 536, in getitem
File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 536, in getitem
raise KeyError(key)raise KeyError(key)
KeyError: KeyError'liltrobertalike'
: 'liltrobertalike'
WARNING:datasets.builder:Reusing dataset funsd (/home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106)
Traceback (most recent call last):
File "examples/run_funsd.py", line 369, in
Did you ever solve the KeyError problem? I have recently been trying to use this model and ran into the same issue.
The CONFIG_MAPPING.update and MODEL_NAMES_MAPPING.update calls (lines 19 and 20 of the __init__.py above) are there to update the definitions inside transformers, because transformers==4.5.1 did not yet ship a LiLT model. They can also register the checkpoint under the "lilt" model type:
```python
CONFIG_MAPPING.update([("liltrobertalike", LiLTRobertaLikeConfig), ("lilt", LiLTRobertaLikeConfig)])
MODEL_NAMES_MAPPING.update([("liltrobertalike", "LiLTRobertaLike"), ("lilt", "LiLTRobertaLike")])
```
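The KeyError in the traceback above means the model_type string declared in the checkpoint's config.json was not found in CONFIG_MAPPING, so it is worth checking which key your checkpoint actually declares ("liltrobertalike" vs "lilt"). A small sketch, with the checkpoint path as a placeholder:

```python
import json
from pathlib import Path

ckpt = Path("lilt-roberta-en-base")  # placeholder: your local checkpoint directory

# Print the model_type that AutoConfig will look up in CONFIG_MAPPING.
cfg = json.loads((ckpt / "config.json").read_text())
print("model_type:", cfg.get("model_type"))
```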
Also, the OP's original problem was caused by the version of the packaging package; downgrading to packaging==21.0 solves it.
`pip install packaging==21.3` ok