jpWang / LiLT

Official PyTorch implementation of LiLT: A Simple yet Effective Language-Independent Layout Transformer for Structured Document Understanding (ACL 2022)

Problems running the code #32

Closed · Magic-Gj closed this 3 months ago

Magic-Gj commented 1 year ago

Hello, I am a graduate student at Donghua University. It is an honor to read such an excellent paper, and I am trying to reproduce it. I have run into some problems while running your example code: [screenshot]. I hope you can find time to take a look. Thank you very much!

ZeningLin commented 1 year ago

It looks like some library version does not match.

Check your current environment configuration and reinstall the conda environment following the requirements in the README.
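
A quick way to compare what is actually installed with what the README pins (just a sketch; the package list is my guess at the relevant ones):

# Print installed versions to compare against the README's pinned requirements.
import transformers, torch, datasets, packaging

print("transformers:", transformers.__version__)
print("torch:", torch.__version__)
print("datasets:", datasets.__version__)
print("packaging:", packaging.__version__)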

whalefa1I commented 1 year ago

I installed it exactly by following the steps in the README, and I get the same problem.

whalefa1I commented 1 year ago

> Hello, I am a graduate student at Donghua University. It is an honor to read such an excellent paper, and I am trying to reproduce it. I have run into some problems while running your example code: [screenshot]. I hope you can find time to take a look. Thank you very much!

This is a transformers library issue. Just change the __init__.py file under /LiLT/LiLTfinetune:


from collections import OrderedDict
import types
from transformers import CONFIG_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, MODEL_NAMES_MAPPING, TOKENIZER_MAPPING
from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS, BertConverter, RobertaConverter, XLMRobertaConverter
# transformers 4.5.1 (the version this repo targets) still provides auto_class_factory;
# newer releases removed it in favour of _BaseAutoModelClass / auto_class_update,
# so fall back accordingly.
try:
    from transformers.models.auto.modeling_auto import auto_class_factory
except ImportError:
    from transformers.models.auto.modeling_auto import _BaseAutoModelClass, auto_class_update

from .models.LiLTRobertaLike import (
    LiLTRobertaLikeConfig,
    LiLTRobertaLikeForRelationExtraction,
    LiLTRobertaLikeForTokenClassification,
    LiLTRobertaLikeTokenizer,
    LiLTRobertaLikeTokenizerFast,
)

# Register the LiLT model type, model name and tokenizers with the transformers auto mappings.
CONFIG_MAPPING.update([("liltrobertalike", LiLTRobertaLikeConfig),])
MODEL_NAMES_MAPPING.update([("liltrobertalike", "LiLTRobertaLike"),])
TOKENIZER_MAPPING.update(
    [
        (LiLTRobertaLikeConfig, (LiLTRobertaLikeTokenizer, LiLTRobertaLikeTokenizerFast)),
    ]
)

# tag.txt must contain either 'monolingual' or 'multilingual'; it selects which
# slow-to-fast tokenizer converter matches the text backbone.
with open('tag.txt', 'r') as tagf:
    TAG = tagf.read().lower()
assert TAG == 'monolingual' or TAG == 'multilingual', 'TAG is wrong. It should be monolingual or multilingual.'
if TAG == 'monolingual':
    SLOW_TO_FAST_CONVERTERS.update({"LiLTRobertaLikeTokenizer": RobertaConverter,})
elif TAG == 'multilingual':
    SLOW_TO_FAST_CONVERTERS.update({"LiLTRobertaLikeTokenizer": XLMRobertaConverter,})

MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.update(
    [(LiLTRobertaLikeConfig, LiLTRobertaLikeForTokenClassification),]
)

MODEL_FOR_RELATION_EXTRACTION_MAPPING = OrderedDict(
    [(LiLTRobertaLikeConfig, LiLTRobertaLikeForRelationExtraction),]
)

try:
    AutoModelForTokenClassification = auto_class_factory(
        "AutoModelForTokenClassification", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="token classification")
except:
    cls = types.new_class("AutoModelForTokenClassification", (_BaseAutoModelClass,))
    cls._model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
    cls.__name__ = "AutoModelForTokenClassification"

    AutoModelForTokenClassification = auto_class_update(cls, head_doc="token classification")

try:
    # Note: this snippet reuses MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING here; the repo's
    # original call (commented out at the bottom of this file) passes
    # MODEL_FOR_RELATION_EXTRACTION_MAPPING instead.
    AutoModelForRelationExtraction = auto_class_factory(
        "AutoModelForRelationExtraction", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="relation extraction")
except:
    cls = types.new_class("AutoModelForRelationExtraction", (_BaseAutoModelClass,))
    cls._model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
    cls.__name__ = "AutoModelForRelationExtraction"

    AutoModelForRelationExtraction = auto_class_update(cls, head_doc="relation extraction")

# AutoModelForTokenClassification = auto_class_factory(
#     "AutoModelForTokenClassification", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="token classification"
# )

# AutoModelForRelationExtraction = auto_class_factory(
#     "AutoModelForRelationExtraction", MODEL_FOR_RELATION_EXTRACTION_MAPPING, head_doc="relation extraction"
# )
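
One more note on the snippet above: it reads tag.txt with a relative path, so the file has to exist in the directory you launch from before anything imports LiLTfinetune. A minimal way to create it (my assumption is simply that the working directory is the repo root):

# Create tag.txt so the patched __init__.py can pick the right tokenizer converter:
# "monolingual" pairs with the RoBERTa converter, "multilingual" with the XLM-R one.
with open("tag.txt", "w") as f:
    f.write("monolingual")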
whalefa1I commented 1 year ago

My transformers library version is 4.25.1.

Magic-Gj commented 1 year ago

After modifying the __init__.py file and updating the transformers library to 4.25.1, I re-ran the shell script for Semantic Entity Recognition on FUNSD:

CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 examples/run_funsd.py \
        --model_name_or_path lilt-roberta-en-base \
        --tokenizer_name roberta-base \
        --output_dir ser_funsd_lilt-roberta-en-base \
        --do_train \
        --do_predict \
        --max_steps 2000 \
        --per_device_train_batch_size 8 \
        --warmup_ratio 0.1 \
        --fp16

It now fails with the error below; a KeyError in the program seems to abort the run.

***** Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True
INFO:__main__:Training/evaluation parameters TrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=False, bf16=False, bf16_full_eval=False, data_seed=None, dataloader_drop_last=False, dataloader_num_workers=0, dataloader_pin_memory=True, ddp_bucket_cap_mb=None, ddp_find_unused_parameters=None, ddp_timeout=1800, debug=[], deepspeed=None, disable_tqdm=False, do_eval=False, do_predict=True, do_train=True, eval_accumulation_steps=None, eval_delay=0, eval_steps=None, evaluation_strategy=no, fp16=True, fp16_backend=auto, fp16_full_eval=False, fp16_opt_level=O1, fsdp=[], fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap=None, full_determinism=False, gradient_accumulation_steps=1, gradient_checkpointing=False, greater_is_better=None, group_by_length=False, half_precision_backend=auto, hub_model_id=None, hub_private_repo=False, hub_strategy=every_save, hub_token=, ignore_data_skip=False, include_inputs_for_metrics=False, jit_mode_eval=False, label_names=None, label_smoothing_factor=0.0, learning_rate=5e-05, length_column_name=length, load_best_model_at_end=False, local_rank=0, log_level=passive, log_level_replica=passive, log_on_each_node=True, logging_dir=ser_funsd_lilt-roberta-en-base/runs/Feb20_16-07-29_dgx, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=500, logging_strategy=steps, lr_scheduler_type=linear, max_grad_norm=1.0, max_steps=2000, metric_for_best_model=None, mp_parameters=, no_cuda=False, num_train_epochs=3.0, optim=adamw_hf, optim_args=None, output_dir=ser_funsd_lilt-roberta-en-base, overwrite_output_dir=False, past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, prediction_loss_only=False, push_to_hub=False, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=, ray_scope=last, remove_unused_columns=True, report_to=['tensorboard'], resume_from_checkpoint=None, run_name=ser_funsd_lilt-roberta-en-base, save_on_each_node=False, save_steps=500, save_strategy=steps, save_total_limit=None, seed=42, sharded_ddp=[], skip_memory_metrics=True, tf32=None, torchdynamo=None, tpu_metrics_debug=False, tpu_num_cores=None, use_ipex=False, use_legacy_prediction_loop=False, use_mps_device=False, warmup_ratio=0.1, warmup_steps=0, weight_decay=0.0, xpu_backend=None, )
Downloading and preparing dataset funsd/funsd (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106...
WARNING:__main__:Process rank: 2, device: cuda:2, n_gpu: 1distributed training: True, 16-bits training: True
WARNING:__main__:Process rank: 1, device: cuda:1, n_gpu: 1distributed training: True, 16-bits training: True
WARNING:__main__:Process rank: 3, device: cuda:3, n_gpu: 1distributed training: True, 16-bits training: True
Downloading: 100%|██████████| 16.8M/16.8M [09:12<00:00, 30.5kB/s]
Dataset funsd downloaded and prepared to /home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106. Subsequent calls will reuse this data.
WARNING:datasets.builder:Reusing dataset funsd (/home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106)
(the same "Reusing dataset funsd" warning is printed by each of the other ranks)
[INFO|configuration_utils.py:652] 2023-02-20 16:16:54,487 >> loading configuration file lilt-roberta-en-base/config.json
Traceback (most recent call last):
  File "examples/run_funsd.py", line 369, in <module>
    main()
  File "examples/run_funsd.py", line 135, in main
    use_auth_token=True if model_args.use_auth_token else None,
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 829, in from_pretrained
    config_class = CONFIG_MAPPING[config_dict["model_type"]]
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 536, in __getitem__
    raise KeyError(key)
KeyError: 'liltrobertalike'
(the same traceback is raised, interleaved, in each of the four worker processes)
Killing subprocess 132879
Killing subprocess 132880
Killing subprocess 132881
Killing subprocess 132882
Traceback (most recent call last):
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/torch/distributed/launch.py", line 340, in <module>
    main()
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/torch/distributed/launch.py", line 326, in main
    sigkill_handler(signal.SIGTERM, None)  # not coming back
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/torch/distributed/launch.py", line 301, in sigkill_handler
    raise subprocess.CalledProcessError(returncode=last_return_code, cmd=cmd)
subprocess.CalledProcessError: Command '['/home/pm-gaojun/anaconda3/envs/lilt/bin/python', '-u', 'examples/run_funsd.py', '--local_rank=3', '--model_name_or_path', 'lilt-roberta-en-base', '--tokenizer_name', 'roberta-base', '--output_dir', 'ser_funsd_lilt-roberta-en-base', '--do_train', '--do_predict', '--max_steps', '2000', '--per_device_train_batch_size', '8', '--warmup_ratio', '0.1', '--fp16']' returned non-zero exit status 1.
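
For reference, the failing step can be reproduced outside the launcher with a few lines (a sketch, assuming the repo root as the working directory and that tag.txt and the lilt-roberta-en-base checkpoint directory exist):

# Importing the package runs the patched __init__.py shown earlier.
import LiLTfinetune
from transformers import AutoConfig

# With transformers 4.25.1 this still raises KeyError: 'liltrobertalike',
# which suggests the CONFIG_MAPPING.update() call is not taking effect there.
config = AutoConfig.from_pretrained("lilt-roberta-en-base")
print(type(config))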

lvbohui commented 11 months ago

Was the KeyError problem ever solved? I have been trying to use this model recently and ran into the same issue.

cactusgame commented 9 months ago

> (quoting @whalefa1I's reply above, including the modified __init__.py)

The CONFIG_MAPPING.update and MODEL_NAMES_MAPPING.update calls (lines 19 and 20 of the __init__.py above) are there to update the definitions inside transformers, because transformers==4.5.1 did not ship a LiLT model yet. I extended them to also register the "lilt" key:

CONFIG_MAPPING.update([("liltrobertalike", LiLTRobertaLikeConfig),("lilt", LiLTRobertaLikeConfig),])
MODEL_NAMES_MAPPING.update([("liltrobertalike", "LiLTRobertaLike"),("lilt", "LiLTRobertaLike"),])

Also, the original poster's problem was caused by the version of the packaging package; downgrading to packaging==21.0 fixes it.
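
Related, and just a sketch rather than something from this repo: on newer transformers releases (such as 4.25.x) the auto mappings are lazy objects, so dict-style .update() calls like the ones above may not be seen by AutoConfig, which would explain the KeyError: 'liltrobertalike'. Registering the classes through the public register hooks should work there (the class and module names below are the repo's; the approach itself is my assumption):

from transformers import AutoConfig, AutoModelForTokenClassification

from LiLTfinetune.models.LiLTRobertaLike import (
    LiLTRobertaLikeConfig,
    LiLTRobertaLikeForTokenClassification,
)

# Make AutoConfig / AutoModelForTokenClassification aware of the custom model type
# on recent transformers versions instead of mutating the mappings directly.
AutoConfig.register("liltrobertalike", LiLTRobertaLikeConfig)
AutoModelForTokenClassification.register(LiLTRobertaLikeConfig, LiLTRobertaLikeForTokenClassification)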

speakstone commented 9 months ago

pip install packaging==21.3 works for me.