yuanzhoulvpi2017 / zero_nlp

Chinese NLP solutions (large models, data, models, training, inference)

Train_LLaVA #200

Open Wfyikczy opened 3 days ago

Wfyikczy commented 3 days ago

```bash
deepspeed run.py \
    --deepspeed ds_zero2_no_offload.json \
    --model_name_or_path /home/wangfeiyu/feiyu/train_llava/show_model/model001 \
    --train_type use_lora \
    --data_path /home/wangfeiyu/feiyu/LLaVA-CC3M-Pretrain-595K \
    --remove_unused_columns false \
    --bf16 true \
    --fp16 false \
    --dataloader_pin_memory True \
    --dataloader_num_workers 5 \
    --dataloader_persistent_workers True \
    --output_dir output_model_user_lora_0705 \
    --num_train_epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --evaluation_strategy "no" \
    --save_strategy "epoch" \
    --save_total_limit 3 \
    --report_to "tensorboard" \
    --learning_rate 4e-4 \
    --logging_steps 10
```

After training finishes, running inference raises an error:
![Snipaste_2024-11-26-01](https://github.com/user-attachments/assets/5a53a343-d61b-4b54-bb38-827eda2d49ea)

```python
import torch
from peft import PeftModel
from transformers import AutoProcessor, LlavaForConditionalGeneration

raw_model_name_or_path = "/home/wangfeiyu/feiyu/train_llava/show_model/model001"
peft_model_name_or_path = "/home/wangfeiyu/feiyu/train_llava/output_model_user_lora_0705"

# Load the base LLaVA model, then attach the LoRA adapter trained above.
model = LlavaForConditionalGeneration.from_pretrained(
    raw_model_name_or_path, device_map="cuda:1", torch_dtype=torch.bfloat16
)
model = PeftModel.from_pretrained(model, peft_model_name_or_path, adapter_name="peft_v1")
processor = AutoProcessor.from_pretrained(raw_model_name_or_path)  # this line raises
model.eval()
print('ok')
```

Wfyikczy commented 3 days ago

```
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
Cell In[2], line 5
      3 model = LlavaForConditionalGeneration.from_pretrained(raw_model_name_or_path, device_map="cuda:1", torch_dtype=torch.bfloat16)
      4 model = PeftModel.from_pretrained(model, peft_model_name_or_path, adapter_name="peft_v1")
----> 5 processor = AutoProcessor.from_pretrained(raw_model_name_or_path)
      6 model.eval()
      7 print('ok')

File ~/anaconda3/lib/python3.11/site-packages/transformers/models/auto/processing_auto.py:313, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    309     return processor_class.from_pretrained(
    310         pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    311     )
    312 elif processor_class is not None:
--> 313     return processor_class.from_pretrained(
    314         pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    315     )
    316 # Last try: we use the PROCESSOR_MAPPING.
    317 elif type(config) in PROCESSOR_MAPPING:

File ~/anaconda3/lib/python3.11/site-packages/transformers/processing_utils.py:466, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
    463 if token is not None:
    464     kwargs["token"] = token
--> 466 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
    467 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
    469 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File ~/anaconda3/lib/python3.11/site-packages/transformers/processing_utils.py:512, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    509     else:
    510         attribute_class = getattr(transformers_module, class_name)
--> 512     args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
    513 return args

File ~/anaconda3/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:2029, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
   2026     else:
   2027         logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2029 return cls._from_pretrained(
   2030     resolved_vocab_files,
   2031     pretrained_model_name_or_path,
   2032     init_configuration,
   2033     *init_inputs,
   2034     token=token,
   2035     cache_dir=cache_dir,
   2036     local_files_only=local_files_only,
   2037     _commit_hash=commit_hash,
   2038     _is_local=is_local,
   2039     **kwargs,
   2040 )

File ~/anaconda3/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:2261, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
   2259 # Instantiate the tokenizer.
   2260 try:
-> 2261     tokenizer = cls(*init_inputs, **init_kwargs)
   2262 except OSError:
   2263     raise OSError(
   2264         "Unable to load vocabulary from file. "
   2265         "Please check that the provided vocabulary is accessible and not corrupted."
   2266     )

File ~/anaconda3/lib/python3.11/site-packages/transformers/models/clip/tokenization_clip_fast.py:93, in CLIPTokenizerFast.__init__(self, vocab_file, merges_file, tokenizer_file, unk_token, bos_token, eos_token, pad_token, **kwargs)
     82 def __init__(
     83     self,
     84     vocab_file=None,
   (...)
     91     **kwargs,
     92 ):
---> 93     super().__init__(
     94         vocab_file,
     95         merges_file,
     96         tokenizer_file=tokenizer_file,
     97         unk_token=unk_token,
     98         bos_token=bos_token,
     99         eos_token=eos_token,
    100         pad_token=pad_token,
    101         **kwargs,
    102     )
    104     if not isinstance(self.backend_tokenizer.pre_tokenizer, pre_tokenizers.Sequence):
    105         raise ValueError(
    106             "The `backend_tokenizer` provided does not match the expected format. The CLIP tokenizer has been"
    107             " heavily modified from transformers version 4.17.0. You need to convert the tokenizer you are using"
   (...)
    111             " transformers."
    112         )

File ~/anaconda3/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:111, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
    108     fast_tokenizer = copy.deepcopy(tokenizer_object)
    109 elif fast_tokenizer_file is not None and not from_slow:
    110     # We have a serialization from tokenizers which let us directly build the backend
--> 111     fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
    112 elif slow_tokenizer is not None:
    113     # We need to convert a slow tokenizer to build the backend
    114     fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)

Exception: data did not match any variant of untagged enum ModelWrapper at line 757281 column 3
```
yuanzhoulvpi2017 commented 3 days ago

My guess is that this comes from a transformers version update; try running it with a slightly older version.
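
For example, a minimal sketch of pinning older releases (the exact version numbers below are assumptions, not values confirmed in this thread; pick a pair close to what the training environment used):

```bash
# Hedged example: the pinned versions are assumptions, not a verified fix.
# transformers 4.38.x expects tokenizers >=0.14,<0.19.
pip install "transformers==4.38.2" "tokenizers==0.15.2"
```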

Wfyikczy commented 3 days ago

So you mean I should downgrade the transformers version?

yuanzhoulvpi2017 commented 3 days ago

Yes, adjust your transformers version. Separately, one thing I'm curious about: why was there no error during training, but the error appears at inference time?
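
One quick way to check is to compare package versions between the training and inference environments; a tokenizer.json serialized by a newer tokenizers release can fail to parse under an older one with exactly this kind of untagged-enum error. A minimal diagnostic sketch, assuming both are pip-managed environments:

```bash
# Run this in both the training and the inference environment and compare;
# a tokenizers mismatch is a common cause of the ModelWrapper parse error.
pip list | grep -E "transformers|tokenizers|peft"
```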