huggingface / transformers

🤗 Transformers: State-of-the-art Machine Learning for PyTorch, TensorFlow, and JAX.
https://huggingface.co/transformers
Apache License 2.0

ValueError: Some specified arguments are not used by the HfArgumentParser: ['model_name_or_path', 'show_model/model001', 'train_type', 'use_lora', 'data_path', 'data/AS_2022_train+test', 'per_device_train_batch_size', '1', 'per_device_eval_batch_size', '1', 'num_train_epochs', '5'] #34306

Open · jiqibuaixuexi opened this issue 1 month ago

jiqibuaixuexi commented 1 month ago

System Info

transformers version: 4.45.2
python version: 3.9.20
torch version: 2.4.1+cu124

Who can help?

No response

Reproduction

### run_show.py
import copy
import logging
import os
from dataclasses import dataclass, field
from functools import partial
from typing import Dict, List, Optional, Sequence

import torch
import transformers

from torch.utils.data import Dataset
from tqdm import tqdm
from transformers import (    
    LlavaForConditionalGeneration,
    LlavaProcessor,
    Trainer,
    TrainingArguments,
)

from show_llava.data import LlavaDataset, TrainLlavaModelCollator
from show_llava.util import print_trainable_parameters

logger = logging.getLogger(__name__)

# import debugpy

# try:
#     # 5678 is the default attach port in the VS Code debug configurations. Unless a host and port are specified, host defaults to 127.0.0.1
#     debugpy.listen(("localhost", 9501))
#     print("Waiting for debugger attach")
#     debugpy.wait_for_client()
# except Exception as e:
#     pass

@dataclass
class ModelArguments:
    model_name_or_path: Optional[str] = field(default="test_model/model001")
    train_type: Optional[str] = field(
        default="use_lora",
        metadata={
            "help": """
            1. use_lora: train with LoRA,
            2. none: train all parameters;
            3. freeze_vision: freeze only the vision_tower and train the rest
            """
        },
    )

@dataclass
class DataArguments:
    data_path: str = field(
        default=None, metadata={"help": "Path to the training data."}
    )
    # source_length: int = field(default=128)
    # target_length: int = field(default=512)

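# Note: HfArgumentParser turns each dataclass field above into a "--<field_name>"
# CLI option (--model_name_or_path, --train_type, --data_path); tokens without
# the leading "--" are treated as positionals and rejected as unused.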
def load_model_processor(modelargs: ModelArguments):
    model = LlavaForConditionalGeneration.from_pretrained(
        modelargs.model_name_or_path,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
    )
    processor = LlavaProcessor.from_pretrained(modelargs.model_name_or_path)

    if modelargs.train_type == "use_lora":
        logging.warning("Loading model to Lora")

        from peft import LoraConfig, get_peft_model

        LORA_R = 32
        # LORA_ALPHA = 16
        LORA_DROPOUT = 0.05
        TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj"]

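        # LoRA targets the attention projections; modules_to_save keeps the
        # multi_modal_projector fully trainable alongside the adapter weights.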
        config = LoraConfig(
            r=LORA_R,
            # lora_alpha=LORA_ALPHA,
            target_modules=TARGET_MODULES,
            lora_dropout=LORA_DROPOUT,
            bias="none",
            task_type="CAUSAL_LM",
            modules_to_save=["multi_modal_projector"],
        )
        model = get_peft_model(model, config)
        # model.print_trainable_parameters()

    elif modelargs.train_type == "none":
        logging.warning("使用全量参数进行训练")

        pass
    elif modelargs.train_type == "freeze_vision":
        logging.warning("冻结vision_tower网络层,剩下的网络权重进行训练")

        for param in model.vision_tower.parameters():
            param.requires_grad = False
    print_trainable_parameters(model)

    return model, processor

def load_dataset_collator(processor, dataargs: DataArguments):

    llava_dataset = LlavaDataset(
        dataargs.data_path  # "data/liuhaotian/LLaVA-CC3M-Pretrain-595K"
    )
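    # -100 matches the default ignore_index of PyTorch's cross-entropy loss,
    # so label positions filled with -100 are excluded from the loss.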
    data_collator = TrainLlavaModelCollator(processor, -100)

    return llava_dataset, data_collator

def train():
    parser = transformers.HfArgumentParser(
        (ModelArguments, DataArguments, TrainingArguments)
    )
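    # parse_args_into_dataclasses() reads sys.argv by default and raises
    # "ValueError: Some specified arguments are not used by the HfArgumentParser"
    # for any token it cannot map to a field of the dataclasses above.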
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()
    model, processor = load_model_processor(model_args)
    train_dataset, data_collator = load_dataset_collator(processor, data_args)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=None,
        data_collator=data_collator,
    )

    trainer.train()
    trainer.save_state()
    trainer.save_model(output_dir=training_args.output_dir)

if __name__ == "__main__":
    logging.basicConfig(
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
        level=logging.INFO,
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    train()
### launch.json
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python Debugger: Current File",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "justMyCode": true,
            "args": [
                "--output_dir", "output20241021",
                "model_name_or_path", "show_model/model001",
                "train_type", "use_lora",
                "data_path", "data/AS_2022_train+test",
                "per_device_train_batch_size", "1",
                "per_device_eval_batch_size", "1",
                "num_train_epochs", "5",
            ]
        }
    ]
}

Expected behavior

I can run the following command in CMD without issues:

python run_show.py --output_dir output20241021 --model_name_or_path show_model/model001 --train_type use_lora --data_path data/AS_2022_train+test --per_device_train_batch_size 1 --per_device_eval_batch_size 1 --num_train_epochs 5

However, when I try to debug in the IDE, I encounter the following error:

ValueError: Some specified arguments are not used by the HfArgumentParser: ['model_name_or_path', 'show_model/model001', 'train_type', 'use_lora', 'data_path', 'data/AS_2022_train+test', 'per_device_train_batch_size', '1', 'per_device_eval_batch_size', '1', 'num_train_epochs', '5']
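A likely cause, judging from the rejected tokens: in the working CLI command every option name carries a leading --, while in the launch.json args above only --output_dir does. Without the leading --, argparse treats each name and value as a positional token, and HfArgumentParser rejects them all as unused. A sketch of the corrected "args" array, assuming the same values:

    "args": [
        "--output_dir", "output20241021",
        "--model_name_or_path", "show_model/model001",
        "--train_type", "use_lora",
        "--data_path", "data/AS_2022_train+test",
        "--per_device_train_batch_size", "1",
        "--per_device_eval_batch_size", "1",
        "--num_train_epochs", "5"
    ]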
LysandreJik commented 1 month ago

Hey, can you put your stack trace here? It's in the title currently, which makes it hard to see how to help you with your script.

jiqibuaixuexi commented 1 month ago

Sure, here it is:

[Screenshot attached via email reply; the image did not come through.]

LysandreJik commented 1 month ago

I meant the full error you get locally when you run this

jiqibuaixuexi commented 1 month ago

@LysandreJik
OK, here is the screenshot of the full error messages: [screenshot of the full traceback]

github-actions[bot] commented 5 days ago

This issue has been automatically marked as stale because it has not had recent activity. If you think this still needs to be addressed please comment on this thread.

Please note that issues that do not follow the contributing guidelines are likely to be ignored.