Closed · GoldenSeS closed this 4 weeks ago
System Info

llamafactory 0.9.1.dev0

Reproduction
```
[INFO|2024-11-05 07:48:09] llamafactory.hparams.parser:355 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16
[INFO|configuration_utils.py:677] 2024-11-05 07:48:09,005 >> loading configuration file /app/model/config.json
[INFO|configuration_utils.py:677] 2024-11-05 07:48:09,022 >> loading configuration file /app/model/config.json
[INFO|configuration_utils.py:746] 2024-11-05 07:48:09,026 >> Model config MiniCPMVConfig {
  "_name_or_path": "/app/model",
  "architectures": [
    "MiniCPMV"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_minicpm.MiniCPMVConfig",
    "AutoModel": "modeling_minicpmv.MiniCPMV",
    "AutoModelForCausalLM": "modeling_minicpmv.MiniCPMV"
  },
  "batch_vision_input": true,
  "bos_token_id": 151643,
  "drop_vision_last_layer": false,
  "eos_token_id": 151645,
  "hidden_act": "silu",
  "hidden_size": 3584,
  "image_size": 448,
  "initializer_range": 0.02,
  "intermediate_size": 18944,
  "max_position_embeddings": 32768,
  "max_window_layers": 28,
  "model_type": "minicpmv",
  "num_attention_heads": 28,
  "num_hidden_layers": 28,
  "num_key_value_heads": 4,
  "patch_size": 14,
  "query_num": 64,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "slice_config": {
    "max_slice_nums": 9,
    "model_type": "minicpmv"
  },
  "slice_mode": true,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.1",
  "use_cache": true,
  "use_image_id": true,
  "use_sliding_window": false,
  "version": 2.6,
  "vision_config": {
    "hidden_size": 1152,
    "image_size": 980,
    "intermediate_size": 4304,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 27,
    "patch_size": 14
  },
  "vocab_size": 151666
}

[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,029 >> loading file vocab.json
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,029 >> loading file merges.txt
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,029 >> loading file tokenizer.json
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,029 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,029 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,029 >> loading file tokenizer_config.json
[INFO|tokenization_utils_base.py:2475] 2024-11-05 07:48:09,453 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[INFO|image_processing_base.py:373] 2024-11-05 07:48:09,454 >> loading configuration file /app/model/preprocessor_config.json
/app/env/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:520: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead
  warnings.warn(
[INFO|image_processing_base.py:373] 2024-11-05 07:48:09,464 >> loading configuration file /app/model/preprocessor_config.json
[INFO|image_processing_base.py:429] 2024-11-05 07:48:09,466 >> Image processor MiniCPMVImageProcessor {
  "auto_map": {
    "AutoImageProcessor": "image_processing_minicpmv.MiniCPMVImageProcessor",
    "AutoProcessor": "processing_minicpmv.MiniCPMVProcessor"
  },
  "im_end": "</image>",
  "im_end_token": "</image>",
  "im_id_end": "</image_id>",
  "im_id_start": "<image_id>",
  "im_start": "<image>",
  "im_start_token": "<image>",
  "image_feature_size": 64,
  "image_processor_type": "MiniCPMVImageProcessor",
  "max_slice_nums": 9,
  "mean": [
    0.5,
    0.5,
    0.5
  ],
  "norm_mean": [
    0.5,
    0.5,
    0.5
  ],
  "norm_std": [
    0.5,
    0.5,
    0.5
  ],
  "patch_size": 14,
  "processor_class": "MiniCPMVProcessor",
  "scale_resolution": 448,
  "slice_end": "</slice>",
  "slice_end_token": "</slice>",
  "slice_mode": true,
  "slice_start": "<slice>",
  "slice_start_token": "<slice>",
  "std": [
    0.5,
    0.5,
    0.5
  ],
  "unk": "<unk>",
  "unk_token": "<unk>",
  "use_image_id": true,
  "version": 2.6
}

[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,467 >> loading file vocab.json
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,467 >> loading file merges.txt
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,467 >> loading file tokenizer.json
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,467 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,467 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:2209] 2024-11-05 07:48:09,467 >> loading file tokenizer_config.json
[INFO|tokenization_utils_base.py:2475] 2024-11-05 07:48:09,882 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[INFO|processing_utils.py:755] 2024-11-05 07:48:10,584 >> Processor MiniCPMVProcessor:
- image_processor: MiniCPMVImageProcessor {
  "auto_map": {
    "AutoImageProcessor": "image_processing_minicpmv.MiniCPMVImageProcessor",
    "AutoProcessor": "processing_minicpmv.MiniCPMVProcessor"
  },
  "im_end": "</image>",
  "im_end_token": "</image>",
  "im_id_end": "</image_id>",
  "im_id_start": "<image_id>",
  "im_start": "<image>",
  "im_start_token": "<image>",
  "image_feature_size": 64,
  "image_processor_type": "MiniCPMVImageProcessor",
  "max_slice_nums": 9,
  "mean": [
    0.5,
    0.5,
    0.5
  ],
  "norm_mean": [
    0.5,
    0.5,
    0.5
  ],
  "norm_std": [
    0.5,
    0.5,
    0.5
  ],
  "patch_size": 14,
  "processor_class": "MiniCPMVProcessor",
  "scale_resolution": 448,
  "slice_end": "</slice>",
  "slice_end_token": "</slice>",
  "slice_mode": true,
  "slice_start": "<slice>",
  "slice_start_token": "<slice>",
  "std": [
    0.5,
    0.5,
    0.5
  ],
  "unk": "<unk>",
  "unk_token": "<unk>",
  "use_image_id": true,
  "version": 2.6
}

- tokenizer: MiniCPMVTokenizerFast(name_or_path='/app/model', vocab_size=151643, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|im_start|>', 'eos_token': '<|im_end|>', 'unk_token': '<unk>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<image>', '</image>', '<ref>', '</ref>', '<box>', '</box>', '<quad>', '</quad>', '<point>', '</point>', '<slice>', '</slice>', '<image_id>', '</image_id>', '<|reserved_special_token_0|>', '<|reserved_special_token_1|>', '<|reserved_special_token_2|>', '<|reserved_special_token_3|>', '<|reserved_special_token_4|>', '<|reserved_special_token_5|>']}, clean_up_tokenization_spaces=False), added_tokens_decoder={
        128244: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151646: AddedToken("<image>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151647: AddedToken("</image>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151648: AddedToken("<ref>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151649: AddedToken("</ref>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151650: AddedToken("<box>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151651: AddedToken("</box>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151652: AddedToken("<quad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151653: AddedToken("</quad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151654: AddedToken("<point>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151655: AddedToken("</point>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151656: AddedToken("<slice>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151657: AddedToken("</slice>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151658: AddedToken("<image_id>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151659: AddedToken("</image_id>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151660: AddedToken("<|reserved_special_token_0|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151661: AddedToken("<|reserved_special_token_1|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151662: AddedToken("<|reserved_special_token_2|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151663: AddedToken("<|reserved_special_token_3|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151664: AddedToken("<|reserved_special_token_4|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
        151665: AddedToken("<|reserved_special_token_5|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

{
  "processor_class": "MiniCPMVProcessor"
}

[INFO|2024-11-05 07:48:10] llamafactory.data.loader:157 >> Loading dataset mllm_demo.json...
Generating train split: 0 examples [00:00, ? examples/s]
Generating train split: 6 examples [00:00, 320.14 examples/s]
Converting format of dataset (num_proc=4):   0%|          | 0/6 [00:00<?, ? examples/s]
Converting format of dataset (num_proc=4):  83%|████████▎ | 5/6 [00:00<00:00, 38.40 examples/s]
Converting format of dataset (num_proc=4): 100%|██████████| 6/6 [00:00<00:00, 26.93 examples/s]
Running tokenizer on dataset (num_proc=4):   0%|          | 0/6 [00:00<?, ? examples/s]
Running tokenizer on dataset (num_proc=4):   0%|          | 0/6 [00:00<?, ? examples/s]
Running tokenizer on dataset (num_proc=4):   0%|          | 0/6 [00:01<?, ? examples/s]
Running tokenizer on dataset (num_proc=4):   0%|          | 0/6 [00:01<?, ? examples/s]
Running tokenizer on dataset (num_proc=4):   0%|          | 0/6 [00:01<?, ? examples/s]
Running tokenizer on dataset (num_proc=4):   0%|          | 0/6 [00:01<?, ? examples/s]
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/app/env/lib/python3.11/site-packages/multiprocess/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
                    ^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 678, in _write_generator_to_queue
    for i, result in enumerate(func(**kwargs)):
  File "/app/env/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3458, in _map_single
    batch = apply_function_on_filtered_inputs(
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3320, in apply_function_on_filtered_inputs
    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/llamafactory/data/processors/supervised.py", line 107, in preprocess_supervised_dataset
    input_ids, labels = _encode_supervised_example(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/llamafactory/data/processors/supervised.py", line 48, in _encode_supervised_example
    messages = template.mm_plugin.process_messages(prompt + response, images, videos, processor)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/llamafactory/data/mm_plugin.py", line 202, in process_messages
    self._validate_input(images, videos)
  File "/app/env/lib/python3.11/site-packages/llamafactory/data/mm_plugin.py", line 68, in _validate_input
    raise ValueError("This model does not support image input.")
ValueError: This model does not support image input.
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/app/env/bin/llamafactory-cli", line 8, in <module>
    sys.exit(main())
    ^^^^^^
  File "/app/env/lib/python3.11/site-packages/llamafactory/cli.py", line 111, in main
    run_exp()
  File "/app/env/lib/python3.11/site-packages/llamafactory/train/tuner.py", line 50, in run_exp
    run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
  File "/app/env/lib/python3.11/site-packages/llamafactory/train/sft/workflow.py", line 47, in run_sft
    dataset_module = get_dataset(template, model_args, data_args, training_args, stage="sft", **tokenizer_module)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/llamafactory/data/loader.py", line 265, in get_dataset
    dataset = _get_preprocessed_dataset(
              ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/llamafactory/data/loader.py", line 204, in _get_preprocessed_dataset
    dataset = dataset.map(
              ^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 560, in wrapper
    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
                                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3147, in map
    for rank, done, content in iflatmap_unordered(
  File "/app/env/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 718, in iflatmap_unordered
    [async_result.get(timeout=0.05) for async_result in async_results]
  File "/app/env/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 718, in <listcomp>
    [async_result.get(timeout=0.05) for async_result in async_results]
     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/env/lib/python3.11/site-packages/multiprocess/pool.py", line 774, in get
    raise self._value
ValueError: This model does not support image input.

llamafactory_vlm exited with code 1
```
Expected behavior

I am trying to fine-tune MiniCPM-V-2_6. The training config is as follows:
```yaml
### model
model_name_or_path: /app/model

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: mllm_demo
template: cpm
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 4

### output
output_dir: /app/save
logging_steps: 1
save_steps: 99999
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 2.0e-4
num_train_epochs: 1
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
```
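Note that `mllm_demo` is a multimodal dataset: each record pairs an `<image>` placeholder in the messages with a list of image paths, so the preprocessor always hands images to the template's plugin. An illustrative record (shape only, not copied verbatim from the repo's `data/mllm_demo.json`):

```json
[
  {
    "messages": [
      {"role": "user", "content": "<image>What is in this picture?"},
      {"role": "assistant", "content": "Two football players on the pitch."}
    ],
    "images": ["mllm_demo_data/1.jpg"]
  }
]
```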
Others

This looks similar to https://github.com/hiyouga/LLaMA-Factory/issues/5918, but my code is already updated to the latest version.
Fine-tuning non-vision models in the same environment works without any problems.
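A quick way to confirm the template rather than the environment is at fault is to inspect the registered template's plugin directly. This is a sketch under the assumption that `llamafactory.data.template` exposes a `TEMPLATES` registry and that each template carries an `mm_plugin` with an `image_token` attribute, as the traceback suggests; treat it as a diagnostic idea, not guaranteed API:

```python
# Hypothetical diagnostic: does the "cpm" template's plugin accept images?
from llamafactory.data.template import TEMPLATES  # assumed registry of templates

plugin = TEMPLATES["cpm"].mm_plugin
print(type(plugin).__name__, plugin.image_token)
# If image_token is None, any dataset that supplies images (like mllm_demo)
# fails _validate_input with "This model does not support image input."
```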
MiniCPM-V is not supported.