Open Empress7211 opened 2 weeks ago
Hi @Empress7211
想给您确认下,pretrained_model_name_or_path = '/home/intern2/modal/Shanghai_AI_Laboratory/internlm-chat-7b'
这个路径下是 Internlm1 还是 Internlm2 呢?
如果是Internlm2的话,还需要另外确认下模型的版本,因为Internlm2模型的config里是有 rope_theta
这个配置的 (参考这里)。检查方法是查看 /home/intern2/modal/Shanghai_AI_Laboratory/internlm-chat-7b/config.json
里的内容是否跟 https://huggingface.co/internlm/internlm2-chat-7b/blob/main/config.json 这个文件一致
这是我修改的微调config脚本文件:
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from datasets import load_dataset
from mmengine.dataset import DefaultSampler
from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
                            LoggerHook, ParamSchedulerHook)
from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
from peft import LoraConfig
from torch.optim import AdamW
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig)

from xtuner.dataset import process_hf_dataset
from xtuner.dataset.collate_fns import default_collate_fn
from xtuner.dataset.map_fns import oasst1_map_fn, template_map_fn_factory
from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
                                 VarlenAttnArgsToMessageHubHook)
from xtuner.engine.runner import TrainLoop
from xtuner.model import SupervisedFinetune
from xtuner.utils import PROMPT_TEMPLATE
#######################################################################
#                          PART 1  Settings                           #
#######################################################################
# Model
pretrained_model_name_or_path = '/home/intern2/modal/Shanghai_AI_Laboratory/internlm-chat-7b'
use_varlen_attn = False

# Data
data_path = '/home/intern2/work/dataset/muxue_dataset.json'
prompt_template = PROMPT_TEMPLATE.internlm_chat
max_length = 2048
pack_to_max_length = True

# Scheduler & Optimizer
batch_size = 1  # per_device
accumulative_counts = 16
dataloader_num_workers = 0
max_epochs = 3
optim_type = AdamW
lr = 2e-4
betas = (0.9, 0.999)
weight_decay = 0
max_norm = 1  # grad clip
warmup_ratio = 0.03

# Save
save_steps = 500
save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)

# Evaluate the generation performance during the training
evaluation_freq = 500
SYSTEM = ''
evaluation_inputs = [
    '沐雪的功能是什么?', '很担心雪雪的身体', '我失恋了...',
    '雪雪我有点小困了捏(摸摸雪雪的头)',
    '我今天起床的时候感觉有点头晕,你有什么解决方案嘛',
    '我要不要和暗恋对象告白?'
]
#######################################################################
#                      PART 2  Model & Tokenizer                      #
#######################################################################
# Lazy-built (mmengine-style) tokenizer config; instantiated by the runner.
tokenizer = dict(
    type=AutoTokenizer.from_pretrained,
    pretrained_model_name_or_path=pretrained_model_name_or_path,
    trust_remote_code=True,
    padding_side='right')

# QLoRA setup: 4-bit NF4 quantized base LLM + rank-64 LoRA adapters.
model = dict(
    type=SupervisedFinetune,
    use_varlen_attn=use_varlen_attn,
    llm=dict(
        type=AutoModelForCausalLM.from_pretrained,
        pretrained_model_name_or_path=pretrained_model_name_or_path,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        quantization_config=dict(
            type=BitsAndBytesConfig,
            load_in_4bit=True,
            load_in_8bit=False,
            llm_int8_threshold=6.0,
            llm_int8_has_fp16_weight=False,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type='nf4')),
    lora=dict(
        type=LoraConfig,
        r=64,
        lora_alpha=16,
        lora_dropout=0.1,
        bias='none',
        task_type='CAUSAL_LM'))
#######################################################################
#                      PART 3  Dataset & Dataloader                   #
#######################################################################
# NOTE(review): in the pasted config the train_dataset dict was truncated
# and never closed after the `dataset=` line. The remaining fields below
# are restored from the stock xtuner internlm-7b QLoRA template — confirm
# that `dataset_map_fn` matches the actual format of muxue_dataset.json
# (a custom JSON dataset may need a different map fn, or None).
train_dataset = dict(
    type=process_hf_dataset,
    dataset=dict(type=load_dataset, path=data_path),
    tokenizer=tokenizer,
    max_length=max_length,
    dataset_map_fn=oasst1_map_fn,
    template_map_fn=dict(
        type=template_map_fn_factory, template=prompt_template),
    remove_unused_columns=True,
    shuffle_before_pack=True,
    pack_to_max_length=pack_to_max_length,
    use_varlen_attn=use_varlen_attn)

train_dataloader = dict(
    batch_size=batch_size,
    num_workers=dataloader_num_workers,
    dataset=train_dataset,
    sampler=dict(type=DefaultSampler, shuffle=True),
    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
#######################################################################
#                     PART 4  Scheduler & Optimizer                   #
#######################################################################
# optimizer: AMP (fp16, dynamic loss scaling) AdamW with grad clipping
# and gradient accumulation.
optim_wrapper = dict(
    type=AmpOptimWrapper,
    optimizer=dict(
        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
    accumulative_counts=accumulative_counts,
    loss_scale='dynamic',
    dtype='float16')

# learning policy: linear warmup for the first `warmup_ratio` fraction of
# training, then cosine decay to 0.
# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=1e-5,
        by_epoch=True,
        begin=0,
        # FIX: the pasted text read `warmup_ratio max_epochs` — the `*`
        # was lost (markdown italics); it must be a product.
        end=warmup_ratio * max_epochs,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        eta_min=0.0,
        by_epoch=True,
        begin=warmup_ratio * max_epochs,
        end=max_epochs,
        convert_to_iter_based=True)
]

# train, val, test setting
train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
#######################################################################
#                           PART 5  Runtime                           #
#######################################################################
# Log the dialogue periodically during the training process, optional
custom_hooks = [
    dict(type=DatasetInfoHook, tokenizer=tokenizer),
    dict(
        type=EvaluateChatHook,
        tokenizer=tokenizer,
        every_n_iters=evaluation_freq,
        evaluation_inputs=evaluation_inputs,
        system=SYSTEM,
        prompt_template=prompt_template)
]

if use_varlen_attn:
    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]

# configure default hooks
# NOTE(review): the interiors of default_hooks and env_cfg were lost in
# the paste (only their comment text survived); restored below from the
# stock xtuner config template — verify against the original file.
default_hooks = dict(
    # record the time of every iteration.
    timer=dict(type=IterTimerHook),
    # print log every 10 iterations.
    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
    # enable the parameter scheduler.
    param_scheduler=dict(type=ParamSchedulerHook),
    # save checkpoint per `save_steps` iterations.
    checkpoint=dict(
        type=CheckpointHook,
        by_epoch=False,
        interval=save_steps,
        max_keep_ckpts=save_total_limit),
    # set sampler seed in a distributed environment.
    sampler_seed=dict(type=DistSamplerSeedHook),
)

# configure environment
env_cfg = dict(
    # whether to enable cudnn benchmark
    cudnn_benchmark=False,
    # set multi-process parameters
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    # set distributed parameters
    dist_cfg=dict(backend='nccl'),
)

# set visualizer
visualizer = None

# set log level
log_level = 'INFO'

# load from which checkpoint
load_from = None

# whether to resume training from the loaded checkpoint
resume = False

# Defaults to use random seed and disable `deterministic`
randomness = dict(seed=None, deterministic=False)

# set log processor
log_processor = dict(by_epoch=False)
这是报错信息:
Traceback (most recent call last): File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/xtuner/tools/train.py", line 360, in <module>
main()
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/xtuner/tools/train.py", line 356, in main
runner.train()
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/mmengine/runner/_flexible_runner.py", line 1182, in train
self.strategy.prepare(
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/mmengine/_strategy/deepspeed.py", line 381, in prepare
model = self.build_model(model)
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/mmengine/_strategy/base.py", line 306, in build_model
model = MODELS.build(model)
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
return self.build_func(cfg, *args, **kwargs, registry=self)
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 232, in build_model_from_cfg
return build_from_cfg(cfg, registry, default_args)
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/xtuner/model/sft.py", line 93, in __init__
dispatch_modules(self.llm, use_varlen_attn=use_varlen_attn)
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/xtuner/model/modules/dispatch/__init__.py", line 274, in dispatch_modules
replace_rote(model)
File "/home/intern2/anaconda3/envs/Qwen1.5/lib/python3.10/site-packages/xtuner/model/modules/dispatch/__init__.py", line 233, in replace_rote
assert hasattr(model.config, 'rope_theta'), \
AssertionError: `rope_theta` should be in the model config.