Please upload your inference code.

For inference I'm using the infer_lora_finetuning.py file as-is, completely unchanged.
https://github.com/ssbuild/chatglm_finetuning/blob/b62a95cd989a27d7333e8bbc2e06ba2b00b89f06/infer_lora_finetuning.py#L35 Indeed, this has been fixed; see the line referenced above. That said, applying LoRA to only part of the layers probably won't be effective.
Thanks! By "not all layers", do you mean "num_layers"? There was indeed a problem at inference time: the response comes back empty.
Output printed by infer_lora_finetuning.py:
```
...
trainable params: 534880256 || all params: 3383328768 || trainable%: 15.809289982663605
WARNING:deep_training.nlp.models.chatglm:The dtype of attention mask (torch.int64) is not bool
写一个诗歌,关于冬天
晚上睡不着应该怎么办
```
Following the README instructions, I ran LoRA fine-tuning on top of int8. Training went smoothly, but inference raises an error that looks like a precision/dtype configuration problem. I've been digging for a long time without finding the cause. I've marked essentially every parameter I changed from the defaults below. Could someone take a look? Thanks.
Parameter settings:

config/config.json:
```
{
  "architectures": ["ChatGLMModel"],
  "auto_map": {
    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration"
  },
  "bos_token_id": 130004,
  "eos_token_id": 130005,
  "mask_token_id": 130000,
  "gmask_token_id": 130001,
  "pad_token_id": 3,
  "hidden_size": 4096,
  "inner_hidden_size": 16384,
  "layernorm_epsilon": 1e-05,
  "max_sequence_length": 2048,
  "model_type": "chatglm",
  "num_attention_heads": 32,
  "num_layers": 14,              # changed this
  "position_encoding_2d": true,
  "torch_dtype": "float16",
  "transformers_version": "4.23.1",
  "use_cache": true,
  "vocab_size": 130528,
  "precision": 16,
  "quantization_bit": 0,
  "pre_seq_len": null,
  "prefix_projection": false
}
```
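One thing worth double-checking here (a suggestion, not something from the thread's fix): the released THUDM/ChatGLM-6B checkpoint is configured with num_layers: 28, so overriding it to 14 in a local config builds only half of the transformer layers at inference time, which would be consistent with the empty responses above. A minimal sketch to compare the local override against the upstream config, assuming transformers is installed and trust_remote_code is acceptable:

```python
# Hypothetical sanity check: compare the local config override with the
# upstream THUDM/ChatGLM-6B config. The local path is the one used in this thread.
import json
from transformers import AutoConfig

with open("./config/config.json") as f:
    local_num_layers = json.load(f)["num_layers"]

remote_cfg = AutoConfig.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)

print("local num_layers :", local_num_layers)       # 14 with the override above
print("remote num_layers:", remote_cfg.num_layers)  # 28 for the released checkpoint
```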
data_utils.py:
```python
lora_info_args = {
    'with_lora': True,   # changed this
    'r': 32,             # changed this
    'target_modules': ['dense', 'dense_h_to_4h', 'dense_4h_to_h', 'query_key_value'],  # changed this
    'target_dtype': 16,
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'bias': 'none',
}
```
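For readers more familiar with the Hugging Face peft library, a rough equivalent of lora_info_args above would look like the sketch below. This is only an illustration of what targeting those four ChatGLM linear layers means; this repository uses deep_training's built-in lora_v2 implementation, not peft.

```python
# Rough peft equivalent of lora_info_args -- illustration only, not the repo's
# actual mechanism. Targets the same four ChatGLM linear module types.
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=32,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"],
)
```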
```python
train_info_args = {
    'devices': 1,
    'data_backend': 'record',
    'model_type': 'chatglm',
    'model_name_or_path': 'THUDM/ChatGLM-6B',
    'config_name': './config/config.json',
    'tokenizer_name': 'THUDM/ChatGLM-6B',
    'convert_onnx': False,
    'do_train': True,
    'train_file': ['./train.json'],
    'max_epochs': 5,
    'max_steps': -1,
    'optimizer': 'lion',          # changed this
    'scheduler_type': 'linear',   # changed this
    'scheduler': None,
    'optimizer_betas': (0.9, 0.999),
    'train_batch_size': 4,
    'eval_batch_size': 2,
    'test_batch_size': 2,
    'learning_rate': 2e-5,
    'adam_epsilon': 1e-8,
    'gradient_accumulation_steps': 1,
    'max_grad_norm': 1.0,
    'weight_decay': 0,
    'warmup_steps': 0,
    'output_dir': './output',
    'max_seq_length': 1024,
    'max_target_length': 100,
    'use_fast_tokenizer': False,
    'do_lower_case': False,
    'lora': {**lora_info_args},
    'adalora': {**adalora_info_args},
}
```
models.py:
```python
load_in_8bit = True  # changed this
```
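For context (this is not the repo's actual loading code): when load_in_8bit is enabled, bitsandbytes/transformers expect the non-quantized parts of the model in float16, while peft's prepare_model_for_kbit_training helper (called prepare_model_for_int8_training in older peft versions) deliberately keeps layer norms and the output head in float32. A mismatch between those dtypes and the LoRA adapter weights is one common source of the error shown below. A minimal sketch of the usual pattern, with model_name as a placeholder:

```python
# Minimal sketch of the common transformers + peft int8 loading pattern.
# Assumption: this is NOT the repo's own code path; names are placeholders.
import torch
from transformers import AutoModel
from peft import prepare_model_for_kbit_training  # prepare_model_for_int8_training in older peft

model_name = "THUDM/chatglm-6b"  # placeholder
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
    load_in_8bit=True,
    torch_dtype=torch.float16,  # matches the bitsandbytes warning in the log below
    device_map="auto",
)
# Casts layer norms (and the output embeddings) to fp32 and prepares the model for LoRA training.
model = prepare_model_for_kbit_training(model)
```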
After training with train.py, running `python infer_lora_finetuning.py` produces the following warning and error:

```
Overriding torch_dtype=None with `torch_dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. Either pass torch_dtype=torch.float16 or don't pass this argument at all to remove this warning.
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████| 8/8 [00:34<00:00,  4.35s/it]
Some weights of the model checkpoint at THUDM/ChatGLM-6B were not used when initializing MyChatGLMForConditionalGeneration: ...
This IS expected if you are initializing MyChatGLMForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
This IS NOT expected if you are initializing MyChatGLMForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
** lora info
trainable params: 534880256 || all params: 3383328768 || trainable%: 15.809289982663605
WARNING:deep_training.nlp.models.chatglm:The dtype of attention mask (torch.int64) is not bool
Traceback (most recent call last):
  File "/mnt/chatglm_finetuning/infer_lora_finetuning.py", line 45, in <module>
    response, history = model.chat(tokenizer, "写一个诗歌,关于冬天", history=[], max_length=2048,
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/mnt/chatglm_finetuning/models.py", line 55, in chat
    outputs = self.generate(**inputs, **gen_kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/transformers/generation/utils.py", line 1485, in generate
    return self.sample(
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/transformers/generation/utils.py", line 2524, in sample
    outputs = self(
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/deep_training/nlp/models/chatglm/__init__.py", line 1204, in forward
    transformer_outputs = self.transformer(
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/deep_training/nlp/models/chatglm/__init__.py", line 1019, in forward
    layer_ret = layer(
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/deep_training/nlp/models/chatglm/__init__.py", line 629, in forward
    attention_outputs = self.attention(
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/deep_training/nlp/models/chatglm/__init__.py", line 455, in forward
    mixed_raw_layer = self.query_key_value(hidden_states)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/deep_training/nlp/layers/lora_v2/layers.py", line 213, in forward
    self.lora_A[self.active_adapter](...)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/miniconda3/envs/myconda/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: expected scalar type Float but found Half
```
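The RuntimeError means F.linear is mixing dtypes inside the LoRA-wrapped query_key_value layer: the LoRA A/B matrices are typically kept in float32 for training while the quantized base model produces float16 hidden states. A common workaround sketch, under the assumption that the adapters live in nn.Linear submodules whose qualified names contain "lora_" (adjust the filter if the actual names differ), is to cast them to the base model's dtype before calling model.chat:

```python
# Hedged workaround sketch: align LoRA adapter dtype with the fp16 base model
# before inference. Assumes adapter submodules are nn.Linear layers whose names
# contain "lora_"; this is not the repository's official fix.
import torch
import torch.nn as nn

def cast_lora_adapters(model: nn.Module, dtype: torch.dtype = torch.float16) -> nn.Module:
    for name, module in model.named_modules():
        if "lora_" in name and isinstance(module, nn.Linear):
            module.to(dtype)  # casts weight (and bias, if any) in place
    return model
```

The opposite direction, keeping the LoRA weights in float32 and casting the hidden states to float32 inside the LoRA forward, also removes the dtype mismatch; which choice is appropriate depends on how the checkpoint was trained and quantized.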