Open alexhegit opened 1 year ago
参考 https://github.com/imClumsyPanda/langchain-ChatGLM/issues/732 解决。下面这段代码按照 chatglm2-6b 模型配置文件中的层名修改了层映射,可以解决 chatglm2-6b 的多卡部署问题。除 chatglm2-6b 外,vicuna-7b 和 vicuna-13b 也存在类似问题。
if self.lora:
layer_prefix = 'base_model.model.transformer'
device_map = {f'{layer_prefix}.word_embeddings': 0,
f'{layer_prefix}.final_layernorm': 0, 'lm_head': 0,
f'base_model.model.lm_head': 0, }
elif self.model_name.find("chatglm2-6b") != -1:
layer_prefix = 'transformer.encoder'
device_map = {'transformer.embedding.word_embeddings': 0,
'transformer.output_layer': 0,
'transformer.rotary_pos_emb': 0,
'transformer.encoder.final_layernorm': 0,
'lm_head': 0,
'base_model.model.lm_head': 0, }
else:
layer_prefix = 'transformer'
device_map = {f'{layer_prefix}.word_embeddings': 0,
f'{layer_prefix}.final_layernorm': 0, 'lm_head': 0,
f'base_model.model.lm_head': 0, }
Is there an existing issue for this?
Current Behavior
I am trying to use 2 GPUs to run the chatglm2-6b model with the same script that runs chatglm-6b successfully. The only modification is changing the model path, like this:
model_path = "../chatglm2-6b-int4-model"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) model = load_model_on_gpus(model_path, num_gpus=2)
Expected Behavior
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/alex/chatGLM-PJ/chatGLM-6B/cli_demo_2gpu.py:49 in <module> │
│ │
│ 46 #model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cu │
│ 47 │
│ 48 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) │
│ ❱ 49 model = load_model_on_gpus(model_path, num_gpus=2) │
│ 50 #model = load_model_on_gpus(model_path, num_gpus=2, device_map=fix_configure_device_map( │
│ 51 │
│ 52 model = model.eval() │
│ │
│ /home/alex/chatGLM-PJ/chatGLM-6B/utils.py:80 in load_model_on_gpus │
│ │
│ 77 │ │ │ device_map = fix_configure_device_map(num_gpus) │
│ 78 │ │ │
│ 79 │ │ │
│ ❱ 80 │ │ model = dispatch_model(model, device_map=device_map) │
│ 81 │ │
│ 82 │ return model │
│ 83 │
│ │
│ /home/alex/anaconda3/envs/chatGLM/lib/python3.9/site-packages/accelerate/big_modeling.py:327 in │
│ dispatch_model │
│ │
│ 324 │ if not is_torch_version(">=", "1.9.0"): │
│ 325 │ │ raise NotImplementedError("Model dispatching requires torch >= 1.9.0") │
│ 326 │ # Error early if the device map is incomplete. │
│ ❱ 327 │ check_device_map(model, device_map) │
│ 328 │ │
│ 329 │ if main_device is None: │
│ 330 │ │ if set(device_map.values()) == {"cpu"} or set(device_map.values()) == {"cpu", "d │
│ │
│ /home/alex/anaconda3/envs/chatGLM/lib/python3.9/site-packages/accelerate/utils/modeling.py:786 │
│ in check_device_map │
│ │
│ 783 │ │ │ ] │
│ 784 │ if len(all_model_tensors) > 0: │
│ 785 │ │ non_covered_params = ", ".join(all_model_tensors) │
│ ❱ 786 │ │ raise ValueError( │
│ 787 │ │ │ f"The device_map provided does not give any device for the following paramet │
│ 788 │ │ ) │
│ 789 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
ValueError: The device_map provided does not give any device for the following parameters: transformer.embedding.word_embeddings.weight, transformer.rotary_pos_emb.inv_freq,
transformer.encoder.layers.0.input_layernorm.weight, transformer.encoder.layers.0.self_attention.query_key_value.weight, transformer.encoder.layers.0.self_attention.query_key_value.weight_scale,
transformer.encoder.layers.0.self_attention.query_key_value.bias, transformer.encoder.layers.0.self_attention.dense.weight, transformer.encoder.layers.0.self_attention.dense.weight_scale,
transformer.encoder.layers.0.post_attention_layernorm.weight, transformer.encoder.layers.0.mlp.dense_h_to_4h.weight, transformer.encoder.layers.0.mlp.dense_h_to_4h.weight_scale,
transformer.encoder.layers.0.mlp.dense_4h_to_h.weight, transformer.encoder.layers.0.mlp.dense_4h_to_h.weight_scale, transformer.encoder.layers.1.input_layernorm.weight,
...
Steps To Reproduce
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) model = load_model_on_gpus(model_path, num_gpus=2)
Environment
Anything else?
n/a