cogvlm2-llama3-chat-19B设置device_map="auto"报错

I fixed this by changing the code as follows:
def get_memory():
    """Return the total memory of CUDA device 0, expressed in GiB."""
    bytes_per_gib = 1024 ** 3
    device_props = torch.cuda.get_device_properties(0)
    return device_props.total_memory / bytes_per_gib

def build_device_map(model, default_map=None, no_split=None, alpha=0.97, beta=0.9):
    """Place ``model`` on the GPU(s) owned by this process.

    When there is exactly one GPU per process the whole model is moved to the
    current CUDA device. Otherwise the GPUs are divided evenly between the
    processes, a per-device memory budget is built, and accelerate's
    ``infer_auto_device_map`` / ``dispatch_model`` shard the model across the
    devices ``rank, rank + world_size, rank + 2 * world_size, ...``.

    Args:
        model: The model to place.
        default_map: Optional iterable of module names that are forced onto
            the ``rank`` device after the automatic map has been inferred.
        no_split: Optional iterable of extra module class names that must not
            be split across devices; merged with ``model._no_split_modules``.
        alpha: Fraction of the value returned by ``get_memory()`` that is
            budgeted for each GPU.
        beta: Additional factor applied to the budget of the ``rank`` device
            only, so that device is given a smaller share.

    Returns:
        A ``(model, device_map)`` tuple on every path. In the one-GPU-per-
        process case the map is ``{'': <current CUDA device index>}``.

    Raises:
        AssertionError: If the inferred map offloads any module to ``'disk'``.
    """
    total_num_gpus = torch.cuda.device_count()
    rank, world_size = get_rank_and_world_size()
    if world_size == total_num_gpus:
        # One GPU per process: nothing to shard. Return the same
        # (model, device_map) pair as the sharded path so that callers can
        # always unpack the result.
        model = model.cuda()
        return model, {'': torch.cuda.current_device()}

    num_gpus = total_num_gpus // world_size
    per_gpu_mem = get_memory() * alpha
    # The rank device gets a reduced budget (scaled by beta); the remaining
    # devices of this process get the full per-GPU budget.
    memory_map = {rank: f'{beta * per_gpu_mem:.2f}GiB'}
    for gpu_id in range(1, num_gpus):
        memory_map[rank + gpu_id * world_size] = f'{per_gpu_mem:.2f}GiB'

    # ``_no_split_modules`` may be missing or None; normalise it to a list so
    # that merging with ``no_split`` cannot raise a TypeError.
    no_split_module = list(getattr(model, '_no_split_modules', None) or [])
    if no_split is not None:
        no_split_module = list(set(no_split_module).union(no_split))

    device_map = infer_auto_device_map(
        model,
        max_memory=memory_map,
        no_split_module_classes=no_split_module,
    )
    if default_map is not None:
        for module_name in default_map:
            device_map[module_name] = rank

    # Raised explicitly (not via ``assert``) so the check survives ``python -O``
    # while keeping the exception type unchanged.
    if 'disk' in device_map.values():
        raise AssertionError('Please check and make sure to have enough memory to load model.')

    model = dispatch_model(model, device_map=device_map).eval()
    return model, device_map

from accelerate import init_empty_weights

        # Build the model under accelerate's ``init_empty_weights`` context
        # before handing it to build_device_map for placement.
        with init_empty_weights():
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )
        # Module names listed here are pinned to this process's ``rank`` device
        # by build_device_map after the automatic device map is inferred.
        default_map = ['model.vision']
        # "EVA2CLIPModel" is passed as an extra no-split module class; the
        # returned device map is not needed here and is discarded.
        model, _ = build_device_map(model, default_map, ["EVA2CLIPModel"])
open-compass / VLMEvalKit

cogvlm2-llama3-chat-19B设置device_map="auto"报错 #411