open-compass / VLMEvalKit

Open-source evaluation toolkit of large vision-language models (LVLMs), support ~100 VLMs, 40+ benchmarks
https://huggingface.co/spaces/opencompass/open_vlm_leaderboard
Apache License 2.0
1.08k stars 154 forks source link

cogvlm2-llama3-chat-19B设置device_map="auto"报错 #411

Closed Michel-liu closed 1 month ago

Michel-liu commented 1 month ago

如题,推理时单卡放不下整个模型,但是为vlmeval/vlm/cogvlm.py中的model = AutoModelForCausalLM.from_pretrained添加device_map="auto"报错

# Reporter's attempted change: the from_pretrained call with device_map="auto" added.
# This is the configuration that produced the "Expected all tensors to be on the
# same device" RuntimeError quoted below.
model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            device_map="auto").eval()
image

报错提示张量不在同一个 device 上:RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:5 and cuda:6!

Michel-liu commented 1 month ago

I fixed this by changing the code as follows:

def get_memory():
    """Return the total memory of CUDA device 0, expressed in GiB (float)."""
    bytes_per_gib = 1024 ** 3
    device_props = torch.cuda.get_device_properties(0)
    return device_props.total_memory / bytes_per_gib

def build_device_map(model, default_map=None, no_split=None, alpha=0.97, beta=0.9):
    """Place ``model`` on the GPU(s) assigned to the current rank.

    When there is exactly one GPU per process the model is simply moved to
    CUDA. Otherwise the visible GPUs are divided evenly between the
    processes, a per-GPU memory budget is built, and the model is sharded
    with ``infer_auto_device_map`` / ``dispatch_model``.

    Args:
        model: The model to place.
        default_map: Optional iterable of module names that are forced onto
            device index ``rank`` after the automatic map has been inferred.
        no_split: Optional list of module class names that must not be split
            across devices, merged with ``model._no_split_modules``.
        alpha: Fraction of each GPU's total memory offered as its budget.
        beta: Additional fraction applied to the first GPU (device ``rank``)
            only, so that it receives a smaller budget than the others.
            NOTE(review): presumably to leave headroom for the modules
            pinned through ``default_map`` - confirm.

    Returns:
        A ``(model, device_map)`` tuple. ``device_map`` is ``None`` when the
        model was moved to a single GPU without sharding. The model is in
        eval mode in both cases.

    Raises:
        RuntimeError: If the inferred device map offloads any module to disk,
            i.e. the GPU budgets are too small for the model.
    """
    total_num_gpus = torch.cuda.device_count()
    rank, world_size = get_rank_and_world_size()
    if world_size == total_num_gpus:
        # One GPU per process: nothing to shard. Return the same
        # (model, device_map) shape as the sharded path so that callers
        # unpacking two values work in both cases.
        return model.cuda().eval(), None

    num_gpus = total_num_gpus // world_size
    per_gpu_mem = get_memory() * alpha
    # This rank owns devices rank, rank + world_size, rank + 2 * world_size, ...
    # The first one gets the reduced (beta-scaled) budget.
    memory_map = {rank: f'{beta * per_gpu_mem:.2f}GiB'}
    for gpu_id in range(1, num_gpus):
        memory_map[rank + gpu_id * world_size] = f'{per_gpu_mem:.2f}GiB'

    # The attribute can exist but be None, so a plain hasattr() check is not
    # enough before concatenating with ``no_split``.
    no_split_module = list(getattr(model, '_no_split_modules', None) or [])
    if no_split is not None:
        # De-duplicate while keeping a deterministic order.
        no_split_module = list(dict.fromkeys(no_split_module + list(no_split)))

    device_map = infer_auto_device_map(
        model,
        max_memory=memory_map,
        no_split_module_classes=no_split_module
    )
    if default_map is not None:
        for module_name in default_map:
            device_map[module_name] = rank

    # Raise explicitly instead of asserting: asserts vanish under ``python -O``.
    if 'disk' in device_map.values():
        raise RuntimeError('Please check and make sure to have enough memory to load model.')

    model = dispatch_model(
        model,
        device_map=device_map).eval()
    return model, device_map

from accelerate import init_empty_weights

        # Instantiate the model inside accelerate's init_empty_weights() context.
        # NOTE(review): presumably so that no real weight memory is allocated
        # before the model is sharded - confirm against the accelerate docs.
        with init_empty_weights():
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )
        # Module names listed here are assigned to device index `rank` by
        # build_device_map, overriding the automatically inferred placement.
        default_map = ['model.vision']
        # "EVA2CLIPModel" is passed as an extra no-split module class; the
        # returned device map is not needed here and is discarded.
        model, _ = build_device_map(model, default_map, ["EVA2CLIPModel"])