I fixed this by changing the code as follows:
import torch
from accelerate import dispatch_model, infer_auto_device_map

def get_memory():
    # Total capacity of GPU 0 in GiB (assumes all GPUs are identical).
    total_memory = torch.cuda.get_device_properties(0).total_memory
    return total_memory / 1024 / 1024 / 1024
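Note that get_memory reports the card's total capacity, not what is currently free, so the alpha headroom can be too optimistic on a shared machine. A minimal free-memory variant, as a sketch (the device argument is an assumption; pass whichever card you want to query):

def get_free_memory(device=0):
    # torch.cuda.mem_get_info returns (free_bytes, total_bytes) for the device.
    free_bytes, _ = torch.cuda.mem_get_info(device)
    return free_bytes / 1024 / 1024 / 1024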
def build_device_map(model, default_map=None, no_split=None, alpha=0.97, beta=0.9):
    # get_rank_and_world_size is a helper provided by the surrounding VLMEvalKit code.
    total_num_gpus = torch.cuda.device_count()
    rank, world_size = get_rank_and_world_size()
    if world_size == total_num_gpus:
        # One GPU per rank: no sharding needed.
        return model.cuda(), None
    num_gpus = total_num_gpus // world_size
    # Cap usable memory per GPU; reserve extra headroom (beta) on the rank's
    # first GPU, which also holds activations and buffers.
    memory_map = {}
    per_gpu_mem = get_memory() * alpha
    memory_map[rank] = f'{beta * per_gpu_mem:.2f}GiB'
    for gpu_id in range(1, num_gpus):
        memory_map[rank + gpu_id * world_size] = f'{per_gpu_mem:.2f}GiB'
    # Collect module classes that must never be split across devices.
    if hasattr(model, '_no_split_modules'):
        no_split_module = model._no_split_modules or []
    else:
        no_split_module = []
    if no_split is not None:
        no_split_module = list(set(no_split_module + no_split))
    device_map = infer_auto_device_map(
        model,
        max_memory=memory_map,
        no_split_module_classes=no_split_module,
    )
    # Pin the listed modules (e.g. the vision tower) to this rank's first GPU.
    if default_map is not None:
        for i in default_map:
            device_map[i] = rank
    for value in device_map.values():
        assert value != 'disk', 'Please check and make sure there is enough memory to load the model.'
    model = dispatch_model(model, device_map=device_map).eval()
    return model, device_map
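The returned device_map maps module names to GPU indices, which makes it easy to sanity-check the sharding before running inference. A small inspection helper (hypothetical, not part of the fix):

def show_device_map(device_map):
    # Group module names by the device they were assigned to.
    by_device = {}
    for name, device in device_map.items():
        by_device.setdefault(device, []).append(name)
    for device, names in sorted(by_device.items(), key=lambda kv: str(kv[0])):
        print(f'device {device}: {len(names)} modules, e.g. {names[0]}')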
from accelerate import init_empty_weights
from transformers import AutoModelForCausalLM

# model_path points at the CogVLM checkpoint (defined by the surrounding code).
with init_empty_weights():
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
        trust_remote_code=True,
    )
# Pin the vision tower to one GPU and never split EVA2CLIPModel across devices.
default_map = ['model.vision']
model, _ = build_device_map(model, default_map, ['EVA2CLIPModel'])
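Once dispatched, inputs only need to live on the GPU that holds the input embeddings; accelerate's hooks move activations between shards automatically. A hedged, text-only usage sketch (the real CogVLM path in VLMEvalKit also passes image tensors; the prompt, GPU index, and tokenizer path here are placeholders):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
inputs = tokenizer('Hello', return_tensors='pt').to('cuda:0')
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))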
As the title says: the whole model does not fit on a single GPU at inference time, but adding device_map="auto" to model = AutoModelForCausalLM.from_pretrained in vlmeval/vlm/cogvlm.py raises an error saying the tensors are not on the same device: RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:5 and cuda:6!
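The likely cause: device_map="auto" may split any module whose class is not listed in the model's _no_split_modules, so CogVLM's vision tower gets layers placed on cuda:5 and cuda:6, and the first cross-device op inside it raises this RuntimeError. A diagnostic sketch to confirm the split, inspecting hf_device_map (which transformers sets when a model is loaded with a device_map):

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    trust_remote_code=True,
)
# Vision-tower entries mapped to different GPUs confirm the bad split.
for name, device in model.hf_device_map.items():
    if name.startswith('model.vision'):
        print(name, '->', device)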