OpenMOSS / MOSS

An open-source tool-augmented conversational language model from Fudan University
https://txsun1997.github.io/blogs/moss.html
Apache License 2.0

How do I specify which GPU to use for fine-tuning? #312

Open 631068264 opened 1 year ago

631068264 commented 1 year ago

I added the following at the very top of finetune_moss.py:

import os

# mask out all but physical GPU 1; must run before CUDA is first initialized
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# make CUDA errors surface at the failing call instead of asynchronously
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

I have 4 GPUs, and only GPU 1 is idle.
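For reference, my understanding is that CUDA only reads CUDA_VISIBLE_DEVICES when the first CUDA context is created, so an alternative is to set the mask in the shell, so that the accelerate launcher and every worker it spawns inherit it. A minimal sketch of what I mean (the config path is a placeholder, other flags elided):

CUDA_VISIBLE_DEVICES=1 accelerate launch --config_file <accelerate_config>.yaml finetune_moss.py ...

Either way, I still hit the traceback below: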

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /xxx/xxx/dl/apps/moss_new/finetune_moss.py:308 in <module>                              │
│                                                                                                  │
│   305 │   os.makedirs(args.output_dir, exist_ok=True)                                            │
│   306 │                                                                                          │
│   307 │   set_seed(args.seed)                                                                    │
│ ❱ 308 │   train(args)                                                                            │
│   309                                                                                            │
│                                                                                                  │
│ /xxx/xxx/dl/apps/moss_new/finetune_moss.py:213 in train                                 │
│                                                                                                  │
│   210 │   num_training_steps = (len(train_dataloader) * args.n_epochs) // accelerator.gradient   │
│   211 │   lr_scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=int(args.   │
│   212 │                                                                                          │
│ ❱ 213 │   model, optimizer, train_dataloader, val_dataloader, lr_scheduler = accelerator.prepa   │
│   214 │                                                                                          │
│   215 │   global_step = 0                                                                        │
│   216 │   metric = SFTMetric(device=torch.cuda.current_device())                                 │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/accelerate/accelerator.py: │
│ 1139 in prepare                                                                                  │
│                                                                                                  │
│   1136 │   │   │   if self.device.type == "cpu" and self.state.ipex_plugin is not None:          │
│   1137 │   │   │   │   args = self._prepare_ipex(*args)                                          │
│   1138 │   │   if self.distributed_type == DistributedType.DEEPSPEED:                            │
│ ❱ 1139 │   │   │   result = self._prepare_deepspeed(*args)                                       │
│   1140 │   │   elif self.distributed_type == DistributedType.MEGATRON_LM:                        │
│   1141 │   │   │   result = self._prepare_megatron_lm(*args)                                     │
│   1142 │   │   else:                                                                             │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/accelerate/accelerator.py: │
│ 1446 in _prepare_deepspeed                                                                       │
│                                                                                                  │
│   1443 │   │   │   │   │   │   if type(scheduler).__name__ in deepspeed.runtime.lr_schedules.VA  │
│   1444 │   │   │   │   │   │   │   kwargs["lr_scheduler"] = scheduler                            │
│   1445 │   │   │                                                                                 │
│ ❱ 1446 │   │   │   engine, optimizer, _, lr_scheduler = deepspeed.initialize(**kwargs)           │
│   1447 │   │   │   if optimizer is not None:                                                     │
│   1448 │   │   │   │   optimizer = DeepSpeedOptimizerWrapper(optimizer)                          │
│   1449 │   │   │   if scheduler is not None:                                                     │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/deepspeed/__init__.py:165  │
│ in initialize                                                                                    │
│                                                                                                  │
│   162 │   │   │   │   │   │   │   │   │   │      config=config,                                  │
│   163 │   │   │   │   │   │   │   │   │   │      config_class=config_class)                      │
│   164 │   │   else:                                                                              │
│ ❱ 165 │   │   │   engine = DeepSpeedEngine(args=args,                                            │
│   166 │   │   │   │   │   │   │   │   │    model=model,                                          │
│   167 │   │   │   │   │   │   │   │   │    optimizer=optimizer,                                  │
│   168 │   │   │   │   │   │   │   │   │    model_parameters=model_parameters,                    │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/deepspeed/runtime/engine.p │
│ y:308 in __init__                                                                                │
│                                                                                                  │
│    305 │   │   │   model_parameters = list(model_parameters)                                     │
│    306 │   │                                                                                     │
│    307 │   │   if has_optimizer:                                                                 │
│ ❱  308 │   │   │   self._configure_optimizer(optimizer, model_parameters)                        │
│    309 │   │   │   self._configure_lr_scheduler(lr_scheduler)                                    │
│    310 │   │   │   self._report_progress(0)                                                      │
│    311 │   │   elif self.zero_optimization():                                                    │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/deepspeed/runtime/engine.p │
│ y:1173 in _configure_optimizer                                                                   │
│                                                                                                  │
│   1170 │   │   optimizer_wrapper = self._do_optimizer_sanity_check(basic_optimizer)              │
│   1171 │   │                                                                                     │
│   1172 │   │   if optimizer_wrapper == ZERO_OPTIMIZATION:                                        │
│ ❱ 1173 │   │   │   self.optimizer = self._configure_zero_optimizer(basic_optimizer)              │
│   1174 │   │   elif optimizer_wrapper == AMP:                                                    │
│   1175 │   │   │   amp_params = self.amp_params()                                                │
│   1176 │   │   │   log_dist(f"Initializing AMP with these params: {amp_params}", ranks=[0])      │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/deepspeed/runtime/engine.p │
│ y:1463 in _configure_zero_optimizer                                                              │
│                                                                                                  │
│   1460 │   │   │   │                                                                             │
│   1461 │   │   │   │   log_dist(f'Creating {model_dtype} ZeRO stage {zero_stage} optimizer', ra  │
│   1462 │   │   │   │   from deepspeed.runtime.zero.stage3 import DeepSpeedZeroOptimizer_Stage3   │
│ ❱ 1463 │   │   │   │   optimizer = DeepSpeedZeroOptimizer_Stage3(                                │
│   1464 │   │   │   │   │   self.module,                                                          │
│   1465 │   │   │   │   │   optimizer,                                                            │
│   1466 │   │   │   │   │   timers=timers,                                                        │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/deepspeed/runtime/zero/sta │
│ ge3.py:304 in __init__                                                                           │
│                                                                                                  │
│    301 │   │   ])                                                                                │
│    302 │   │   print_rank_0(f'Largest partitioned param numel = {largest_partitioned_param_nume  │
│    303 │   │                                                                                     │
│ ❱  304 │   │   self._setup_for_real_optimizer()                                                  │
│    305 │   │   self.grad_position = {}                                                           │
│    306 │   │   self.set_grad_positions()                                                         │
│    307                                                                                           │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/deepspeed/runtime/zero/sta │
│ ge3.py:378 in _setup_for_real_optimizer                                                          │
│                                                                                                  │
│    375 │                                                                                         │
│    376 │   def _setup_for_real_optimizer(self):                                                  │
│    377 │   │   see_memory_usage("Before creating fp32 partitions", force=True)                   │
│ ❱  378 │   │   self._create_fp32_partitions()                                                    │
│    379 │   │   see_memory_usage("After creating fp32 partitions", force=True)                    │
│    380 │   │   dist.barrier()                                                                    │
│    381                                                                                           │
│                                                                                                  │
│ /xxx/xxx/anaconda3/envs/moss_new/lib/python3.8/site-packages/deepspeed/runtime/zero/sta │
│ ge3.py:751 in _create_fp32_partitions                                                            │
│                                                                                                  │
│    748 │   │   │   │   │   self._swap_in_sub_group_to_flat_buffer(unpinned_fp32_buffer, i)       │
│    749 │   │   │   │   │   self.fp32_partitioned_groups_flat.append(unpinned_fp32_buffer)        │
│    750 │   │   │   │   else:                                                                     │
│ ❱  751 │   │   │   │   │   self.fp32_partitioned_groups_flat.append(self.fp16_partitioned_group  │
│    752 │   │   │   │   │   │   self.device).clone().float().detach())                            │
│    753 │   │   │                                                                                 │
│    754 │   │   │   self.fp32_partitioned_groups_flat[i].requires_grad = True  # keep this in ca  │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
OutOfMemoryError: CUDA out of memory. Tried to allocate 3.80 GiB (GPU 0; 79.10 GiB total capacity; 72.75 GiB already allocated; 2.56 GiB 
free; 75.54 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.
See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
[17:06:01] ERROR    failed (exitcode: 1) local_rank: 0 (pid: 23095) of binary:                
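The allocator message suggests max_split_size_mb; if I read it correctly, that means setting something like PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 in the environment before launching (128 is just a guessed value). That should only help with fragmentation, though, not with a genuine capacity shortfall.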

I also tried the workaround from https://github.com/OpenLMLab/MOSS/issues/286, but it didn't help either.
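One more observation: once CUDA_VISIBLE_DEVICES="1" takes effect, torch renumbers devices, so the "GPU 0" in the error should actually be physical GPU 1; that is, the mask probably worked and the card genuinely ran out of memory. A back-of-the-envelope sketch (assuming the 16B moss-moon checkpoint, fp16 weights, fp32 Adam states, and ZeRO-3 with only one visible GPU, so nothing is actually partitioned):

# rough memory estimate; all numbers are assumptions, not measurements
params = 16e9                         # assumed parameter count of moss-moon
fp16_weights = params * 2 / 2**30     # ~30 GiB resident fp16 model
fp32_master  = params * 4 / 2**30     # ~60 GiB fp32 partitions (where the trace OOMs)
adam_states  = params * 8 / 2**30     # ~119 GiB Adam exp_avg + exp_avg_sq in fp32
print(fp16_weights + fp32_master + adam_states)  # ~209 GiB, far beyond one 80 GiB card

If that math is right, the OOM would happen no matter which single card is chosen; ZeRO-3 only helps when the optimizer states are sharded across several visible GPUs.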