Open cafezitopalito opened 1 year ago
Hello. I'm having an issue with this part of your code [train_lora].
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        output_dir="lora-alpaca",
        save_total_limit=3,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ in <module>:1 │
│ │
│ ❱ 1 trainer = transformers.Trainer( │
│ 2 │ model=model, │
│ 3 │ train_dataset=data["train"], │
│ 4 │ args=transformers.TrainingArguments( │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/trainer.py:476 in __init__ │
│ │
│ 473 │ │ self.tokenizer = tokenizer │
│ 474 │ │ │
│ 475 │ │ if self.place_model_on_device and not getattr(model, "is_loaded_in_8bit", False) │
│ ❱ 476 │ │ │ self._move_model_to_device(model, args.device) │
│ 477 │ │ │
│ 478 │ │ # Force n_gpu to 1 to avoid DataParallel as MP will manage the GPUs │
│ 479 │ │ if self.is_model_parallel: │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/trainer.py:715 in │
│ _move_model_to_device │
│ │
│ 712 │ │ self.callback_handler.remove_callback(callback) │
│ 713 │ │
│ 714 │ def _move_model_to_device(self, model, device): │
│ ❱ 715 │ │ model = model.to(device) │
│ 716 │ │ # Moving a model to an XLA device disconnects the tied weights, so we have to re │
│ 717 │ │ if self.args.parallel_mode == ParallelMode.TPU and hasattr(model, "tie_weights") │
│ 718 │ │ │ model.tie_weights() │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/modeling_utils.py:1811 in to │
│ │
│ 1808 │ │ │ │ " model has already been set to the correct devices and casted to the co │
│ 1809 │ │ │ ) │
│ 1810 │ │ else: │
│ ❱ 1811 │ │ │ return super().to(*args, **kwargs) │
│ 1812 │ │
│ 1813 │ def half(self, *args): │
│ 1814 │ │ # Checks if the model has been loaded in 8-bit │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1145 in to │
│ │
│ 1142 │ │ │ │ │ │ │ non_blocking, memory_format=convert_to_format) │
│ 1143 │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() else No │
│ 1144 │ │ │
│ ❱ 1145 │ │ return self._apply(convert) │
│ 1146 │ │
│ 1147 │ def register_full_backward_pre_hook( │
│ 1148 │ │ self, │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:820 in _apply │
│ │
│ 817 │ │ │ # track autograd history of `param_applied`, so we have to use │
│ 818 │ │ │ # `with torch.no_grad():` │
│ 819 │ │ │ with torch.no_grad(): │
│ ❱ 820 │ │ │ │ param_applied = fn(param) │
│ 821 │ │ │ should_use_set_data = compute_should_use_set_data(param, param_applied) │
│ 822 │ │ │ if should_use_set_data: │
│ 823 │ │ │ │ param.data = param_applied │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1143 in convert │
│ │
│ 1140 │ │ │ if convert_to_format is not None and t.dim() in (4, 5): │
│ 1141 │ │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() els │
│ 1142 │ │ │ │ │ │ │ non_blocking, memory_format=convert_to_format) │
│ ❱ 1143 │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() else No │
│ 1144 │ │
│ 1145 │ │ return self._apply(convert) │
│ 1146 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
NotImplementedError: Cannot copy out of meta tensor; no data!
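For context, the check at trainer.py:475 in the traceback suggests the Trainer only skips model.to(device) when is_loaded_in_8bit is set on the model, and the .to() call then fails because some parameters are still meta tensors (placeholders with no data, as happens when weights are dispatched or offloaded by accelerate). Below is a minimal loading sketch that sets that flag so the Trainer leaves device placement alone; the base checkpoint name and LoRA hyperparameters are illustrative assumptions, not the repo's exact code.

import transformers
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

# Assumed base checkpoint for illustration; substitute the one used in train_lora.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,   # sets model.is_loaded_in_8bit, so Trainer skips model.to(device)
    device_map="auto",   # let accelerate place the quantized weights
)
model = prepare_model_for_int8_training(model)  # peft helper; renamed prepare_model_for_kbit_training in newer peft

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
# The Trainer setup from the snippet above can then be reused unchanged.

If the model was instead loaded in fp16 with device_map="auto" and parts of it got offloaded, those offloaded weights typically stay on the meta device, which matches the "Cannot copy out of meta tensor; no data!" error above.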