Open cafezitopalito opened 1 year ago
Hello. I'm having an issue with this part of your code [train_lora].
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        output_dir="lora-alpaca",
        save_total_limit=3,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ in <module>:1 │
│ │
│ ❱ 1 trainer = transformers.Trainer( │
│ 2 │ model=model, │
│ 3 │ train_dataset=data["train"], │
│ 4 │ args=transformers.TrainingArguments( │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/trainer.py:476 in __init__ │
│ │
│ 473 │ │ self.tokenizer = tokenizer │
│ 474 │ │ │
│ 475 │ │ if self.place_model_on_device and not getattr(model, "is_loaded_in_8bit", False) │
│ ❱ 476 │ │ │ self._move_model_to_device(model, args.device) │
│ 477 │ │ │
│ 478 │ │ # Force n_gpu to 1 to avoid DataParallel as MP will manage the GPUs │
│ 479 │ │ if self.is_model_parallel: │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/trainer.py:715 in │
│ _move_model_to_device │
│ │
│ 712 │ │ self.callback_handler.remove_callback(callback) │
│ 713 │ │
│ 714 │ def _move_model_to_device(self, model, device): │
│ ❱ 715 │ │ model = model.to(device) │
│ 716 │ │ # Moving a model to an XLA device disconnects the tied weights, so we have to re │
│ 717 │ │ if self.args.parallel_mode == ParallelMode.TPU and hasattr(model, "tie_weights") │
│ 718 │ │ │ model.tie_weights() │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/modeling_utils.py:1811 in to │
│ │
│ 1808 │ │ │ │ " model has already been set to the correct devices and casted to the co │
│ 1809 │ │ │ ) │
│ 1810 │ │ else: │
│ ❱ 1811 │ │ │ return super().to(*args, **kwargs) │
│ 1812 │ │
│ 1813 │ def half(self, *args): │
│ 1814 │ │ # Checks if the model has been loaded in 8-bit │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1145 in to │
│ │
│ 1142 │ │ │ │ │ │ │ non_blocking, memory_format=convert_to_format) │
│ 1143 │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() else No │
│ 1144 │ │ │
│ ❱ 1145 │ │ return self._apply(convert) │
│ 1146 │ │
│ 1147 │ def register_full_backward_pre_hook( │
│ 1148 │ │ self, │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:820 in _apply │
│ │
│ 817 │ │ │ # track autograd history of `param_applied`, so we have to use │
│ 818 │ │ │ # `with torch.no_grad():` │
│ 819 │ │ │ with torch.no_grad(): │
│ ❱ 820 │ │ │ │ param_applied = fn(param) │
│ 821 │ │ │ should_use_set_data = compute_should_use_set_data(param, param_applied) │
│ 822 │ │ │ if should_use_set_data: │
│ 823 │ │ │ │ param.data = param_applied │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1143 in convert │
│ │
│ 1140 │ │ │ if convert_to_format is not None and t.dim() in (4, 5): │
│ 1141 │ │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() els │
│ 1142 │ │ │ │ │ │ │ non_blocking, memory_format=convert_to_format) │
│ ❱ 1143 │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() else No │
│ 1144 │ │
│ 1145 │ │ return self._apply(convert) │
│ 1146 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
NotImplementedError: Cannot copy out of meta tensor; no data!
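For context, the check at trainer.py:475 in the traceback suggests the Trainer only skips model.to(device) when is_loaded_in_8bit is set on the model, and the .to() call then fails because some parameters are still meta tensors (placeholders with no data, as happens when weights are dispatched or offloaded by accelerate). Below is a minimal loading sketch that sets that flag so the Trainer leaves device placement alone; the base checkpoint name and LoRA hyperparameters are illustrative assumptions, not the repo's exact code.

import transformers
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

# Assumed base checkpoint for illustration; substitute the one used in train_lora.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,   # sets model.is_loaded_in_8bit, so Trainer skips model.to(device)
    device_map="auto",   # let accelerate place the quantized weights
)
model = prepare_model_for_int8_training(model)  # peft helper; renamed prepare_model_for_kbit_training in newer peft

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
# The Trainer setup from the snippet above can then be reused unchanged.

If the model was instead loaded in fp16 with device_map="auto" and parts of it got offloaded, those offloaded weights typically stay on the meta device, which matches the "Cannot copy out of meta tensor; no data!" error above.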