I'm receiving RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:2 and cpu! (when checking argument for argument weight in method wrapper_CUDA__native_layer_norm) when executing trainer.test(model, datamodule=dm, ckpt_path=checkpoint_path).
If I save the trained model manually instead (i.e. uncomment trainer.save_checkpoint(checkpoint_path)) and use the checkpoint_path for testing, this script will run smoothly.
The code provided in the next cell is saved in a file named mwe.py and executed as follows
import argparse
import json
import time
from datasets import Dataset
import torch
from torch.utils.data import DataLoader
import torch.distributed as dist
import transformers
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
)
from transformers import DataCollatorWithPadding
import pandas as pd
import peft
import warnings
from lightning import (
Trainer,
LightningDataModule,
LightningModule,
seed_everything,
)
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.utilities.deepspeed import (
convert_zero_checkpoint_to_fp32_state_dict,
)
from deepspeed.ops.adam import FusedAdam
warnings.filterwarnings("ignore")
torch.set_float32_matmul_precision("medium")
seed_everything(42)
def generate_seeded_prompts(for_training=True):
if for_training:
return ["training text" for _ in range(100)]
else:
return ["testing text" for _ in range(10)]
def get_transformer(pretrained_fm="tiiuae/falcon-7b-instruct"):
"""
Args:
pretrained_fm (str, optional): Location of pre-trained foundation model. Defaults to "tiiuae/falcon-7b-instruct".
Returns:
Transformer corresponding to the pretrained model
"""
model = AutoModelForCausalLM.from_pretrained(
pretrained_fm,
trust_remote_code=True,
low_cpu_mem_usage=True,
torch_dtype=torch.float16,
)
return model
def get_tokenizer(pretrained_fm="tiiuae/falcon-7b-instruct"):
"""
Args:
pretrained_fm (str, optional): Location of pre-trained foundation model. Defaults to "tiiuae/falcon-7b-instruct".
Returns:
Tokenizer corresponding to the pre-trained foundation model.
"""
tokenizer = AutoTokenizer.from_pretrained(
pretrained_fm,
padding=True,
trust_remote_code=True,
use_fast=True,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = 512
return tokenizer
class CustomDataModule(LightningDataModule):
"""
Args:
LightningDataModule : Custom datamodule class
"""
def __init__(
self,
train_prompts,
test_prompts,
max_seq_length: int = 512,
train_batch_size: int = 4,
eval_batch_size: int = 4,
test_batch_size: int = 2,
train_val_split: float = 0.9,
**kwargs,
):
"""
Args:
train_prompts (list of strings): prompts used for fine-tuning
test_prompts (list of strings): prompts used for testing
max_seq_length (int, optional): Max length of tokens where each prompt gets mapped. Defaults to 512.
train_batch_size (int, optional): Minibatch size for training dataloader. Defaults to 4.
eval_batch_size (int, optional): Minibatch size for validation dataloader. Defaults to 4.
test_batch_size (int, optional): Minibatch size for testing dataloader. Defaults to 2.
train_val_split (float, optional): Train to validation ratio applied for splitting the train_prompts. Defaults to 0.9.
"""
super().__init__()
self.train_prompts = train_prompts
self.test_prompts = test_prompts
self.dataset = {}
self.max_seq_length = max_seq_length
self.train_batch_size = train_batch_size
self.eval_batch_size = eval_batch_size
self.test_batch_size = test_batch_size
self.train_val_split = train_val_split
self.tokenizer = get_tokenizer()
self.data_collator = DataCollatorWithPadding(self.tokenizer)
def setup(self, stage):
train_val_size = int(self.train_val_split * len(self.train_prompts))
train_data = self.train_prompts[:train_val_size]
validation_data = self.train_prompts[train_val_size:]
test_data = self.test_prompts
train_df = pd.DataFrame({"text": train_data})
validation_df = pd.DataFrame({"text": validation_data})
test_df = pd.DataFrame({"text": test_data})
self.dataset["train"] = Dataset.from_pandas(train_df)
self.dataset["validation"] = Dataset.from_pandas(validation_df)
self.dataset["test"] = Dataset.from_pandas(test_df)
self.dataset["train"] = self.dataset["train"].map(self.tokenize)
self.dataset["validation"] = self.dataset["validation"].map(self.tokenize)
self.dataset["test"] = self.dataset["test"].map(
lambda example_batch: self.tokenize(example_batch, padding=True)
)
self.dataset["train"].set_format(type="torch")
self.dataset["validation"].set_format(type="torch")
self.dataset["test"].set_format(type="torch")
self.dataset["train"] = self.dataset["train"].remove_columns(["text"])
self.dataset["validation"] = self.dataset["validation"].remove_columns(["text"])
self.dataset["test"] = self.dataset["test"].remove_columns(["text"])
print(f"Train size {len(self.dataset['train'])}")
print(f"Validation size {len(self.dataset['validation'])}")
print(f"Test size {len(self.dataset['test'])}")
def train_dataloader(self):
return DataLoader(
self.dataset["train"],
batch_size=self.train_batch_size,
shuffle=True,
num_workers=8,
collate_fn=self.data_collator,
)
def val_dataloader(self):
return DataLoader(
self.dataset["validation"],
batch_size=self.eval_batch_size,
shuffle=False,
num_workers=8,
collate_fn=self.data_collator,
)
def test_dataloader(self):
return DataLoader(
self.dataset["test"],
batch_size=self.test_batch_size,
shuffle=False,
num_workers=8,
collate_fn=self.data_collator,
)
def tokenize(self, example_batch, indices=None, padding=True):
features = self.tokenizer(
example_batch["text"],
padding=padding,
truncation=True,
max_length=512,
)
return {
"input_ids": features["input_ids"],
"attention_mask": features["attention_mask"],
}
def setup_peft_config(peft_method="LORA"):
"""
Args:
peft_method (str, optional): Defaults to "LORA".
Returns:
Sets up the configuration required for applying a PEFT method on the pre-trained model.
"""
if peft_method == "IA3":
peft_config = peft.IA3Config(
task_type="CAUSAL_LM",
inference_mode=False,
target_modules=["query_key_value"],
feedforward_modules=["dense_h_to_4h", "dense_4h_to_h"],
)
elif peft_method == "LORA":
peft_config = peft.LoraConfig(
task_type="CAUSAL_LM",
inference_mode=False,
r=8,
lora_alpha=8,
lora_dropout=0.1,
target_modules=[
"query_key_value",
"dense_h_to_4h",
"dense_4h_to_h",
],
)
return peft_config
class LitFM(LightningModule):
"""
Args:
LightningModule for fine-tuning and testing
"""
def __init__(
self,
use_deepspeed=True,
peft_method="LORA",
sanity_check=True,
test_dataloader=None,
dnm=None,
fm=None,
):
"""
Args:
use_deepspeed (bool, optional): Flag for use of deepspeed acceleration. Defaults to True.
peft_method (str, optional): PEFT method to be used for fine-tuning. Defaults to "LORA".
sanity_check (bool, optional): Flag for printing expected input for model forward. Defaults to False. # To be removed
test_dataloader (dataloader, optional): Test dataloader . Defaults to None. # To be removed
dnm (str, optional): Fine-tuning dataset name. Defaults to None.
"""
super().__init__()
self.tokenizer, self.model = get_tokenizer(pretrained_fm=fm), get_transformer(
pretrained_fm=fm
)
self.model.config.pad_token_id = self.tokenizer.eos_token_id
if peft_method:
peft_config = setup_peft_config(peft_method=peft_method)
print(f"applying {peft_method} PEFT")
self.model = peft.get_peft_model(self.model, peft_config)
print("preparations for PEFT done")
self.model.print_trainable_parameters()
self.validation_step_outputs = []
self.use_deepspeed = use_deepspeed
self.sanity_check = sanity_check
self.test_dataloader = test_dataloader
self.dnm, self.fm = dnm, fm
self.save_hyperparameters()
self.start_time = time.time()
def forward(self, **inputs):
return self.model(**inputs)
def training_step(self, batch):
kwargs_for_forward = {
"input_ids": batch["input_ids"],
"attention_mask": batch["attention_mask"],
"labels": batch["input_ids"],
}
if self.sanity_check:
print("inside sanity check")
# Convert token IDs back to text using the tokenizer's decode method
decoded_text = self.tokenizer.decode(
batch["input_ids"][0], skip_special_tokens=True
)
print(f"decoded training input : {decoded_text}")
model_output = self(**kwargs_for_forward)
loss = model_output.loss
tensorboard_logs = (
{"training_loss": loss, "step": self.current_epoch}
if self.dnm == "ML"
else {"training_loss": loss}
)
# if not (self.use_deepspeed or self.use_ddp) or dist.get_rank() == 0:
self.log_dict(tensorboard_logs)
return loss
def validation_step(self, batch, batch_idx):
kwargs_for_forward = {
"input_ids": batch["input_ids"],
"attention_mask": batch["attention_mask"],
"labels": batch["input_ids"],
}
model_output = self(**kwargs_for_forward)
loss = model_output.loss
tensorboard_logs = (
{"val_loss": loss, "step": self.current_epoch}
if self.dnm == "ML"
else {"val_loss": loss}
)
# if not (self.use_deepspeed or self.use_ddp) or dist.get_rank() == 0:
self.log_dict(tensorboard_logs)
return loss
def configure_optimizers(self):
return FusedAdam(self.parameters(), lr=1e-3, weight_decay=1e-2)
def test_step(self, test_batch, test_batch_idx):
print("inside test step")
# Generate text using the model
# with torch.inference_mode():
outputs = self.model.generate(
input_ids=test_batch["input_ids"],
attention_mask=test_batch["attention_mask"],
num_return_sequences=1,
max_new_tokens=512,
eos_token_id=self.tokenizer.eos_token_id,
pad_token_id=self.tokenizer.eos_token_id,
do_sample=True,
top_k=10,
)
# for output in outputs:
generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"generated text : {generated_text}")
if dist.get_rank() == 0:
update_recommendations_outfile(generated_text, dnm=self.dnm, fm=self.fm)
# start a fine-tuning task from scratch
def run(args):
very_beginning = time.time()
print("Generate prompts")
train_prompts = generate_seeded_prompts()
test_prompts = generate_seeded_prompts(for_training=False)
print("setting up dataloaders")
dm = CustomDataModule(train_prompts, test_prompts)
dm.setup("")
# Set up fine-tuning
print("setting up model")
logger = TensorBoardLogger(f"exp_out_{args.dataset}", name="log")
model = LitFM(
peft_method=args.peft_method,
test_dataloader=dm.test_dataloader(),
dnm=args.dataset,
fm=args.fm,
)
checkpoint_callback = ModelCheckpoint(
dirpath=f"./checkpoints_{args.dataset}",
save_top_k=1,
monitor="val_loss",
mode="min",
save_weights_only=True,
)
early_stopping_callback = EarlyStopping(monitor="val_loss", mode="min", patience=5)
print("setting up trainer")
trainer = Trainer(
accelerator="gpu",
precision="bf16",
max_epochs=20,
strategy="deepspeed_stage_3",
num_sanity_val_steps=-1,
check_val_every_n_epoch=1,
log_every_n_steps=1,
logger=logger,
accumulate_grad_batches=4,
# gradient_clip_val=1.0,
callbacks=[checkpoint_callback, early_stopping_callback],
)
print("starting fit")
if not args.checkpoint_path:
trainer.fit(model, datamodule=dm)
else:
print(f"loading checkpoint from : {args.checkpoint_path}")
trainer.fit(model, datamodule=dm, ckpt_path=args.checkpoint_path)
checkpoint_path = f"deepspeed_ckpt"
# trainer.save_checkpoint(checkpoint_path)
checkpoint_path = checkpoint_callback.best_model_path
trainer.test(model, datamodule=dm, ckpt_path=checkpoint_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Process some integers.")
parser.add_argument("--training_prompts", type=str, default="w/o description")
parser.add_argument("--peft_method", type=str, default="LORA")
parser.add_argument("--dataset", type=str, default="MWE")
parser.add_argument("--checkpoint_path", type=str, default=None)
parser.add_argument("--fm", type=str, default="tiiuae/falcon-7b-instruct")
args = parser.parse_args()
transformers.logging.set_verbosity_error()
assert torch.cuda.is_available(), "GPU Required"
run(args)
Error messages and logs
inside test step
Traceback (most recent call last):
File "/home/ubuntu/dion/mwe.py", line 427, in <module>
run(args)
File "/home/ubuntu/dion/mwe.py", line 401, in run
trainer.test(model, datamodule=dm, ckpt_path=checkpoint_path)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 742, in test
return call._call_and_handle_interrupt(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 42, in _call_and_handle_interrupt
return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/strategies/launchers/subprocess_script.py", line 93, in launch
return function(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 785, in _test_impl
results = self._run(model, ckpt_path=ckpt_path)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1016, in _run_stage
return self._evaluation_loop.run()
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/utilities.py", line 181, in _decorator
return loop_run(self, *args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 115, in run
self._evaluation_step(batch, batch_idx, dataloader_idx)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 376, in _evaluation_step
output = call._call_strategy_hook(trainer, hook_name, *step_kwargs.values())
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 293, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/strategies/deepspeed.py", line 919, in test_step
return self.model(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1783, in forward
loss = self.module(*inputs, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/overrides/base.py", line 100, in forward
return self._forward_module.test_step(*inputs, **kwargs)
File "/home/ubuntu/dion/mwe.py", line 324, in test_step
outputs = self.model.generate(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/peft/peft_model.py", line 1110, in generate
outputs = self.base_model.generate(**kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/transformers/generation/utils.py", line 1821, in generate
return self.sample(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/transformers/generation/utils.py", line 3095, in sample
outputs = self(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 753, in forward
transformer_outputs = self.transformer(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 648, in forward
outputs = block(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 381, in forward
layernorm_output = self.input_layernorm(hidden_states)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 190, in forward
return F.layer_norm(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/functional.py", line 2808, in layer_norm
return torch.layer_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument weight in method wrapper_CUDA__native_layer_norm)
Traceback (most recent call last):
File "/home/ubuntu/dion/mwe.py", line 427, in <module>
run(args)
File "/home/ubuntu/dion/mwe.py", line 401, in run
trainer.test(model, datamodule=dm, ckpt_path=checkpoint_path)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 742, in test
return call._call_and_handle_interrupt(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 42, in _call_and_handle_interrupt
return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/strategies/launchers/subprocess_script.py", line 93, in launch
return function(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 785, in _test_impl
results = self._run(model, ckpt_path=ckpt_path)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1016, in _run_stage
return self._evaluation_loop.run()
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/utilities.py", line 181, in _decorator
return loop_run(self, *args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 115, in run
self._evaluation_step(batch, batch_idx, dataloader_idx)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 376, in _evaluation_step
output = call._call_strategy_hook(trainer, hook_name, *step_kwargs.values())
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 293, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/strategies/deepspeed.py", line 919, in test_step
return self.model(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1783, in forward
loss = self.module(*inputs, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/overrides/base.py", line 100, in forward
return self._forward_module.test_step(*inputs, **kwargs)
File "/home/ubuntu/dion/mwe.py", line 324, in test_step
outputs = self.model.generate(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/peft/peft_model.py", line 1110, in generate
outputs = self.base_model.generate(**kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/transformers/generation/utils.py", line 1821, in generate
return self.sample(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/transformers/generation/utils.py", line 3095, in sample
outputs = self(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 753, in forward
transformer_outputs = self.transformer(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 648, in forward
outputs = block(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 381, in forward
layernorm_output = self.input_layernorm(hidden_states)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 190, in forward
return F.layer_norm(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/functional.py", line 2808, in layer_norm
return torch.layer_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cpu! (when checking argument for argument weight in method wrapper_CUDA__native_layer_norm)
Traceback (most recent call last):
File "/home/ubuntu/dion/mwe.py", line 427, in <module>
run(args)
File "/home/ubuntu/dion/mwe.py", line 401, in run
trainer.test(model, datamodule=dm, ckpt_path=checkpoint_path)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 742, in test
return call._call_and_handle_interrupt(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 42, in _call_and_handle_interrupt
return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/strategies/launchers/subprocess_script.py", line 93, in launch
return function(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 785, in _test_impl
results = self._run(model, ckpt_path=ckpt_path)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1016, in _run_stage
return self._evaluation_loop.run()
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/utilities.py", line 181, in _decorator
return loop_run(self, *args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 115, in run
self._evaluation_step(batch, batch_idx, dataloader_idx)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 376, in _evaluation_step
output = call._call_strategy_hook(trainer, hook_name, *step_kwargs.values())
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 293, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/strategies/deepspeed.py", line 919, in test_step
return self.model(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1783, in forward
loss = self.module(*inputs, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/overrides/base.py", line 100, in forward
return self._forward_module.test_step(*inputs, **kwargs)
File "/home/ubuntu/dion/mwe.py", line 324, in test_step
outputs = self.model.generate(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/peft/peft_model.py", line 1110, in generate
outputs = self.base_model.generate(**kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/transformers/generation/utils.py", line 1821, in generate
return self.sample(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/transformers/generation/utils.py", line 3095, in sample
outputs = self(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 753, in forward
transformer_outputs = self.transformer(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 648, in forward
outputs = block(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 381, in forward
layernorm_output = self.input_layernorm(hidden_states)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 190, in forward
return F.layer_norm(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/functional.py", line 2808, in layer_norm
return torch.layer_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:3 and cpu! (when checking argument for argument weight in method wrapper_CUDA__native_layer_norm)
Traceback (most recent call last):
File "/home/ubuntu/dion/mwe.py", line 427, in <module>
run(args)
File "/home/ubuntu/dion/mwe.py", line 401, in run
trainer.test(model, datamodule=dm, ckpt_path=checkpoint_path)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 742, in test
return call._call_and_handle_interrupt(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 42, in _call_and_handle_interrupt
return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/strategies/launchers/subprocess_script.py", line 93, in launch
return function(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 785, in _test_impl
results = self._run(model, ckpt_path=ckpt_path)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1016, in _run_stage
return self._evaluation_loop.run()
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/utilities.py", line 181, in _decorator
return loop_run(self, *args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 115, in run
self._evaluation_step(batch, batch_idx, dataloader_idx)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 376, in _evaluation_step
output = call._call_strategy_hook(trainer, hook_name, *step_kwargs.values())
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 293, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/strategies/deepspeed.py", line 919, in test_step
return self.model(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1783, in forward
loss = self.module(*inputs, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/lightning/pytorch/overrides/base.py", line 100, in forward
return self._forward_module.test_step(*inputs, **kwargs)
File "/home/ubuntu/dion/mwe.py", line 324, in test_step
outputs = self.model.generate(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/peft/peft_model.py", line 1110, in generate
outputs = self.base_model.generate(**kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/transformers/generation/utils.py", line 1821, in generate
return self.sample(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/transformers/generation/utils.py", line 3095, in sample
outputs = self(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 753, in forward
transformer_outputs = self.transformer(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 648, in forward
outputs = block(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b-instruct/eb410fb6ffa9028e97adb801f0d6ec46d02f8b07/modelling_RW.py", line 381, in forward
layernorm_output = self.input_layernorm(hidden_states)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1538, in _call_impl
result = forward_call(*args, **kwargs)
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 190, in forward
return F.layer_norm(
File "/home/ubuntu/dion/poc/lib/python3.10/site-packages/torch/nn/functional.py", line 2808, in layer_norm
return torch.layer_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:2 and cpu! (when checking argument for argument weight in method wrapper_CUDA__native_layer_norm)```
Bug description
I'm receiving
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:2 and cpu! (when checking argument for argument weight in method wrapper_CUDA__native_layer_norm)
when executingtrainer.test(model, datamodule=dm, ckpt_path=checkpoint_path)
.If I save the trained model manually instead (i.e. uncomment
trainer.save_checkpoint(checkpoint_path)
) and use thecheckpoint_path
for testing, this script will run smoothly.The code provided in the next cell is saved in a file named
mwe.py
and executed as followsTOKENIZERS_PARALLELISM=true CUDA_VISIBLE_DEVICES="0,1,2,3" python mwe.py
What version are you seeing the problem on?
v2.0
How to reproduce the bug
Error messages and logs
Environment
cc @awaelchli