Closed xiehuanyi closed 8 months ago
Could you please reveal your model architecture? Have you ever checked whether this model is compatible with Opacus (https://opacus.ai/tutorials/guide_to_module_validator)?
Closing the issue due to no response. Feel free to re-open if needed.
Hello @HuanyuZhang. I am facing a very similar issue. I hope you can help me out. I am using the dp-transformers library to implement differential privacy in a text classification task using bert-tiny. The issue seems to be very similar to the one described by @xiehuanyi.
Here is the code:
import datasets
import dp_transformers
import transformers
import sys
from dataclasses import dataclass, field, asdict
from peft import get_peft_model, LoraConfig
@dataclass
class ModelArguments:
    """Model selection and tokenization settings."""

    # HuggingFace hub identifier of the model to fine-tune.
    model_name: str = field(
        default="gpt2",
        metadata={"help": "Model name in HuggingFace, e.g. 'gpt2'"},
    )
    # Inputs are truncated/padded to at most this many tokens.
    sequence_len: int = field(
        default=128,
        metadata={"help": "Maximum sequence length"},
    )
@dataclass
class LoraArguments:
    """LoRA hyper-parameters plus a converter to a peft ``LoraConfig``."""

    enable_lora: bool = field(
        default=False, metadata={"help": "Whether to enable LoRA"}
    )
    lora_dim: int = field(default=8, metadata={"help": "LoRA dimension"})
    lora_alpha: int = field(default=8, metadata={"help": "LoRA alpha"})
    lora_dropout: float = field(default=0.0, metadata={"help": "LoRA dropout"})

    def as_peft_config(self) -> LoraConfig:
        """Convert these fields into a peft ``LoraConfig``.

        Raises:
            ValueError: if ``enable_lora`` is False.
        """
        if not self.enable_lora:
            raise ValueError("LoRA is not enabled, cannot convert to LoRA config")
        config_kwargs = asdict(self)
        # ``enable_lora`` is our own switch, not a LoraConfig parameter.
        del config_kwargs["enable_lora"]
        # peft names the adapter rank ``r`` rather than ``lora_dim``.
        config_kwargs["r"] = config_kwargs.pop("lora_dim")
        return LoraConfig(**config_kwargs)
@dataclass
class Arguments:
    """Bundle of all argument groups consumed by ``main``."""

    train: dp_transformers.TrainingArguments
    privacy: dp_transformers.PrivacyArguments
    model: ModelArguments
    # Bug fix: this was annotated ``LoraConfig``, but every caller passes a
    # ``LoraArguments`` instance (see ``main(Arguments(..., lora=lora_args))``).
    lora: LoraArguments
def main(args: Arguments):
    """Fine-tune a BERT sequence classifier on SST-2 with DP-SGD.

    Args:
        args: bundled training / privacy / model / LoRA argument groups.
    """
    transformers.set_seed(args.train.seed)

    # Load model
    model = transformers.BertForSequenceClassification.from_pretrained(args.model.model_name)

    # Bug fix: the original imported peft and accepted LoRA arguments but never
    # applied them. Wrap the model with LoRA adapters when requested — with DP,
    # training only the adapter weights is also the usual way to keep the model
    # Opacus-compatible.
    if args.lora.enable_lora:
        model = get_peft_model(model, args.lora.as_peft_config())

    # Load data (2% held out as the evaluation split).
    dataset = datasets.load_dataset('glue', 'sst2', split="train").train_test_split(0.02, seed=args.train.seed)

    # Load tokenizer
    tokenizer = transformers.BertTokenizer.from_pretrained(args.model.model_name)
    dataset = dataset.map(
        lambda examples: tokenizer(examples['sentence'], padding="max_length", truncation=True, max_length=args.model.sequence_len),
        desc="tokenizing dataset", remove_columns=['sentence', 'idx']
    )
    # Bug fix: ``rename_column`` returns a NEW dataset; the original discarded
    # the result, so the "label" column was never actually renamed to "labels".
    dataset = dataset.rename_column("label", "labels")

    model = model.cuda()
    model.train()

    data_collator = transformers.DataCollatorWithPadding(tokenizer, padding="longest")

    trainer = dp_transformers.dp_utils.OpacusDPTrainer(
        # Bug fix: the original read the module-level globals ``train_args`` /
        # ``privacy_args`` here instead of the values carried in ``args``,
        # silently ignoring whatever the caller passed in.
        args=args.train,
        model=model,
        train_dataset=dataset['train'],
        eval_dataset=dataset['test'],
        data_collator=data_collator,
        privacy_args=args.privacy,
    )

    try:
        trainer.train()
    finally:
        # Always report the privacy budget actually spent, even if training
        # aborts part-way through.
        eps_prv = trainer.get_prv_epsilon()
        eps_rdp = trainer.get_rdp_epsilon()
        trainer.log({
            "final_epsilon_prv": eps_prv,
            "final_epsilon_rdp": eps_rdp
        })
# Script entry: build the argument groups and launch training.
train_args = dp_transformers.TrainingArguments(
    output_dir="scratch",
    per_device_train_batch_size=64,
    gradient_accumulation_steps=1,
    evaluation_strategy="steps",
    eval_steps=45,
    per_device_eval_batch_size=64,
    eval_accumulation_steps=1,
    seed=42,
    weight_decay=0.01,
    remove_unused_columns=False,
    num_train_epochs=1,
    logging_steps=5,
    # 0 disables the Trainer's own gradient clipping; per-sample clipping is
    # done by Opacus via ``per_sample_max_grad_norm`` below.
    max_grad_norm=0,
    lr_scheduler_type="constant",
    learning_rate=3e-4,
    # Bug fix: ``label_names`` must be a list of strings. A bare string is
    # itself iterable, so the HF Trainer would treat each character as a
    # separate label column name.
    label_names=["labels"],
)
privacy_args = dp_transformers.PrivacyArguments(
    per_sample_max_grad_norm=1.0,
    target_epsilon=8,
)
model_args = ModelArguments(
    model_name="prajjwal1/bert-tiny",
    sequence_len=512,
)
lora_args = LoraArguments(
    enable_lora=True,
    lora_dim=4,
    lora_alpha=32,
    lora_dropout=0.0,
)
main(Arguments(train=train_args, privacy=privacy_args, model=model_args, lora=lora_args))
Here is the stack trace showing the error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
[<ipython-input-24-7cff3dd13869>](https://localhost:8080/#) in <cell line: 33>()
31 lora_dropout=0.0
32 )
---> 33 main(Arguments(train=train_args, privacy=privacy_args, model=model_args, lora=lora_args))
7 frames
[<ipython-input-23-9c34a7138e6f>](https://localhost:8080/#) in main(args)
87
88 try:
---> 89 trainer.train()
90 finally:
91 eps_prv = trainer.get_prv_epsilon()
[/usr/local/lib/python3.10/dist-packages/transformers/trainer.py](https://localhost:8080/#) in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1643 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1644 )
-> 1645 return inner_training_loop(
1646 args=args,
1647 resume_from_checkpoint=resume_from_checkpoint,
[/usr/local/lib/python3.10/dist-packages/transformers/trainer.py](https://localhost:8080/#) in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1996 optimizer_was_run = scale_before <= scale_after
1997 else:
-> 1998 self.optimizer.step()
1999 optimizer_was_run = not self.accelerator.optimizer_step_was_skipped
2000
[/usr/local/lib/python3.10/dist-packages/accelerate/optimizer.py](https://localhost:8080/#) in step(self, closure)
143 self._accelerate_step_called = False
144 else:
--> 145 self.optimizer.step(closure)
146
147 def _switch_parameters(self, parameters_map):
[/usr/local/lib/python3.10/dist-packages/torch/optim/lr_scheduler.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
66 instance._step_count += 1
67 wrapped = func.__get__(instance, cls)
---> 68 return wrapped(*args, **kwargs)
69
70 # Note that the returned function here is no longer a bound method,
[/usr/local/lib/python3.10/dist-packages/opacus/optimizers/optimizer.py](https://localhost:8080/#) in step(self, closure)
511 closure()
512
--> 513 if self.pre_step():
514 return self.original_optimizer.step()
515 else:
[/usr/local/lib/python3.10/dist-packages/opacus/optimizers/optimizer.py](https://localhost:8080/#) in pre_step(self, closure)
492 returns the loss. Optional for most optimizers.
493 """
--> 494 self.clip_and_accumulate()
495 if self._check_skip_next_step():
496 self._is_last_step_skipped = True
[/usr/local/lib/python3.10/dist-packages/opacus/optimizers/optimizer.py](https://localhost:8080/#) in clip_and_accumulate(self)
402 g.reshape(len(g), -1).norm(2, dim=-1) for g in self.grad_samples
403 ]
--> 404 per_sample_norms = torch.stack(per_param_norms, dim=1).norm(2, dim=1)
405 per_sample_clip_factor = (
406 self.max_grad_norm / (per_sample_norms + 1e-6)
RuntimeError: stack expects each tensor to be equal size, but got [64] at entry 0 and [1] at entry 1
I am trying to finetune a gpt-like model with DPSGD to protect the data privacy. However I got an error (It works fine when use torch.optim.SGD) when applying privacy engine. Error is shown below.
And here is my code.
And my environment is shown below.
Could anyone help me with this?