Closed jS5t3r closed 7 months ago
I have installed Python 3.8, Opacus 1.4.0, torch 2.1.1, and torchvision 0.16.1.
My implementation contains a BERT LLM sequence classifier.
import torch import torch.nn as nn import torch.optim as optim from transformers import BertForSequenceClassification, BertTokenizer from datasets import load_dataset from torch.utils.data import DataLoader from opacus import PrivacyEngine # Load and preprocess the SST-2 dataset dataset = load_dataset("glue", "sst2") train_dataset = dataset["train"] # Load pre-trained BERT model and tokenizer model = BertForSequenceClassification.from_pretrained("bert-base-uncased") tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") # Define data loader def collate_fn(batch): inputs = tokenizer(batch["sentence"], padding=True, truncation=True, max_length=128, return_tensors="pt") inputs["labels"] = torch.tensor(batch["label"]) return inputs train_loader = DataLoader(train_dataset, batch_size=32, collate_fn=collate_fn) # Define optimizer optimizer = optim.SGD(model.parameters(), lr=0.01) # Define privacy engine privacy_engine = PrivacyEngine() model, optimizer, train_loader = privacy_engine.make_private( module=model, optimizer=optimizer, data_loader=train_loader, noise_multiplier=1.0, max_grad_norm=1.0, ) # Define loss function criterion = nn.CrossEntropyLoss() # Train the model for epoch in range(5): # Adjust number of epochs as needed model.train() for batch in train_loader: optimizer.zero_grad() inputs = {key: value.cuda() for key, value in batch.items()} outputs = model(**inputs) loss = criterion(outputs.logits, inputs["labels"].cuda()) loss.backward() optimizer.step() print(f"Epoch {epoch+1} finished.")
Error:
UnsupportedModuleError Traceback (most recent call last) Cell In[126], line 30 28 # Define privacy engine 29 privacy_engine = PrivacyEngine() ---> 30 model, optimizer, train_loader = privacy_engine.make_private( 31 module=model, 32 optimizer=optimizer, 33 data_loader=train_loader, 34 noise_multiplier=1.0, 35 max_grad_norm=1.0, 36 ) 38 # Define loss function 39 criterion = nn.CrossEntropyLoss() File ~/.conda/envs/def2/lib/python3.8/site-packages/opacus/privacy_engine.py:399, in PrivacyEngine.make_private(self, module, optimizer, data_loader, noise_multiplier, max_grad_norm, batch_first, loss_reduction, poisson_sampling, clipping, noise_generator, grad_sample_mode) 393 raise ValueError( 394 "Module parameters are different than optimizer Parameters" 395 ) 397 distributed = isinstance(module, (DPDDP, DDP)) --> 399 module = self._prepare_model( 400 module, 401 batch_first=batch_first, 402 loss_reduction=loss_reduction, 403 grad_sample_mode=grad_sample_mode, 404 ) 405 if poisson_sampling: 406 module.register_backward_hook(forbid_accumulation_hook) File ~/.conda/envs/def2/lib/python3.8/site-packages/opacus/privacy_engine.py:224, in PrivacyEngine._prepare_model(self, module, batch_first, loss_reduction, grad_sample_mode) 214 def _prepare_model( 215 self, 216 module: nn.Module, (...) 222 # Ideally, validation should have been taken care of by calling 223 # `get_compatible_module()` --> 224 self.validate(module=module, optimizer=None, data_loader=None) 226 # wrap 227 if isinstance(module, AbstractGradSampleModule): File ~/.conda/envs/def2/lib/python3.8/site-packages/opacus/privacy_engine.py:289, in PrivacyEngine.validate(self, module, optimizer, data_loader) 269 def validate( 270 self, 271 *, (...) 274 data_loader: Optional[DataLoader], 275 ): 276 """ 277 Validate that task components are compatible with DP. 278 Same as ``is_compatible()``, but raises error instead of returning bool. (...) 287 If one or more modules found to be incompatible 288 """ --> 289 ModuleValidator.validate(module, strict=True) File ~/.conda/envs/def2/lib/python3.8/site-packages/opacus/validators/module_validator.py:69, in ModuleValidator.validate(cls, module, strict) 67 # raise/return as needed 68 if strict and len(errors) > 0: ---> 69 raise UnsupportedModuleError(errors) 70 else: 71 return errors UnsupportedModuleError: [IllegalModuleConfigurationError('Model needs to be in training mode')]
could you run model.train() before privacy_engine?
🐛 Bug
I have installed Python 3.8, Opacus 1.4.0, torch 2.1.1, and torchvision 0.16.1.
My implementation contains a BERT LLM sequence classifier.
Error: