🐛 Bug

Hi,

A short summary: an Opacus-based classification model produces nearly identical outputs even when it is trained with different epsilon values on the Wine dataset.
The sample code below is easy to reproduce on a local setup. Note that the equivalent model works as expected with the TensorFlow Privacy implementation.
To begin with, once the epsilon value exceeds a certain threshold, Opacus starts emitting unexpected warnings such as:
/python3.10/site-packages/opacus/accountants/analysis/prv/prvs.py:50: RuntimeWarning: invalid value encountered in log
z = np.log((np.exp(t) + q - 1) / q)
(The warning presumably fires because np.exp(t) + q - 1 goes negative, so np.log receives an invalid argument and returns NaN.) I tried to use Google Colab as you suggested, but I am unfamiliar with it, so I have pasted the code below and attached the dataset, the Kaggle red-wine quality data. Interestingly, the accuracy results were nearly identical regardless of which epsilon value I selected. Moreover, once epsilon exceeds a certain threshold, the program stalls.
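As a side check (not part of the repro script below), one can ask the accountant directly which noise multiplier a given target epsilon translates into; this is a minimal sketch, assuming the opacus.accountants.utils.get_noise_multiplier helper from Opacus 1.x, with a sample rate of 50/1279 matching the batch size and the ~80% training split of the 1599-row red-wine data:

from opacus.accountants.utils import get_noise_multiplier  # Opacus 1.x helper (assumption)

for eps in [1, 10, 100, 500]:
    sigma = get_noise_multiplier(
        target_epsilon=eps,
        target_delta=0.001,
        sample_rate=50 / 1279,  # batch_size / len(train_ds) for this split
        epochs=10,
        accountant="prv",  # the accountant that emits the warning above
    )
    print("target epsilon", eps, "-> noise multiplier", sigma)

If the returned noise multiplier stops changing (or the call itself warns) past some epsilon, that would line up with the identical accuracies reported above.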
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as torch_optim
import torch.nn.functional as F
import opacus
batch_size = 50
epochs = 10
lr = 0.0001
n_class = 10
df = pd.read_csv("winequality-red.csv")
X = df.iloc[:, 5:9]  # four numeric feature columns (indices 5-8)
y = df.iloc[:, -1]   # wine quality label
X = X.to_numpy()
y = y.to_numpy()
assert X.shape[1] == 4
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
train_priv_X, test_pub_X, train_priv_Y, test_pub_Y = train_test_split(X, y, test_size=0.2, stratify=y)
# Fit the encoder on the training labels only and reuse it for the test
# labels, so both splits share the same class indices.
label_encoder = LabelEncoder().fit(train_priv_Y)
train_priv_Y = label_encoder.transform(train_priv_Y)
test_pub_Y = label_encoder.transform(test_pub_Y)
from torch.utils.data import Dataset, DataLoader

class PowerDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X.copy().astype(np.float32)  # numerical columns
        self.y = Y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
train_ds = PowerDataset(train_priv_X, train_priv_Y)
test_ds = PowerDataset(test_pub_X, test_pub_Y)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True)
class PowerModel(nn.Module):
    def __init__(self, n_cont, n_class):
        super().__init__()
        self.n_cont = n_cont
        self.n_class = n_class
        self.lin1 = nn.Linear(self.n_cont, 20)
        self.lin2 = nn.Linear(20, 20)
        self.lin3 = nn.Linear(20, self.n_class)
        # GroupNorm with a single group instead of BatchNorm: BatchNorm is
        # incompatible with DP-SGD, while GroupNorm passes ModuleValidator.
        self.bn1 = nn.GroupNorm(1, self.n_cont)
        self.bn2 = nn.GroupNorm(1, 20)
        self.bn3 = nn.GroupNorm(1, 20)

    def forward(self, x_cont):
        x = self.bn1(x_cont)
        x = F.relu(self.lin1(x))
        x = self.bn2(x)
        x = F.relu(self.lin2(x))
        x = self.bn3(x)
        x = self.lin3(x)
        return x
n_col = train_priv_X.shape[1]
model = PowerModel(n_col, n_class)
optim = torch_optim.Adam(model.parameters(), lr=lr)
def train_model(model, optim, train_dl):
    model.train()
    total = 0
    sum_loss = 0
    for x, y in train_dl:
        batch = y.shape[0]
        output = model(x)
        loss = F.cross_entropy(output, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
        total += batch
        sum_loss += batch * loss.item()
    return sum_loss / total
def val_loss(model, valid_dl):
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for x, y in valid_dl:
            current_batch_size = y.shape[0]
            out = model(x)
            loss = F.cross_entropy(out, y)
            sum_loss += current_batch_size * loss.item()
            total += current_batch_size
            pred = torch.max(out, 1)[1]
            correct += (pred == y).float().sum().item()
    return sum_loss / total, correct / total
def train_loop(model, epochs, optim, train_dl, test_dl):
    for i in range(epochs):
        loss = train_model(model, optim, train_dl)
        print(i, "training loss: ", loss)
        vloss, accr = val_loss(model, train_dl)
        print("train: valid loss %.3f and accuracy %.3f" % (vloss, accr))
        vloss, accr = val_loss(model, test_dl)
        print("test: valid loss %.3f and accuracy %.3f" % (vloss, accr))
def calc_uncertainty(model, target_ds):
    # shuffle=False keeps the predictions aligned with the dataset order
    target_dl = DataLoader(target_ds, batch_size=batch_size, shuffle=False)
    model.eval()
    preds = []
    with torch.no_grad():
        for x, y in target_dl:
            out = model(x)
            prob = F.softmax(out, dim=1)
            preds.append(prob)
    # flatten the per-batch tensors into one list of per-sample probabilities
    final_probs = [item for sublist in preds for item in sublist]
    return final_probs
from opacus.validators import ModuleValidator
model = PowerModel(n_col, n_class)
m = ModuleValidator.fix(model)  # replace any DP-incompatible layers
optim = torch_optim.Adam(m.parameters(), lr=lr)
from opacus import PrivacyEngine

epsilon = 500
delta = 0.001
max_grad_norm = 1.0

privacy_engine = PrivacyEngine(secure_mode=False)
m, optim, train_dl = privacy_engine.make_private_with_epsilon(
    module=m,
    optimizer=optim,
    data_loader=train_dl,
    target_epsilon=epsilon,
    target_delta=delta,
    epochs=epochs,
    max_grad_norm=max_grad_norm,
)
train_loop(m, epochs, optim, train_dl, test_dl)
test_uncertain = calc_uncertainty(m, test_ds)
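After training, it is also worth printing the noise that was actually injected and the budget the accountant reports; the attribute and method names below (optim.noise_multiplier, privacy_engine.get_epsilon) are as I understand the Opacus 1.x API and should be treated as an assumption:

# Sanity check (Opacus 1.x API, treated as an assumption): the DPOptimizer
# stores the noise multiplier chosen for the target epsilon, and the engine's
# accountant reports the epsilon actually spent.
print("noise multiplier used:", optim.noise_multiplier)
print("epsilon spent:", privacy_engine.get_epsilon(delta))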
Attached: wine.csv
Expected behavior
An Opacus-enhanced PyTorch classification model should produce different uncertainty values when trained with different epsilon values.
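A minimal sketch of how this could be checked, reusing the definitions from the repro script above (the epsilon grid and the entropy summary are illustrative choices, not part of the original report): train one fresh model per target epsilon and compare the mean predictive entropy; if DP-SGD is applied correctly, small epsilons should be visibly noisier.

for eps in [1.0, 50.0, 500.0]:
    mdl = ModuleValidator.fix(PowerModel(n_col, n_class))
    opt = torch_optim.Adam(mdl.parameters(), lr=lr)
    engine = PrivacyEngine(secure_mode=False)
    dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    mdl, opt, dl = engine.make_private_with_epsilon(
        module=mdl,
        optimizer=opt,
        data_loader=dl,
        target_epsilon=eps,
        target_delta=delta,
        epochs=epochs,
        max_grad_norm=max_grad_norm,
    )
    train_loop(mdl, epochs, opt, dl, test_dl)
    probs = torch.stack(calc_uncertainty(mdl, test_ds))
    # mean predictive entropy as a scalar uncertainty summary
    entropy = -(probs * probs.clamp_min(1e-12).log()).sum(dim=1).mean()
    print("epsilon %.0f: mean predictive entropy %.4f" % (eps, entropy.item()))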
Environment
Here is a requirements.txt file for the environment.
How you installed PyTorch (conda, pip, source): pip