pytorch / opacus

Training PyTorch models with differential privacy
https://opacus.ai
Apache License 2.0

More or less the same value over different EPS values #587

Closed zedoul closed 3 weeks ago

zedoul commented 1 year ago

🐛 Bug

Hi,

A short summary: an Opacus-based classification model generates roughly the same outputs even when the model is trained with different EPS values on the Wine dataset.

The sample code below can easily be reproduced on a local setup. Additionally, an equivalent implementation works fine with TensorFlow Privacy.

To begin with, when the epsilon value surpasses a certain threshold, Opacus starts to emit unexpected warnings such as:

/python3.10/site-packages/opacus/accountants/analysis/prv/prvs.py:50: RuntimeWarning: invalid value encountered in log
  z = np.log((np.exp(t) + q - 1) / q)
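
For what it's worth, the warning itself just means that the argument of np.log became non-positive, which yields NaN. A standalone illustration (the q and t values below are made up for illustration, not taken from the actual run):

import numpy as np

q = 0.03   # made-up sampling rate
t = -5.0   # with np.exp(t) + q - 1 < 0 ...
np.log((np.exp(t) + q - 1) / q)   # ... the result is nan and numpy emits the RuntimeWarning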

I attempted to use Google Colab as suggested in the template, but I am unfamiliar with it. Instead, I have pasted the code below and attached the relevant dataset, the Kaggle Wine quality data. Interestingly, regardless of which epsilon value I selected, the accuracy outcomes were quite similar. Moreover, if the epsilon value exceeds a specific threshold, the program stalls.

import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as torch_optim
import torch.nn.functional as F

import opacus

# Hyperparameters
batch_size = 50
epochs = 10
lr = 0.0001
n_class = 10

# Load the Kaggle red-wine quality data; use four of the numerical columns as
# features and the last column (quality) as the label
df = pd.read_csv("winequality-red.csv")

X = df.iloc[:, 5:9]
y = df.iloc[:, -1]
X = X.to_numpy()
y = y.to_numpy()

assert X.shape[1] == 4

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

train_priv_X, test_pub_X, train_priv_Y, test_pub_Y = train_test_split(X, y, test_size=0.2, stratify=y)

train_priv_Y = LabelEncoder().fit_transform(train_priv_Y)
test_pub_Y = LabelEncoder().fit_transform(test_pub_Y)

from torch.utils.data import Dataset, DataLoader
import numpy as np

class PowerDataset(Dataset):
    def __init__(self, X, Y):
        X = X.copy()
        self.X = X.astype(np.float32) #numerical columns
        self.y = Y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_ds = PowerDataset(train_priv_X, train_priv_Y)
test_ds = PowerDataset(test_pub_X, test_pub_Y)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True)

class PowerModel(nn.Module):
    def __init__(self, n_cont, n_class):
        super().__init__()
        self.n_cont = n_cont
        self.n_class = n_class
        self.lin1 = nn.Linear(self.n_cont, 20)
        self.lin2 = nn.Linear(20, 20)
        self.lin3 = nn.Linear(20, self.n_class)
        # GroupNorm instead of BatchNorm, since Opacus/DP-SGD does not support BatchNorm
        self.bn1 = nn.GroupNorm(1, self.n_cont)
        self.bn2 = nn.GroupNorm(1, 20)
        self.bn3 = nn.GroupNorm(1, 20)

    def forward(self, x_cont):
        x = self.bn1(x_cont)
        x = F.relu(self.lin1(x))
        x = self.bn2(x)
        x = F.relu(self.lin2(x))
        x = self.bn3(x)
        x = self.lin3(x)
        return x

n_col = train_priv_X.shape[1]
model = PowerModel(n_col, n_class)
optim = torch_optim.Adam(model.parameters(), lr=lr)

def train_model(model, optim, train_dl):
    model.train()
    total = 0
    sum_loss = 0
    for x, y in train_dl:
        batch = y.shape[0]
        output = model(x)
        loss = F.cross_entropy(output, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
        total += batch
        sum_loss += batch*(loss.item())
    return sum_loss/total

def val_loss(model, valid_dl):
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    for x, y in valid_dl:
        current_batch_size = y.shape[0]
        out = model(x)
        loss = F.cross_entropy(out, y)
        sum_loss += current_batch_size*(loss.item())
        total += current_batch_size
        pred = torch.max(out, 1)[1]
        correct += (pred == y).float().sum().item()
    return sum_loss/total, correct/total

def train_loop(model, epochs, optim, train_dl, test_dl):
    for i in range(epochs):
        loss = train_model(model, optim, train_dl)
        print(i, "training loss: ", loss)
        vloss, accr = val_loss(model, train_dl)
        print("train: valid loss %.3f and accuracy %.3f" % (vloss, accr))
        vloss, accr = val_loss(model, test_dl)
        print("test: valid loss %.3f and accuracy %.3f" % (vloss, accr))

from sklearn import preprocessing

def calc_uncertainty(model, target_ds):
    # Collect per-sample softmax probabilities over the target dataset
    target_dl = DataLoader(target_ds, batch_size=batch_size, shuffle=True)
    preds = []
    with torch.no_grad():
        for x,y in target_dl:
            out = model(x)
            prob = F.softmax(out, dim=1)
            preds.append(prob)

    final_probs = [item for sublist in preds for item in sublist]
    return final_probs

from opacus.validators import ModuleValidator

# Re-create the model and let Opacus fix any layers it does not support
model = PowerModel(n_col, n_class)
m = ModuleValidator.fix(model)
optim = torch_optim.Adam(m.parameters(), lr=lr)

from opacus import PrivacyEngine

# Target DP parameters
epsilon = 500
delta = 0.001
max_grad_norm = 1.0

privacy_engine = PrivacyEngine(secure_mode=False)
m, optim, train_dl = privacy_engine.make_private_with_epsilon(
    module=m,
    optimizer=optim,
    data_loader=train_dl,
    target_epsilon=epsilon,
    target_delta=delta,
    epochs=epochs,
    max_grad_norm=max_grad_norm,
)

train_loop(m, epochs, optim, train_dl, test_dl)
test_uncertain = calc_uncertainty(m, test_ds)
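
For reference (not part of the original script), a quick sanity check is to print the noise multiplier the engine actually computed and the epsilon the accountant reports after training; optim.noise_multiplier and privacy_engine.get_epsilon() here are from the opacus 1.x API:

# Inspect what make_private_with_epsilon actually computed
print("noise_multiplier:", optim.noise_multiplier)          # sigma chosen for the target epsilon
print("epsilon spent:", privacy_engine.get_epsilon(delta))  # accountant's estimate after training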

wine.csv

Please reproduce using our template Colab and post here the link

To Reproduce

  1. Copy the code and place it with the attached dataset in the same directory
  2. Set up the environment (see requirements.txt below)
  3. python ./opacus_test.py

Expected behavior

An Opacus-enhanced PyTorch classification model should generate different uncertainty values when trained with different epsilon values.

Environment

Here is a requirements.txt file for the environment.

pandas
numpy
scikit-learn
torch==1.8.1
torchcsprng==0.2.1
torchvision
argparse
jupyter
scipy
opacus
HuanyuZhang commented 3 weeks ago

When the epsilon value is large enough, the noise_multiplier will be very small, which leads to minimal effect on model performance.
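
A minimal sketch of that relationship, assuming the hyperparameters from the script above (batch size 50, delta 0.001, 10 epochs, roughly 1,280 training rows after the 80/20 split) and using the get_noise_multiplier helper from opacus.accountants.utils; exact numbers will vary, but the computed sigma shrinks quickly as the target epsilon grows:

from opacus.accountants.utils import get_noise_multiplier

# Approximate per-step sampling rate: batch_size / len(train_ds)
sample_rate = 50 / 1280

for eps in (1.0, 5.0, 10.0, 50.0):
    sigma = get_noise_multiplier(
        target_epsilon=eps,
        target_delta=0.001,
        sample_rate=sample_rate,
        epochs=10,
    )
    print(f"target_epsilon={eps:>5}: noise_multiplier={sigma:.4f}")

# Past some point sigma is already so small that raising epsilon further barely
# changes training, which is why the accuracies look alike; pushing epsilon much
# higher is also where the warnings/stall from the report appear.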