tensorflow / privacy

Library for training machine learning models with privacy for training data
Apache License 2.0

Issue with variational autoencoder loss and DP training #92

Open khalidaskia opened 4 years ago

khalidaskia commented 4 years ago

Hi @npapernot,

I created this new issue because I couldn't find a way to re-open the previous one (https://github.com/tensorflow/privacy/issues/91). My issue: I am trying to use DPAdamGaussianOptimizer to train a variational autoencoder, but I am getting an error during training.

Here is my code for reproducibility:

import sys

import tensorflow as tf

from absl import app
from absl import flags

from tensorflow_privacy.privacy.analysis import privacy_ledger
from tensorflow_privacy.privacy.optimizers import dp_optimizer
flags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training')
flags.DEFINE_float('noise_multiplier', 0.001,
                   'Ratio of the standard deviation to the clipping norm')
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
flags.DEFINE_integer('batch_size', 256, 'Batch size')
flags.DEFINE_integer('epochs', 60, 'Number of epochs')
flags.DEFINE_integer('microbatches', 256, 'Number of microbatches ')

FLAGS = flags.FLAGS

FLAGS(sys.argv)

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

NB_TRAIN = 60000

# Network Parameters
image_dim = 784  # MNIST images are 28*28 = 784 pixels
hidden_dim = 512
latent_dim = 2

# A custom initialization (see Xavier Glorot init)
def glorot_init(shape):
    return tf.random_normal(shape=shape, stddev=1. / tf.sqrt(shape[0] / 2.))
# Variables
weights = {
    'encoder_h1': tf.Variable(glorot_init([image_dim, hidden_dim])),
    'z_mean': tf.Variable(glorot_init([hidden_dim, latent_dim])),
    'z_std': tf.Variable(glorot_init([hidden_dim, latent_dim])),
    'decoder_h1': tf.Variable(glorot_init([latent_dim, hidden_dim])),
    'decoder_out': tf.Variable(glorot_init([hidden_dim, image_dim]))
}
biases = {
    'encoder_b1': tf.Variable(glorot_init([hidden_dim])),
    'z_mean': tf.Variable(glorot_init([latent_dim])),
    'z_std': tf.Variable(glorot_init([latent_dim])),
    'decoder_b1': tf.Variable(glorot_init([hidden_dim])),
    'decoder_out': tf.Variable(glorot_init([image_dim]))
}
# Building the encoder
input_image = tf.placeholder(tf.float32, shape=[None, image_dim])
encoder = tf.matmul(input_image, weights['encoder_h1']) + biases['encoder_b1']
encoder = tf.nn.tanh(encoder)
z_mean = tf.matmul(encoder, weights['z_mean']) + biases['z_mean']
z_std = tf.matmul(encoder, weights['z_std']) + biases['z_std']

# Sampler: Normal (gaussian) random distribution
eps = tf.random_normal(tf.shape(z_std), dtype=tf.float32, mean=0., stddev=1.0,
                       name='epsilon')
z = z_mean + tf.exp(z_std / 2) * eps

# Building the decoder

decoder = tf.matmul(z, weights['decoder_h1']) + biases['decoder_b1']
decoder = tf.nn.tanh(decoder)
decoder = tf.matmul(decoder, weights['decoder_out']) + biases['decoder_out']
decoder = tf.nn.sigmoid(decoder)

VAE loss (I think the issue is here):


def vae_loss(x_reconstructed, x_true):
    # Reconstruction loss
    encode_decode_loss = x_true * tf.log(1e-10 + x_reconstructed) + (1 - x_true) * tf.log(1e-10 + 1 - x_reconstructed)
    encode_decode_loss = -tf.reduce_sum(encode_decode_loss, 1)
    # KL Divergence loss
    kl_div_loss = 1 + z_std - tf.square(z_mean) - tf.exp(z_std)
    kl_div_loss = -0.5 * tf.reduce_sum(kl_div_loss, 1)
    return tf.reduce_mean(encode_decode_loss + kl_div_loss)

loss_op = vae_loss(decoder, input_image)

ledger = privacy_ledger.PrivacyLedger(
          population_size=NB_TRAIN, 
          selection_probability=(FLAGS.batch_size / NB_TRAIN))

optimizer = dp_optimizer.DPAdamGaussianOptimizer(
      l2_norm_clip=FLAGS.l2_norm_clip, 
      noise_multiplier=FLAGS.noise_multiplier,
      num_microbatches=FLAGS.microbatches,
      ledger=ledger,
      learning_rate=FLAGS.learning_rate,
      unroll_microbatches=True)

global_step = tf.train.get_global_step()

The line below raised the error!

train_op = optimizer.minimize(loss=vae_loss, global_step=global_step) 
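
For context, my understanding from the DP-SGD tutorials is that the DP optimizers clip and noise gradients per microbatch, so minimize() should receive an unreduced, per-example loss tensor (not the function object vae_loss, and not a scalar that has already been averaged over the batch). Below is a minimal sketch of what I think the call should look like; the vae_loss_vector helper and the exact requirement are my assumptions, not something I have verified against the library.

# Assumption: the DP optimizer wants a vector loss of shape [batch_size] so it
# can split it into num_microbatches groups and clip each group's gradient.
def vae_loss_vector(x_reconstructed, x_true):
    # Per-example reconstruction loss (binary cross-entropy summed over pixels)
    encode_decode_loss = x_true * tf.log(1e-10 + x_reconstructed) + (1 - x_true) * tf.log(1e-10 + 1 - x_reconstructed)
    encode_decode_loss = -tf.reduce_sum(encode_decode_loss, 1)
    # Per-example KL divergence between q(z|x) and the standard normal prior
    kl_div_loss = 1 + z_std - tf.square(z_mean) - tf.exp(z_std)
    kl_div_loss = -0.5 * tf.reduce_sum(kl_div_loss, 1)
    # No tf.reduce_mean here: keep one loss value per example
    return encode_decode_loss + kl_div_loss

vector_loss = vae_loss_vector(decoder, input_image)
train_op = optimizer.minimize(loss=vector_loss, global_step=global_step)

With this, loss_op (the scalar mean) would only be used for monitoring, while the vector loss would drive the DP updates. But again, this is just my guess at what the optimizer expects, so any correction is welcome.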

Thank you for your help!

Billy1900 commented 4 years ago

(Quoting the original post above in full.)

Did you figure out how to make it work? I'm also interested in this one.

Billy1900 commented 4 years ago

I have solved the issue; you can close it.

XinyiYS commented 4 years ago

I have solved the issue; you can close it.

I am also interested in working with dp-VAE. May I ask how you resolved the issue?

Nada-Bu commented 1 year ago

I have solved the issue; you can close it.

Could you share how you resolved this issue?

Thanks.