tensorflow / probability

Probabilistic reasoning and statistical analysis in TensorFlow
https://www.tensorflow.org/probability/
Apache License 2.0

Apply prior on a simple linear regression #439

Open TheoLvs opened 5 years ago

TheoLvs commented 5 years ago

Hello everyone! I am a beginner in both Bayesian stats and probabilistic programming.

I am trying to do a simple linear regression on a toy dataset, with a predefined prior on the bias. So far I have found several ways of doing Bayesian linear regression with TFP (correct me if I'm wrong), including: a shallow neural network with one DenseFlipout layer, GLMs, STS models, and Edward2 models.

Let's say my dataset follows the linear relationship 3x + 1 + noise. What would be the best method to fit a linear regression while stating explicitly that my prior on the bias is a Normal distribution centered at 10 (to force the bias posterior to be a compromise between 1 and 10)?

So far it seems to me that the DenseFlipout option might be best, with a simple architecture such as:

model = tf.keras.Sequential([
    tfp.layers.DenseFlipout(
        1,
        activation = "linear",
        bias_prior_fn = tfp.layers.default_mean_field_normal_fn()
    )
])

model.compile(optimizer = "adam",loss = "mse")

But how do I define a custom prior on the bias? The documentation around default_mean_field_normal_fn() is obscure.

Also, my goal is to keep this as generalizable as possible, with custom distributions for the priors, and priors on the coefficients as well.
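
For concreteness, here is a rough, untested sketch of what I imagine a custom bias prior could look like, assuming the prior_fn has to follow the same signature as the default functions in tfp.layers (dtype, shape, name, trainable, add_variable_fn) and return a distribution; please correct me if this is off:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

def bias_prior_fn(dtype, shape, name, trainable, add_variable_fn):
    """Fixed (non-trainable) Normal(10, 1) prior on the bias."""
    del name, trainable, add_variable_fn  # the prior creates no variables
    dist = tfd.Normal(loc=10. * tf.ones(shape, dtype),
                      scale=dtype.as_numpy_dtype(1.))
    return tfd.Independent(
        dist, reinterpreted_batch_ndims=tf.size(dist.batch_shape_tensor()))

model = tf.keras.Sequential([
    tfp.layers.DenseFlipout(
        1,
        activation="linear",
        bias_prior_fn=bias_prior_fn,
        # the default bias posterior is a point mass, so presumably it also
        # needs a mean-field posterior for the bias KL term to show up
        bias_posterior_fn=tfp.layers.default_mean_field_normal_fn())
])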

Thanks a lot! Theo

AngelBerihuete commented 5 years ago

Hi @TheoLvs

Maybe you would prefer to use Edward2 in order to take control of your priors... for instance, MAP inference could be coded as follows:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
# import tensorflow_probability as tfp
from tensorflow_probability import edward2 as ed

plt.style.use('ggplot')

def build_toy_dataset(N, w, intercept, noise_std=0.25):
    D = len(w)
    x = np.random.randn(N, D)
    y = np.dot(x, w) + np.random.normal(intercept, noise_std, size=N)
    return x.astype(np.float32), y.astype(np.float32)

# ed.set_seed(42)
N = 40  # number of data points
D = 1  # number of features
# w_true = np.random.randn(D)
w_true = np.array([3.])
intercept_true = np.array([1.])

X_train, y_train = build_toy_dataset(N, w_true, intercept_true)
X_test, y_test = build_toy_dataset(N, w_true, intercept_true)

plt.scatter(X_train, y_train)
plt.show()

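# Generative model: w ~ Normal(0, 1), b ~ Normal(0, 1), y | w, b ~ Normal(Xw + b, 1).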
def regression_model(regressors):
    w = ed.Normal(
        loc=tf.zeros(regressors.shape[1]),
        scale=tf.ones(regressors.shape[1]),
        name="w")
    b = ed.Normal(loc=0., scale=1., name="b")
    y = ed.Normal(
        loc=tf.tensordot(regressors, w, [[1], [0]]) + b,
        scale=tf.ones(N), name="y")
    return y

log_joint = ed.make_log_joint_fn(regression_model)

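# Draw one sample from the model (prior predictive), just to exercise the graph.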
y = regression_model(X_train)

with tf.Session() as sess:  # or, e.g., tf.train.MonitoredSession()
    y_train2 = sess.run(y)

tf.reset_default_graph()

w = tf.Variable(np.ones([X_train.shape[1]]), dtype=tf.float32)
b = tf.Variable(1., dtype=tf.float32)

def target(w, b):
    """Unnormalized target density as a function of the parameters."""
    return log_joint(regressors=X_train, w=w, b=b, y=y_train)

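# MAP estimation: minimize the negative log joint (the "energy") w.r.t. w and b.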
energy = -target(w, b)

optimizer = tf.train.AdamOptimizer(learning_rate=0.05)
train = optimizer.minimize(energy)

init = tf.global_variables_initializer()

t = []

num_epochs = 200

with tf.Session() as sess:
    sess.run(init)

    for i in range(num_epochs):
        sess.run(train)
        if i % 5 == 0:
            cE, cw, cb = sess.run([energy, w, b])
            t.append(cE)

    w_inferred_map = sess.run(w)
    b_inferred_map = sess.run(b)

with ed.interception(ed.make_value_setter(
        w=w_inferred_map, b=b_inferred_map)):
    generate = regression_model(X_train)

with tf.Session() as sess:
    y_generated = sess.run(generate)

plt.scatter(X_train, y_train, color='blue', label='Actual data')
plt.scatter(X_train, y_generated,
            color='red', label='Simulated data (MAP)')
plt.legend()
plt.show()
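
(To encode the Normal prior centered at 10 that you asked about, you should only need to change loc=0. to loc=10. in the b = ed.Normal(...) line above.)
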
junpenglao commented 5 years ago

Or you can try using JointDistribution*, see an example here: https://colab.research.google.com/github/tensorflow/probability/blob/master/tensorflow_probability/examples/jupyter_notebooks/Modeling_with_JointDistribution.ipynb#scrollTo=4RG3iyZQnkOR
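
Roughly (an untested sketch, reusing the toy X_train / y_train from the MAP example above and putting the Normal(10, 1) prior on the intercept that was asked about), a JointDistributionSequential version could look like:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

def make_joint(X):
    # w ~ Normal(0, 1) per feature, b ~ Normal(10, 1), y | w, b ~ Normal(Xw + b, 0.25).
    return tfd.JointDistributionSequential([
        tfd.Independent(tfd.Normal(loc=tf.zeros([X.shape[1]]), scale=1.),
                        reinterpreted_batch_ndims=1),             # w
        tfd.Normal(loc=10., scale=1.),                            # b
        lambda b, w: tfd.Independent(                             # lambda args: most recent first
            tfd.Normal(loc=tf.linalg.matvec(X, w) + b, scale=0.25),
            reinterpreted_batch_ndims=1),                         # y
    ])

joint = make_joint(X_train)

# Unnormalized log posterior of (w, b) given the observed targets; this can be
# handed to tfp.mcmc kernels or simply maximized for a MAP estimate.
def target_log_prob(w, b):
    return joint.log_prob([w, b, y_train])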

AngelBerihuete commented 5 years ago

Hi @junpenglao

Do you have any experience running these simple examples with distributed training? I have a multi-GPU machine and want to use tf.distribute.MirroredStrategy... but the process never finishes. I see high GPU memory usage but 0% volatile GPU utilization.

>>> tf.__version__
'1.14.1-dev20190614'
>>> tfp.__version__
'0.8.0-dev20190614'

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
# import pandas as pd
# import re
# from string import ascii_uppercase

import tensorflow as tf
from tensorflow_probability import edward2 as ed
# ed.set_seed(42)
N = 4000  # number of data points
D = 10  # number of features
w_true = np.random.randn(D)

regressors = np.random.randn(N, D).astype('float32')

def build_toy_dataset(x, w, noise_std=0.25):
    # Here `features` actually holds the continuous regression targets y,
    # and `labels` is just a dummy column of ones.
    features = np.dot(x, w) + np.random.normal(
        0, noise_std, size=x.shape[0])
    labels = np.repeat(1, N)
    # Keep the targets as float32: the Normal likelihood below expects floats.
    return features.astype(dtype='float32'), labels.astype(dtype='int32')

features_, labels_ = build_toy_dataset(regressors, w_true)

strategy = tf.contrib.distribute.MirroredStrategy()

print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# session_config = config = tf.ConfigProto()
# session_config.gpu_options.allow_growth = True
config = tf.estimator.RunConfig(
    train_distribute=strategy, eval_distribute=strategy)

def train_input_fn():
    """An input function for training"""
    # Convert the inputs to a Dataset.
    features = tf.data.Dataset.from_tensors(features_)
    labels = tf.data.Dataset.from_tensors(labels_)
    dataset = tf.data.Dataset.zip((features, labels))
    # The whole toy dataset fits in a single tensor, so no shuffling,
    # repeating, or batching is done here.
    return dataset

def eval_input_fn():
    features = tf.data.Dataset.from_tensors(features_)
    labels = tf.data.Dataset.from_tensors(labels_)
    return tf.data.Dataset.zip((features, labels))

def predict_input_fn():
    predict_features = tf.data.Dataset.from_tensors(features_[0:10]).repeat(10)
    return predict_features

# def test_input_fn():
#     features = tf.data.Dataset.from_tensors(X_test)
#     outputs = tf.data.Dataset.from_tensors(Y_test)
#     dataset = tf.data.Dataset.zip((features, outputs))
#     dataset = dataset.shuffle(buffer_size=N)
#     return dataset

def build_model_fn_optimizer():
    """Simple model_fn with optimizer."""

    # TODO(anjalisridhar): Move this inside the model_fn once
    # OptimizerV2 is done?
    optimizer = tf.train.AdamOptimizer(learning_rate=0.05)

    def model_fn(features, labels, mode, params):

        regressors = params['regressors']

        def regression_model(input):
            w = ed.Normal(
                loc=tf.zeros(input.shape[1]),
                scale=tf.ones(input.shape[1]),
                name="w")

            b = ed.Normal(loc=0., scale=1., name="b")

            y = ed.Normal(
                loc=tf.tensordot(input, w, [[1], [0]]) + b,
                scale=tf.ones(N), name="y")
            return y

        log_joint = ed.make_log_joint_fn(regression_model)

        y = regression_model(regressors)

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {"y": y}
            return tf.estimator.EstimatorSpec(mode,
                                              predictions=predictions)

        var_1 = tf.Variable(name='var_1',
                            initial_value=regressors.shape[1]*[1.])
        var_2 = tf.Variable(name='var_2', initial_value=0.5)

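        # Negative log joint of the Edward2 model; note that in this Estimator
        # setup `features` carries the observed regression targets y.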
        def loss_fn():
            return -log_joint(regressors, w=var_1, b=var_2, y=features)

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss_fn())

        assert mode == tf.estimator.ModeKeys.TRAIN

        global_step = tf.train.get_global_step()
        train_op = optimizer.minimize(loss_fn(), global_step=global_step)
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss_fn(), train_op=train_op)

    return model_fn

steps = 2000

estimator = tf.estimator.Estimator(
    model_fn=build_model_fn_optimizer(),
    params={'regressors': regressors},
    config=config)

estimator.train(input_fn=train_input_fn, steps=steps)

eval_result = estimator.evaluate(input_fn=eval_input_fn, steps=steps)
print("Eval result: {}".format(eval_result))

junpenglao commented 5 years ago

I have never tried using tf.distribute.MirroredStrategy - maybe other devs will have more experience. @brianwa84 @csuter @jvdillon