lmjohns3 / theanets

Neural network toolkit for Python
http://theanets.rtfd.org
MIT License

Problem with reproducibility for release 0.6.2 #102

Closed: tlikhomanenko closed this issue 7 years ago

tlikhomanenko commented 8 years ago

Hi!

The code below prints different values on different runs. Maybe I missed some parameter in the algorithm that fixes the random state, but calling np.random.seed does not make the results reproducible either.

from __future__ import division, print_function, absolute_import

import numpy as np
import theanets

class Base(object):
    def __init__(self):
        self.NUM_EXAMPLES = 64
        self.NUM_INPUTS = 7
        self.NUM_OUTPUTS = 3
        self.NUM_CLASSES = 5

        np.random.seed(41)

        self.INPUTS = np.random.randn(self.NUM_EXAMPLES, self.NUM_INPUTS).astype('f')
        self.INPUT_WEIGHTS = abs(np.random.randn(self.NUM_EXAMPLES, self.NUM_INPUTS)).astype('f')
        self.OUTPUTS = np.random.randn(self.NUM_EXAMPLES, self.NUM_OUTPUTS).astype('f')
        self.OUTPUT_WEIGHTS = abs(np.random.randn(self.NUM_EXAMPLES, self.NUM_OUTPUTS)).astype('f')
        self.CLASSES = np.random.randint(self.NUM_CLASSES, size=self.NUM_EXAMPLES).astype('i')  # integer labels in [0, NUM_CLASSES)
        self.CLASS_WEIGHTS = abs(np.random.rand(self.NUM_EXAMPLES)).astype('f')

    def assert_progress(self, algo, data, **kwargs):
        trainer = self.exp.itertrain(
            data, algorithm=algo, monitor_gradients=True, batch_size=3, rng=11, nrng=13, **kwargs)
        train0, valid0 = next(trainer)
        train1, valid1 = next(trainer)
        assert train1['loss'] < valid0['loss']   # should have made progress!
        assert valid1['loss'] == valid0['loss']  # no new validation occurred
        return [train1['loss'], train0['loss'], valid1['loss'], valid0['loss']]

class TestClassifier(Base):
    def test_sgd(self):
        np.random.seed(41)

        self.exp = theanets.Experiment(
            theanets.Classifier,
            layers=(self.NUM_INPUTS, 10, self.NUM_CLASSES))
        print(self.assert_progress('sgd', [self.INPUTS, self.CLASSES]))
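
One minimal way to see the non-determinism, assuming the classes above, is to build the same seeded experiment twice and compare the reported losses (the run_once helper below is only illustrative, not part of theanets):

def run_once():
    # rebuild the data and the experiment from scratch with the same numpy seed
    test = TestClassifier()
    np.random.seed(41)
    test.exp = theanets.Experiment(
        theanets.Classifier,
        layers=(test.NUM_INPUTS, 10, test.NUM_CLASSES))
    return test.assert_progress('sgd', [test.INPUTS, test.CLASSES])

# on 0.6.2 the two runs report different losses, so this is expected to print False
print(np.allclose(run_once(), run_once()))
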
lmjohns3 commented 8 years ago

This remains an open issue for the 0.6.x release series. I will try to release a patched 0.6.something with a fix.

tlikhomanenko commented 7 years ago

Hi!

With version 0.7.3, this code now prints the same values on every run.

from __future__ import division, print_function, absolute_import

import numpy as np
import theanets

class Base(object):
    def __init__(self):
        self.NUM_EXAMPLES = 64
        self.NUM_INPUTS = 7
        self.NUM_OUTPUTS = 3
        self.NUM_CLASSES = 5

        # fix the dataset used in the experiments
        np.random.seed(41)

        self.INPUTS = np.random.randn(self.NUM_EXAMPLES, self.NUM_INPUTS).astype('f')
        self.INPUT_WEIGHTS = abs(np.random.randn(self.NUM_EXAMPLES, self.NUM_INPUTS)).astype('f')
        self.OUTPUTS = np.random.randn(self.NUM_EXAMPLES, self.NUM_OUTPUTS).astype('f')
        self.OUTPUT_WEIGHTS = abs(np.random.randn(self.NUM_EXAMPLES, self.NUM_OUTPUTS)).astype('f')
        self.CLASSES = np.random.randint(self.NUM_CLASSES, size=self.NUM_EXAMPLES).astype('i')  # integer labels in [0, NUM_CLASSES)
        self.CLASS_WEIGHTS = abs(np.random.rand(self.NUM_EXAMPLES)).astype('f')

    def assert_progress(self, algo, data, **kwargs):
        # fix the trainer's random state via the rng parameter
        trainer = self.exp.itertrain(
            data, algorithm=algo, monitor_gradients=True, batch_size=3, rng=11, **kwargs)
        train0, valid0 = next(trainer)
        train1, valid1 = next(trainer)
        assert train1['loss'] < valid0['loss']   # should have made progress!
        assert valid1['loss'] == valid0['loss']  # no new validation occurred
        return [train1['loss'], train0['loss'], valid1['loss'], valid0['loss']]

class TestClassifier(Base):
    def test_sgd(self):
        self.exp = theanets.Experiment(
            theanets.Classifier,
            layers=(self.NUM_INPUTS, 10, self.NUM_CLASSES))
        print(self.assert_progress('sgd', [self.INPUTS, self.CLASSES]))
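
With the dataset seed and the rng argument both fixed, the same two-run comparison sketched in the first comment should now report matching losses on 0.7.3.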

Thanks!