ProGamerGov opened this issue 2 years ago
Another recorded set of failures:
=================================== FAILURES ===================================
_____________ Test.test_softmax_classification_batch_zero_baseline _____________
self = <tests.attr.test_deeplift_classification.Test testMethod=test_softmax_classification_batch_zero_baseline>
    def test_softmax_classification_batch_zero_baseline(self) -> None:
        num_in = 40
        input = torch.arange(0.0, num_in * 3.0, requires_grad=True).reshape(3, num_in)
        baselines = 0
        model = SoftmaxDeepLiftModel(num_in, 20, 10)
        dl = DeepLift(model)
        self.softmax_classification(
>           model, dl, input, baselines, torch.tensor([2, 2, 2])
        )
tests/attr/test_deeplift_classification.py:61:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/attr/test_deeplift_classification.py:169: in softmax_classification
    self._assert_attributions(model, attributions, input, baselines, delta, target2)
tests/attr/test_deeplift_classification.py:187: in _assert_attributions
    "some samples".format(delta),
E AssertionError: tensor(False) is not true : The sum of attribution values tensor([0.0007, 0.0020, 0.0034]) is not nearly equal to the difference between the endpoint for some samples
_ TestTracInRegression.test_tracin_regression_TracInCP_sample_wise_trick_features_20 _
a = (<tests.influence._core.test_tracin_regression.TestTracInRegression testMethod=test_tracin_regression_TracInCP_sample_wise_trick_features_20>,)
    @wraps(func)
    def standalone_func(*a):
>       return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_regression.py:153: in test_tracin_regression
    self, train_scores, train_scores_sample_wise_trick
tests/helpers/basic.py:35: in assertTensorAlmostEqual
    torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
E AssertionError: 6876.1328125 != 0.0 within 0.0001 delta (6876.1328125 difference)
_ TestTracInSelfInfluence.test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs _
a = (<tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs>,)
    @wraps(func)
    def standalone_func(*a):
>       return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_self_influence.py:74: in test_tracin_self_influence
    mode="max",
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test = <tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCPFastRandProjTests_unpack_inputs>
actual = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7682e-06, 2.4623e+00,
1.3540e-05, 1.2162e+04])
expected = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7683e-06, 2.4623e+00,
1.3540e-05, 1.2161e+04])
delta = 0.01, mode = 'max'
def assertTensorAlmostEqual(test, actual, expected, delta=0.0001, mode="sum"):
    assert isinstance(actual, torch.Tensor), (
        "Actual parameter given for " "comparison must be a tensor."
    )
    if not isinstance(expected, torch.Tensor):
        expected = torch.tensor(expected, dtype=actual.dtype)
    assert (
        actual.shape == expected.shape
    ), f"Expected tensor with shape: {expected.shape}. Actual shape {actual.shape}."
    actual = actual.cpu()
    expected = expected.cpu()
    if mode == "sum":
        test.assertAlmostEqual(
            torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
        )
    elif mode == "max":
        # if both tensors are empty, they are equal but there is no max
        if actual.numel() == expected.numel() == 0:
            return
        if actual.size() == torch.Size([]):
            test.assertAlmostEqual(
                torch.max(torch.abs(actual - expected)).item(), 0.0, delta=delta
            )
        else:
            for index, (input, ref) in enumerate(zip(actual, expected)):
                almost_equal = abs(input - ref) <= delta
                if hasattr(almost_equal, "__iter__"):
                    almost_equal = almost_equal.all()
                assert (
                    almost_equal
                ), "Values at index {}, {} and {}, differ more than by {}".format(
>                   index, input, ref, delta
                )
E AssertionError: Values at index 31, 12161.9365234375 and 12161.494140625, differ more than by 0.01
tests/helpers/basic.py:54: AssertionError
__ TestTracInSelfInfluence.test_tracin_self_influence_TracInCP_unpack_inputs ___
a = (<tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCP_unpack_inputs>,)
    @wraps(func)
    def standalone_func(*a):
>       return func(*(a + p.args), **p.kwargs)
/usr/local/lib/python3.7/dist-packages/parameterized/parameterized.py:533:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/influence/_core/test_tracin_self_influence.py:74: in test_tracin_self_influence
    mode="max",
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test = <tests.influence._core.test_tracin_self_influence.TestTracInSelfInfluence testMethod=test_tracin_self_influence_TracInCP_unpack_inputs>
actual = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7682e-06, 2.4623e+00,
1.3540e-05, 1.2162e+04])
expected = tensor([4.8968e+00, 1.9040e-01, 2.2268e+01, 1.5606e+03, 3.1314e-03, 3.2705e+01,
1.1193e+02, 1.5877e+01, 5.1077...0e+01,
4.1906e-04, 5.6764e+00, 9.5926e+01, 1.1055e-06, 9.7683e-06, 2.4623e+00,
1.3540e-05, 1.2161e+04])
delta = 0.01, mode = 'max'
def assertTensorAlmostEqual(test, actual, expected, delta=0.0001, mode="sum"):
    assert isinstance(actual, torch.Tensor), (
        "Actual parameter given for " "comparison must be a tensor."
    )
    if not isinstance(expected, torch.Tensor):
        expected = torch.tensor(expected, dtype=actual.dtype)
    assert (
        actual.shape == expected.shape
    ), f"Expected tensor with shape: {expected.shape}. Actual shape {actual.shape}."
    actual = actual.cpu()
    expected = expected.cpu()
    if mode == "sum":
        test.assertAlmostEqual(
            torch.sum(torch.abs(actual - expected)).item(), 0.0, delta=delta
        )
    elif mode == "max":
        # if both tensors are empty, they are equal but there is no max
        if actual.numel() == expected.numel() == 0:
            return
        if actual.size() == torch.Size([]):
            test.assertAlmostEqual(
                torch.max(torch.abs(actual - expected)).item(), 0.0, delta=delta
            )
        else:
            for index, (input, ref) in enumerate(zip(actual, expected)):
                almost_equal = abs(input - ref) <= delta
                if hasattr(almost_equal, "__iter__"):
                    almost_equal = almost_equal.all()
                assert (
                    almost_equal
                ), "Values at index {}, {} and {}, differ more than by {}".format(
>                   index, input, ref, delta
                )
E AssertionError: Values at index 31, 12161.9365234375 and 12161.494140625, differ more than by 0.01
🐛 Bug
When running tests for the optim module code I've been working on, some of the tests in other modules occasionally fail, and the repeat offenders sometimes produce values outside the expected tolerance. I've verified that the failures occur on the master branch as well.
The tests.attr.test_deeplift_classification.test_softmax_classification_batch_zero_baseline test is particularly easy to make fail by running it more than once. The failure log above is from the most recent batch of failures I've noticed. Interestingly, a single test failing in another module is rarer than several failing at once.
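For anyone trying to reproduce this, here is the kind of loop I use (my own sketch, not part of the test suite): it re-runs the flaky test in a fresh interpreter each time until it fails. It assumes you run it from the Captum repo root with pytest installed.

```python
# Sketch of a repro loop: re-run the flaky test in a fresh interpreter each
# time until one run fails. Assumes the Captum repo root as working directory.
import subprocess
import sys

TEST = (
    "tests/attr/test_deeplift_classification.py"
    "::Test::test_softmax_classification_batch_zero_baseline"
)

for attempt in range(1, 51):
    result = subprocess.run([sys.executable, "-m", "pytest", "-x", "-q", TEST])
    if result.returncode != 0:  # non-zero exit code means the test failed
        print(f"Failed on attempt {attempt}")
        break
```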
To Reproduce
The failures occur seemingly at random while running the test suite, with no discernible trigger.
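One thing I haven't ruled out is RNG state. The sketch below (an assumption on my part, not a confirmed cause) pins every seed, so that if the flakiness comes from unseeded randomness, repeated runs should become deterministic:

```python
# Assumption: the flakiness comes from unseeded RNG state. Pinning every seed
# should make repeated runs deterministic; if failures persist, the cause is
# elsewhere (e.g. nondeterministic kernels or accumulated floating-point error).
import random

import numpy as np
import torch


def seed_everything(seed: int = 0) -> None:
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Raises at runtime if an op only has a nondeterministic implementation.
    torch.use_deterministic_algorithms(True)
```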
Expected behavior
The tests should pass consistently; at the moment these kinds of failures happen too often.
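For what it's worth, the index-31 failure above (12161.9365 vs. 12161.4941) is only a ~3.6e-5 relative error; a fixed absolute delta of 0.01 can't absorb that at such magnitudes. A comparison that includes a relative term, sketched below (not the current behavior of helpers/basic.py), would tolerate it:

```python
# Sketch of a mixed absolute/relative check (a hypothetical alternative to the
# "max" mode above): passes when |actual - expected| <= atol + rtol * |expected|
# elementwise, so the allowed error scales with the magnitude of the values.
import torch


def tensors_close(actual: torch.Tensor, expected: torch.Tensor,
                  atol: float = 1e-4, rtol: float = 1e-4) -> bool:
    return torch.allclose(actual, expected, atol=atol, rtol=rtol)


# The failing pair from the log above passes under a relative tolerance:
assert tensors_close(torch.tensor([12161.9365]), torch.tensor([12161.4941]))
```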
Environment
I'm installing Captum straight from source via:
!pip3 install -e .[dev]
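If version details would help, this is roughly what I can capture from the runtime (PyTorch also ships a fuller collector script):

```python
# Quick environment capture; for a fuller report PyTorch provides
# `python -m torch.utils.collect_env`.
import sys

import torch

print("python:", sys.version.split()[0])
print("torch:", torch.__version__)
print("cuda:", torch.version.cuda, "available:", torch.cuda.is_available())
```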