When running inference under torch.amp.autocast(), the forward results of the WSL model show significant numerical differences, in contrast to the pretrained resnext101_32x8d from torchvision, as the sample outputs below show for the same input batch:
Output from WSL pretrained resnext101_32x8d_wsl shows significant differences:
Is this because the pretrained resnext101 from torchvision was already trained in mixed precision, or is something else going on?
Any clarifications would be appreciated.
PS: sample pytest code to load the models and run the tests:
import torch as th
@pytest.fixture
def batch_size():
    """Provide the number of samples placed in the random input batch."""
    n_samples = 2
    return n_samples
@pytest.fixture
def shape():
    """Provide the per-sample shape used to build the input batch.

    The tuple supplies the trailing dimensions of the tensor created by the
    ``batch`` fixture (presumably channels, height, width -- confirm).
    """
    sample_shape = (3, 720, 1280)
    return sample_shape
@pytest.fixture
def dev():
    """Return the device the models and inputs are moved to.

    Returns:
        ``th.device('cuda')`` when CUDA is available, otherwise
        ``th.device('cpu')``.
    """
    # PyTorch is imported under the alias ``th`` only; the bare name ``torch``
    # is unbound here, so the CPU fallback must use the alias as well or it
    # raises NameError on machines without CUDA.
    return th.device('cuda') if th.cuda.is_available() else th.device('cpu')
@pytest.fixture
def batch(batch_size, shape):
    """Create a random tensor of size ``(batch_size, *shape)`` with ``th.rand``."""
    dims = (batch_size, *shape)
    return th.rand(dims)
@pytest.fixture
def x101_32x8d(dev):
    """Build torchvision's ResNeXt-101 (32 groups, width 8) on ``dev`` in eval mode.

    The network is created through torchvision's private ``_resnet`` factory
    with ``FrozenBatchNorm2d`` as the normalisation layer.
    """
    from torchvision.models.resnet import Bottleneck, _resnet
    from torchvision.ops.misc import FrozenBatchNorm2d

    frozen = True
    groups, width_per_group = 32, 8
    model_kwargs = {
        'groups': groups,
        'width_per_group': width_per_group,
        # ``None`` hands the norm-layer choice back to torchvision.
        'norm_layer': FrozenBatchNorm2d if frozen else None,
    }
    arch = f"resnext101_{groups}x{width_per_group}d"

    # NOTE(review): the two positional ``True`` flags are presumably
    # ``pretrained`` and ``progress`` in the torchvision release this was
    # written against -- confirm against the installed version.
    net = _resnet(arch, Bottleneck, [3, 4, 23, 3], True, True, **model_kwargs)
    net.to(dev).eval()
    return net
@pytest.fixture
def x101_32x8d_wsl(dev):
    """Load ``resnext101_32x8d_wsl`` from the WSL-Images hub repo on ``dev`` in eval mode.

    The model is fetched with ``th.hub.load`` and configured with 32 groups,
    width 8 per group and ``FrozenBatchNorm2d`` as the normalisation layer.
    """
    from torchvision.ops.misc import FrozenBatchNorm2d

    frozen = True
    hub_kwargs = {
        'groups': 32,
        'width_per_group': 8,
        # ``None`` hands the norm-layer choice back to the hub entry point.
        'norm_layer': FrozenBatchNorm2d if frozen else None,
    }
    net = th.hub.load(
        'facebookresearch/WSL-Images',
        'resnext101_32x8d_wsl',
        **hub_kwargs,
    )
    net.to(dev).eval()
    return net
@pytest.mark.parametrize("B", [2])
def test_x101_amp(benchmark, x101_32x8d, dev, batch, B):
    """Compare autocast inference against FP32 inference for ``x101_32x8d``.

    The first ``B`` samples of ``batch`` are run once with autocast disabled
    and once with it enabled; each pair of per-sample outputs must agree
    within ``rtol=1e-03`` / ``atol=3e-04``. ``benchmark`` is requested but
    not used in the body.
    """
    net = x101_32x8d
    inputs = batch[:B].to(dev)

    with th.no_grad():
        with th.cuda.amp.autocast(enabled=False):
            outputs_fp32 = net(inputs).float()
        with th.cuda.amp.autocast():
            outputs_amp = net(inputs).float()

    # NOTE(review): the pasted snippet lost its indentation; the comparison
    # loop is placed after the autocast contexts -- confirm.
    paired = zip(outputs_fp32, outputs_amp)
    for i, (output_fp32, output_amp) in enumerate(paired):
        logging.info(f"output[{i}] shape={tuple(output_fp32.shape)}, norm_fp32={output_fp32.norm()}, norm_amp={output_amp.norm()}")
        th.testing.assert_allclose(output_amp, output_fp32, rtol=1e-03, atol=3e-04)
@pytest.mark.parametrize("B", [2])
def test_x101_wsl_amp(benchmark, x101_32x8d_wsl, dev, batch, B):
    """Compare autocast inference against FP32 inference for ``x101_32x8d_wsl``.

    The first ``B`` samples of ``batch`` are run once with autocast disabled
    and once with it enabled; each pair of per-sample outputs must agree
    within ``rtol=1e-03`` / ``atol=3e-04``. ``benchmark`` is requested but
    not used in the body.
    """
    net = x101_32x8d_wsl
    inputs = batch[:B].to(dev)

    with th.no_grad():
        with th.cuda.amp.autocast(enabled=False):
            outputs_fp32 = net(inputs).float()
        with th.cuda.amp.autocast():
            outputs_amp = net(inputs).float()

    # NOTE(review): the pasted snippet lost its indentation; the comparison
    # loop is placed after the autocast contexts -- confirm.
    paired = zip(outputs_fp32, outputs_amp)
    for i, (output_fp32, output_amp) in enumerate(paired):
        logging.info(f"output[{i}] shape={tuple(output_fp32.shape)}, norm_fp32={output_fp32.norm()}, norm_amp={output_amp.norm()}")
        th.testing.assert_allclose(output_amp, output_fp32, rtol=1e-03, atol=3e-04)
When making inferences with `torch.amp.autocast()`, the forward results show significant numerical differences compared with pretrained `resnext101_32x8d` from `torchvision`, as the sample outputs in the following show given the same input batch:
Output from `WSL` pretrained `resnext101_32x8d_wsl` shows significant differences:
Output from `torchvision` pretrained `resnext101_32x8d` shows approximate numerical values:
Is it because the pretrained resnext101 from torchvision is already trained in mixed precision or something else? Any clarifications would be appreciated.
PS: sample `pytest` code to load the models and run the tests: