Memory Leak in Volume Generalisation

I have generalised your loss to grayscale volumes (MRI images), and I get memory leaks with every optimizer except the fused Adam from Apex.

I generalised it by adding the following code:

adaptive.py

class AdaptiveVolumeLossFunction(nn.Module):
    """Sets up the adaptive form of the robust loss on a set of volumes.

    This class is a wrapper around AdaptiveLossFunction. Each input volume is
    passed through `util.volume_dct`, flattened to one row per batch item, and
    handed to the wrapped loss, which is constructed with one dimension per
    element of a single volume.

    Args:
      image_size: The size (depth, width, height, num_channels) of one input
        volume, excluding the batch dimension.
      device: The device to use. If it is an int, or a str containing "cuda",
        it is also made the current CUDA device.
      float_dtype: The dtype of the floats used as input. np.float32 and
        np.float64 are mapped to their torch equivalents.
      **kwargs: Forwarded unchanged to AdaptiveLossFunction.
    """

    def __init__(self, image_size, device, float_dtype=np.float32, **kwargs):
        super(AdaptiveVolumeLossFunction, self).__init__()

        assert len(image_size) == 4

        self.image_size = image_size

        # Accept numpy dtypes for convenience; store the torch equivalent.
        if float_dtype == np.float32:
            float_dtype = torch.float32
        if float_dtype == np.float64:
            float_dtype = torch.float64
        self.float_dtype = float_dtype

        self.device = device

        # Modification because we set the device at parsing time
        if not isinstance(device, torch.device):
            if isinstance(device, int) or (
                isinstance(device, str) and "cuda" in device
            ):
                torch.cuda.set_device(self.device)

        # transform_to_mat() reshapes each volume to a row holding
        # depth * width * height * num_channels entries, so the number of
        # loss dimensions is simply the product of `image_size`. Computing it
        # directly avoids allocating and transforming a dummy volume here.
        self.num_dims = int(np.prod(self.image_size))

        self.adaptive_lossfun = AdaptiveLossFunction(
            self.num_dims, self.float_dtype, self.device, **kwargs
        )

    def lossfun(self, x):
        """Evaluates the wrapped adaptive loss on a batch of volumes.

        Args:
          x: Batch of shape (num_batches,) + image_size.

        Returns:
          The output of the wrapped `lossfun`, reshaped to
          (-1,) + image_size.
        """
        x_mat = self.transform_to_mat(x)

        loss_mat = self.adaptive_lossfun.lossfun(x_mat)

        # Reshape the loss function's outputs to have the shapes as the input.
        loss = torch.reshape(loss_mat, [-1] + list(self.image_size))

        return loss

    def alpha(self):
        """Returns the wrapped loss's `alpha()` reshaped to `image_size`."""
        return torch.reshape(self.adaptive_lossfun.alpha(), self.image_size)

    def scale(self):
        """Returns the wrapped loss's `scale()` reshaped to `image_size`."""
        return torch.reshape(self.adaptive_lossfun.scale(), self.image_size)

    def transform_to_mat(self, x):
        """Transforms a batch of volumes to a matrix with one row per volume.

        Args:
          x: Array-like of shape
            (num_batches, depth, width, height, num_channels).

        Returns:
          A tensor of shape
          (num_batches, depth * width * height * num_channels) holding the
          output of `util.volume_dct` for each channel of each volume.
        """
        # Convert first so that array-like inputs without a `.shape`
        # attribute are rank-checked after conversion instead of failing
        # on the attribute lookup.
        x = torch.as_tensor(x)

        assert len(x.shape) == 5

        # Reshape `x` from
        #   (num_batches, depth, width, height, num_channels) to
        #   (num_batches * num_channels, depth, width, height)
        # so that every channel is transformed as its own volume.
        _, depth, width, height, num_channels = x.shape

        x_stack = torch.reshape(
            x.permute(0, 4, 1, 2, 3), (-1, depth, width, height)
        )

        x_stack = util.volume_dct(x_stack)

        # Restore the channels-last layout, then flatten each batch item.
        x_mat = torch.reshape(
            torch.reshape(
                x_stack, (-1, num_channels, depth, width, height)
            ).permute(0, 2, 3, 4, 1),
            [-1, width * height * depth * num_channels],
        )
        return x_mat

utils.py


def volume_dct(image):
    """Does an orthonormal type-II DCT (aka "The DCT") on a volume.

    The input is converted with `torch.as_tensor` and handed to
    `torch_dct.dct_3d` with norm="ortho".
    NOTE(review): per the torch-dct documentation, `dct_3d` transforms the
    last three dimensions of its input; confirm against the installed version.
    """
    volume = torch.as_tensor(image)
    return torch_dct.dct_3d(volume, norm="ortho")

def volume_idct(dct_x):
    """Inverts volume_dct(), by performing a type-III DCT.

    The input is converted with `torch.as_tensor` and handed to
    `torch_dct.idct_3d` with norm="ortho".
    """
    coefficients = torch.as_tensor(dct_x)
    return torch_dct.idct_3d(coefficients, norm="ortho")



Given the high complexity of the loss, I would really appreciate any pointers towards possible locations for bugs.

jonbarron / robust_loss_pytorch

Memory Leak in Volume Generalisation #18