dgcnz / dl2

Code for "Effect of equivariance on training dynamics"
1 stars 0 forks source link

`aten::grid_sampler_3d_backward` is not implemented #78

Open dgcnz opened 2 weeks ago

dgcnz commented 2 weeks ago
/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/torch/autograd/graph.py:744: UserWarning: Using backward() with create_graph=True will create a reference cycle between the parameter and its gradient which can cause a memory leak. We recommend using autograd.grad when creating the graph to avoid this. If you have to use this function, make sure to reset the .grad fields of your parameters to None after use to break the cycle and avoid the leak. (Triggered internally at ../torch/csrc/autograd/engine.cpp:1203.)
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  0%|          | 0/8 [00:01<?, ?it/s]
Traceback (most recent call last):
  File "/gpfs/home6/scur0399/development/dl2/src/compute_measures_v2.py", line 103, in main
    get_spectrum(cfg, config, datamodule, model)
  File "/gpfs/home6/scur0399/development/dl2/src/metrics/hessian_spectrum.py", line 19, in get_spectrum
    spectrum = get_hessian_max_spectrum(
               ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/src/metrics/hessian_spectrum.py", line 86, in get_hessian_max_spectrum
    top_eigenvalues, _ = hessian_comp.eigenvalues(top_n=hessian_top_k, tol=hessian_tol, maxIter=hessian_max_iter)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/src/pyhessian/hessian.py", line 142, in eigenvalues
    Hv = hessian_vector_product(self.gradsH, self.params, v)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/src/pyhessian/utils.py", line 88, in hessian_vector_product
    hv = torch.autograd.grad(gradsH,
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/torch/autograd/__init__.py", line 412, in grad
    result = _engine_run_backward(
             ^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/torch/autograd/graph.py", line 744, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: derivative for aten::grid_sampler_3d_backward is not implemented
wandb:                                                                                
wandb: 🚀 View run fragrant-glitter-119 at: https://wandb.ai/uva-dl2/wang2024/runs/9a5caczr
wandb: ⭐️ View project at: https://wandb.ai/uva-dl2/wang2024
wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Find logs at: ./wandb/run-20240608_220938-9a5caczr/logs
Error executing job with overrides: ['spectrum=True', 'ckpt_path=uva-dl2/wang2024/model-xogwka6n:v3', 'batch_size=1']
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/gpfs/home6/scur0399/development/dl2/src/compute_measures_v2.py", line 139, in <module>
    main()
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/main.py", line 94, in decorated_main
    _run_hydra(
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
    _run_app(
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/_internal/utils.py", line 457, in _run_app
    run_and_report(
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
    raise ex
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
    return func()
           ^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
    lambda: hydra.run(
            ^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/_internal/hydra.py", line 132, in run
    _ = ret.return_value
        ^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/core/utils.py", line 260, in return_value
    raise self._return_value
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/hydra/core/utils.py", line 186, in run_job
    ret.return_value = task_function(task_cfg)
                       ^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/src/compute_measures_v2.py", line 103, in main
    get_spectrum(cfg, config, datamodule, model)
  File "/gpfs/home6/scur0399/development/dl2/src/metrics/hessian_spectrum.py", line 19, in get_spectrum
    spectrum = get_hessian_max_spectrum(
               ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/src/metrics/hessian_spectrum.py", line 86, in get_hessian_max_spectrum
    top_eigenvalues, _ = hessian_comp.eigenvalues(top_n=hessian_top_k, tol=hessian_tol, maxIter=hessian_max_iter)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/src/pyhessian/hessian.py", line 142, in eigenvalues
    Hv = hessian_vector_product(self.gradsH, self.params, v)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/src/pyhessian/utils.py", line 88, in hessian_vector_product
    hv = torch.autograd.grad(gradsH,
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/torch/autograd/__init__.py", line 412, in grad
    result = _engine_run_backward(
             ^^^^^^^^^^^^^^^^^^^^^
  File "/gpfs/home6/scur0399/development/dl2/.venv/lib/python3.11/site-packages/torch/autograd/graph.py", line 744, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: derivative for aten::grid_sampler_3d_backward is not implemented
srun: error: gcn22: task 0: Exited with exit code 1
srun: Terminating StepId=6564586.0
dgcnz commented 2 weeks ago

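Minimal code for reproduction (assumes `import torch`; the final `torch.autograd.grad` call is the one expected to raise the `RuntimeError` shown above):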

    # Minimal reproduction of the failing double-backward through
    # torch.grid_sampler_3d. Assumes `import torch` has already been executed.

    # Scalar leaf tensor; both derivatives below are taken with respect to it.
    x = torch.tensor(1.0, requires_grad=True)
    # 5-D random volume scaled by x so the sampled output depends on x.
    # NOTE(review): shape (1, 3, 2, 2, 2) is presumably (N, C, D, H, W) -- confirm.
    input = x * torch.randn(1, 3, 2, 2, 2, requires_grad=True)
    # Random sampling grid of shape (1, 3, 2, 2, 3).
    # NOTE(review): the trailing dim of size 3 presumably holds 3-D sample
    # coordinates -- confirm against the grid_sample documentation.
    grid = torch.randn(1, 3, 2, 2, 3, requires_grad=True)
    # NOTE(review): the positional arguments 0, 0, True are presumably
    # interpolation mode, padding mode and align_corners -- confirm against
    # the op signature.
    g = torch.grid_sampler_3d(input, grid, 0, 0, True)
    # Reduce to a scalar so autograd.grad can be called without grad_outputs.
    l = g.sum() 
    # create_graph=True records the graph of this backward pass so that the
    # resulting gradient can itself be differentiated.
    first_derivative = torch.autograd.grad(l, x, create_graph=True)[0]
    # Differentiating the first derivative requires a derivative for the
    # backward of grid_sampler_3d; presumably this is the call that raises
    # "derivative for aten::grid_sampler_3d_backward is not implemented".
    second_derivative = torch.autograd.grad(first_derivative, x)[0]