facebookresearch / theseus

A library for differentiable nonlinear optimization
MIT License

RuntimeError: sym_strides() called on an undefined Tensor #596

Closed EXing closed 1 year ago

EXing commented 1 year ago

❓ Questions and Help

Hi, do you have any idea about this issue?

Traceback (most recent call last):
  File "/home/huangkun/.virtualenvs/theseus/lib/python3.8/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
    _run_app(
  File "/home/huangkun/.virtualenvs/theseus/lib/python3.8/site-packages/hydra/_internal/utils.py", line 457, in _run_app
    run_and_report(
  File "/home/huangkun/.virtualenvs/theseus/lib/python3.8/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
    raise ex
  File "/home/huangkun/.virtualenvs/theseus/lib/python3.8/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
    return func()
  File "/home/huangkun/.virtualenvs/theseus/lib/python3.8/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
    lambda: hydra.run(
  File "/home/huangkun/.virtualenvs/theseus/lib/python3.8/site-packages/hydra/_internal/hydra.py", line 132, in run
    _ = ret.return_value
  File "/home/huangkun/.virtualenvs/theseus/lib/python3.8/site-packages/hydra/core/utils.py", line 260, in return_value
    raise self._return_value
  File "/home/huangkun/.virtualenvs/theseus/lib/python3.8/site-packages/hydra/core/utils.py", line 186, in run_job
    ret.return_value = task_function(task_cfg)
  File "/home/huangkun/Git/lora_slam/test/slam_test.py", line 32, in main
    bundle_adjustment(global_map, True, cfg, timestamp, 2)
  File "/home/huangkun/Git/lora_slam/slam/bundle_adjustment.py", line 75, in bundle_adjustment
    theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
  File "/home/huangkun/Git/theseus/theseus/theseus_layer.py", line 93, in forward
    vars, info = _forward(
  File "/home/huangkun/Git/theseus/theseus/theseus_layer.py", line 169, in _forward
    objective.update(input_tensors)
  File "/home/huangkun/Git/theseus/theseus/core/objective.py", line 808, in update
    self.update_vectorization_if_needed()
  File "/home/huangkun/Git/theseus/theseus/core/objective.py", line 826, in update_vectorization_if_needed
    self._vectorization_run()
  File "/home/huangkun/Git/theseus/theseus/core/vectorizer.py", line 401, in _vectorize
    self._handle_singleton_wrapper(schema, cost_fn_wrappers, mode)
  File "/home/huangkun/Git/theseus/theseus/core/vectorizer.py", line 353, in _handle_singleton_wrapper
    ) = Vectorize._get_fn_results_for_mode(mode, wrapper.cost_fn)
  File "/home/huangkun/Git/theseus/theseus/core/vectorizer.py", line 307, in _get_fn_results_for_mode
    return cost_fn.weighted_jacobians_error()
  File "/home/huangkun/Git/theseus/theseus/core/cost_function.py", line 121, in weighted_jacobians_error
    jacobian, err = self.jacobians()
  File "/home/huangkun/Git/theseus/theseus/core/cost_function.py", line 355, in jacobians
    jacobians_full = self._compute_autograd_jacobian_vmap(
  File "/home/huangkun/Git/theseus/theseus/core/cost_function.py", line 341, in _compute_autograd_jacobian_vmap
    return vmap(jacrev(jac_fn, argnums=0))(optim_tensors, aux_tensors)
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/vmap.py", line 434, in wrapped
    return _flat_vmap(
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/vmap.py", line 39, in fn
    return f(*args, **kwargs)
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/vmap.py", line 619, in _flat_vmap
    batched_outputs = func(*batched_inputs, **kwargs)
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/eager_transforms.py", line 598, in wrapper_fn
    flat_jacobians_per_input = compute_jacobian_stacked()
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/eager_transforms.py", line 529, in compute_jacobian_stacked
    chunked_result = vmap(vjp_fn)(basis)
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/vmap.py", line 434, in wrapped
    return _flat_vmap(
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/vmap.py", line 39, in fn
    return f(*args, **kwargs)
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/vmap.py", line 619, in _flat_vmap
    batched_outputs = func(*batched_inputs, **kwargs)
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/eager_transforms.py", line 325, in wrapper
    result = _autograd_grad(flat_primals_out, flat_diff_primals, flat_cotangents,
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/_functorch/eager_transforms.py", line 113, in _autograd_grad
    grad_inputs = torch.autograd.grad(diff_outputs, inputs, grad_outputs,
  File "/home/huangkun/.local/lib/python3.8/site-packages/torch/autograd/__init__.py", line 303, in grad
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: sym_strides() called on an undefined Tensor

The code uses two kinds of losses for pose-graph bundle adjustment:

from typing import List, Optional, Tuple

import torch
import theseus as th
from theseus import CostFunction, CostWeight, Vector


class DepthConstraint(CostFunction):
    def __init__(
            self,
            vector: Vector,
            cost_weight: CostWeight,
            name: Optional[str] = None,
    ):
        super().__init__(cost_weight, name=name)
        self.vector = vector
        self.register_optim_var("vector")

    def dim(self):
        return 1

    def _compute_error(self) -> Tuple[torch.Tensor, torch.Tensor]:
        vector = self.vector.tensor
        below_idx = vector < 0.01
        error = vector.new_zeros(vector.shape)
        error = torch.where(below_idx, 0.01 - vector, error)
        return torch.sum(error, dim=1).unsqueeze(1), below_idx

    def error(self) -> torch.Tensor:
        return self._compute_error()[0]

    def jacobians(self) -> Tuple[List[torch.Tensor], torch.Tensor]:
        error, below_idx = self._compute_error()
        J = self.vector.tensor.new_zeros(
            self.vector.shape[0], 1, self.vector.dof()
        )
        J[below_idx.unsqueeze(1)] = -1.0
        return [J], error

    def _copy_impl(self, new_name: Optional[str] = None) -> "DepthConstraint":
        return DepthConstraint(
            self.vector.copy(),
            self.weight.copy(),
            name=new_name,
        )
def photometric_error(optim_vars, aux_vars) -> torch.Tensor:
    d1: th.Vector = optim_vars[0]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    d2: th.Vector = optim_vars[1]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    T_wc1: th.SE3 = optim_vars[2]
    T_wc2: th.SE3 = optim_vars[3]

    image1: th.Variable = aux_vars[0]  # (batch_size, 1, h, w) {normalized_intensity}
    image2: th.Variable = aux_vars[1]  # (batch_size, 1, h, w) {normalized_intensity}
    p_unit_sphere_or_plane: th.Variable = aux_vars[2]  # (batch_size=1, h, w, 3)
    k: th.Variable = aux_vars[3]  # (batch_size=1, 4) {fx, fy, cx, cy}

    return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)

def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
    d1: th.Vector = optim_vars[0]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    d2: th.Vector = optim_vars[1]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)

    image1: th.Variable = aux_vars[0]  # (batch_size, 1, h, w) {normalized_intensity}
    image2: th.Variable = aux_vars[1]  # (batch_size, 1, h, w) {normalized_intensity}
    p_unit_sphere_or_plane: th.Variable = aux_vars[2]  # (batch_size=1, h, w, 3)
    k: th.Variable = aux_vars[3]  # (batch_size=1, 4) {fx, fy, cx, cy}
    T_wc1: th.SE3 = aux_vars[4]
    T_wc2: th.SE3 = aux_vars[5]

    return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)

def photometric_error_fix_ref(optim_vars, aux_vars) -> torch.Tensor:
    d2: th.Vector = optim_vars[0]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    T_wc2: th.SE3 = optim_vars[1]

    image1: th.Variable = aux_vars[0]  # (batch_size, 1, h, w) {normalized_intensity}
    image2: th.Variable = aux_vars[1]  # (batch_size, 1, h, w) {normalized_intensity}
    p_unit_sphere_or_plane: th.Variable = aux_vars[2]  # (batch_size=1, h, w, 3)
    k: th.Variable = aux_vars[3]  # (batch_size=1, 4) {fx, fy, cx, cy}
    d1: th.Vector = aux_vars[4]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    T_wc1: th.SE3 = aux_vars[5]

    return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)

def photometric_error_fix_pose_and_ref(optim_vars, aux_vars) -> torch.Tensor:
    d2: th.Vector = optim_vars[0]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)

    image1: th.Variable = aux_vars[0]  # (batch_size, 1, h, w) {normalized_intensity}
    image2: th.Variable = aux_vars[1]  # (batch_size, 1, h, w) {normalized_intensity}
    p_unit_sphere_or_plane: th.Variable = aux_vars[2]  # (batch_size=1, h, w, 3)
    k: th.Variable = aux_vars[3]  # (batch_size=1, 4) {fx, fy, cx, cy}
    d1: th.Vector = aux_vars[4]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    T_wc1: th.SE3 = aux_vars[5]
    T_wc2: th.SE3 = aux_vars[6]

    return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)

    def add_edge_cost_function(self, kf1: Keyframe, kf2: Keyframe, fix_ref: bool):
        if self.fix_pose:
            if fix_ref:
                optim_vars = kf2.depth
                aux_vars = kf1.image, kf2.image, self.p_unit_sphere_or_plane, self.k, kf1.depth, kf1.T_wc, kf2.T_wc
                cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose_and_ref, 1,
                                                        aux_vars=aux_vars)
            else:
                optim_vars = kf1.depth, kf2.depth
                aux_vars = kf1.image, kf2.image, self.p_unit_sphere_or_plane, self.k, kf1.T_wc, kf2.T_wc
                cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1,
                                                        aux_vars=aux_vars)
        else:
            if fix_ref:
                optim_vars = kf2.depth, kf2.T_wc
                aux_vars = kf1.image, kf2.image, self.p_unit_sphere_or_plane, self.k, kf1.depth, kf1.T_wc
                cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_ref, 1,
                                                        aux_vars=aux_vars)
            else:
                optim_vars = kf1.depth, kf2.depth, kf1.T_wc, kf2.T_wc
                aux_vars = kf1.image, kf2.image, self.p_unit_sphere_or_plane, self.k
                cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error, 1,
                                                        aux_vars=aux_vars)
        self.objective.add(cost_function)

    def add_node_cost_function(self, kf: Keyframe):
        kf.data_updated = True

        # constraint: depth > 0
        cost_function = DepthConstraint(kf.depth, th.ScaleCostWeight(
            torch.tensor(1000., dtype=kf.depth.dtype, device=kf.depth.device)))
        self.objective.add(cost_function)

    optimizer = th.LevenbergMarquardt(
        problem.objective,
        linear_solver_cls=th.BaspachoSparseSolver,
        linearization_cls=th.SparseLinearization,
    )
    theseus_layer = th.TheseusLayer(optimizer)
    with global_map.keyframes_mutex:
        theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
            "verbose": cfg.inner_optim.verbose,
            "adaptive_damping": True})
luisenp commented 1 year ago

Do you get an error if you use autograd_mode="dense" for the AutoDiffCostFunction?
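
For reference, that means passing autograd_mode="dense" when constructing the cost function, e.g. a sketch based on the add_edge_cost_function snippet above:

cost_function = th.AutoDiffCostFunction(
    optim_vars,
    photometric_error_fix_pose,
    1,
    aux_vars=aux_vars,
    autograd_mode="dense",  # plain autograd instead of the vmap path shown in the traceback
)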

This is a blind guess, but are you accessing the underlying C pointer for any tensors? I couldn't see what is happening inside _photometric_error().

EXing commented 1 year ago

The error with autograd_mode="dense":

RuntimeError: There was an error while running the linear optimizer. Original error message: [enforce fail at alloc_cpu.cpp:75] err == 0. DefaultCPUAllocator: can't allocate memory: you tried to allocate 3019898880000 bytes. Error code 12 (Cannot allocate memory). Backward pass will not work. To obtain the best solution seen before the error, run with torch.no_grad()

def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
                       d1: th.Vector, d2: th.Vector,
                       image1: th.Variable, image2: th.Variable,
                       p_unit_sphere_or_plane: th.Variable,
                       k: th.Variable) -> torch.Tensor:
    """note: undistort and crop image ahead, since mask not supported."""
    batch_size = image1.shape[0]
    h = image1.shape[2]
    w = image1.shape[3]

    # bidirectional projection: 2 -> 1
    T_c1c2 = T_wc1.inverse().compose(T_wc2)
    p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
    p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
    err12 = image2_in_1 - image2.tensor

    # bidirectional projection: 1 -> 2
    T_c2c1 = T_c1c2.inverse()
    p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
    p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
    err21 = image1_in_2 - image1.tensor

    # the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
    err = torch.cat((err21, err12), dim=1)
    err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
                                         reduction="none", delta=0.5)
    return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1)  # (batch_size, 1)
EXing commented 1 year ago

[Two screenshots attached: 2023-09-09 15-56-46 and 2023-09-09 16-15-32]

luisenp commented 1 year ago

That's A LOT of memory that was trying to be allocated. What's the size of your optimization problem? In particular:

Also, I didn't see anything too strange inside _photometric_error. In general, the easiest way for us to help would be if you can isolate the error and provide a short repro script that doesn't rely on loading any external data.

EXing commented 1 year ago

@luisenp repro script here:

import torch
import theseus as th
import torchlie.functional as lieF

def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
                       d1: th.Vector, d2: th.Vector,
                       image1: th.Variable, image2: th.Variable,
                       p_unit_sphere_or_plane: th.Variable,
                       k: th.Variable) -> torch.Tensor:
    """note: undistort and crop image ahead, since mask not supported."""
    batch_size = image1.shape[0]
    h = image1.shape[2]
    w = image1.shape[3]

    # bidirectional projection: 2 -> 1
    T_c1c2 = T_wc1.inverse().compose(T_wc2)
    p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
    p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
    err12 = image2_in_1 - image2.tensor

    # bidirectional projection: 1 -> 2
    T_c2c1 = T_c1c2.inverse()
    p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
    p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
    err21 = image1_in_2 - image1.tensor

    # the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
    err = torch.cat((err21, err12), dim=1)
    err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
                                         reduction="none", delta=0.5)
    return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1)  # (batch_size, 1)

def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
    d1: th.Vector = optim_vars[0]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    d2: th.Vector = optim_vars[1]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)

    image1: th.Variable = aux_vars[0]  # (batch_size, 1, h, w) {normalized_intensity}
    image2: th.Variable = aux_vars[1]  # (batch_size, 1, h, w) {normalized_intensity}
    p_unit_sphere_or_plane: th.Variable = aux_vars[2]  # (batch_size=1, h, w, 3)
    k: th.Variable = aux_vars[3]  # (batch_size=1, 4) {fx, fy, cx, cy}
    T_wc1: th.SE3 = aux_vars[4]
    T_wc2: th.SE3 = aux_vars[5]

    return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)

def main():
    device = "cuda:0"
    dtype = torch.float32
    h = 480
    w = 640
    image1 = th.Variable(torch.rand(1, 1, 480, 640, dtype=dtype, device=device))
    image2 = th.Variable(torch.rand(1, 1, 480, 640, dtype=dtype, device=device))
    depth1 = th.Vector.rand(1, 480 * 640, dtype=dtype, device=device, requires_grad=True)
    depth2 = th.Vector.rand(1, 480 * 640, dtype=dtype, device=device, requires_grad=True)
    T_wc1 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
    T_wc2 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)

    k = th.Variable(torch.tensor([481.20, -480.00, 319.50, 239.50], dtype=dtype, device=device).view(1, 4))
    with torch.no_grad():
        x_coords = torch.linspace(0, w - 1, w, device=k.device)
        y_coords = torch.linspace(0, h - 1, h, device=k.device)
        fx, fy, cx, cy = k[0, 0], k[0, 1], k[0, 2], k[0, 3]
        x_normalized = (x_coords - cx) / fx  # (w)
        y_normalized = (y_coords - cy) / fy  # (h)
        p_unit_sphere_or_plane = torch.empty((1, h, w, 3), dtype=k.dtype, device=k.device)
        p_unit_sphere_or_plane[..., 0] = x_normalized.view(1, 1, w)
        p_unit_sphere_or_plane[..., 1] = y_normalized.view(1, h, 1)
        p_unit_sphere_or_plane[..., 2] = 1
    p_unit_sphere_or_plane = th.Variable(p_unit_sphere_or_plane)  # (batch_size=1, h, w, 3)

    optim_vars = depth1, depth2
    aux_vars = image1, image2, p_unit_sphere_or_plane, k, T_wc1, T_wc2
    weight = th.ScaleCostWeight(torch.tensor(1., dtype=dtype, device=device))
    cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1, weight,
                                            aux_vars=aux_vars)
    objective = th.Objective(dtype=k.dtype)
    objective.add(cost_function)
    optimizer = th.LevenbergMarquardt(
        objective,
        linear_solver_cls=th.BaspachoSparseSolver,
        linearization_cls=th.SparseLinearization,
    )
    theseus_layer = th.TheseusLayer(optimizer)
    theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
        "verbose": True,
        "adaptive_damping": True})

if __name__ == "__main__":
    main()
EXing commented 1 year ago

By setting smaller w and h and trying autograd_mode="dense", I found a bug in theseus:

import torch
import theseus as th
import torchlie.functional as lieF

def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
                       d1: th.Vector, d2: th.Vector,
                       image1: th.Variable, image2: th.Variable,
                       p_unit_sphere_or_plane: th.Variable,
                       k: th.Variable) -> torch.Tensor:
    """note: undistort and crop image ahead, since mask not supported."""
    batch_size = image1.shape[0]
    h = image1.shape[2]
    w = image1.shape[3]

    # bidirectional projection: 2 -> 1
    T_c1c2 = T_wc1.inverse().compose(T_wc2)
    p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
    p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
    err12 = image2_in_1 - image2.tensor

    # bidirectional projection: 1 -> 2
    T_c2c1 = T_c1c2.inverse()
    p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
    p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
    err21 = image1_in_2 - image1.tensor

    # the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
    err = torch.cat((err21, err12), dim=1)
    err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
                                         reduction="none", delta=0.5)
    return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1)  # (batch_size, 1)

def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
    d1: th.Vector = optim_vars[0]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    d2: th.Vector = optim_vars[1]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)

    image1: th.Variable = aux_vars[0]  # (batch_size, 1, h, w) {normalized_intensity}
    image2: th.Variable = aux_vars[1]  # (batch_size, 1, h, w) {normalized_intensity}
    p_unit_sphere_or_plane: th.Variable = aux_vars[2]  # (batch_size=1, h, w, 3)
    k: th.Variable = aux_vars[3]  # (batch_size=1, 4) {fx, fy, cx, cy}
    T_wc1: th.SE3 = aux_vars[4]
    T_wc2: th.SE3 = aux_vars[5]

    return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)

def main():
    device = "cuda:0"
    dtype = torch.float32
    h = 10
    w = 10
    image1 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
    image2 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
    depth1 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
    depth2 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
    T_wc1 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
    T_wc2 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)

    k = th.Variable(torch.tensor([481.20, -480.00, 319.50, 239.50], dtype=dtype, device=device).view(1, 4))
    with torch.no_grad():
        x_coords = torch.linspace(0, w - 1, w, device=k.device)
        y_coords = torch.linspace(0, h - 1, h, device=k.device)
        fx, fy, cx, cy = k[0, 0], k[0, 1], k[0, 2], k[0, 3]
        x_normalized = (x_coords - cx) / fx  # (w)
        y_normalized = (y_coords - cy) / fy  # (h)
        p_unit_sphere_or_plane = torch.empty((1, h, w, 3), dtype=k.dtype, device=k.device)
        p_unit_sphere_or_plane[..., 0] = x_normalized.view(1, 1, w)
        p_unit_sphere_or_plane[..., 1] = y_normalized.view(1, h, 1)
        p_unit_sphere_or_plane[..., 2] = 1
    p_unit_sphere_or_plane = th.Variable(p_unit_sphere_or_plane)  # (batch_size=1, h, w, 3)

    optim_vars = depth1, depth2
    aux_vars = image1, image2, p_unit_sphere_or_plane, k, T_wc1, T_wc2
    weight = th.ScaleCostWeight(torch.tensor(1., dtype=dtype, device=device))
    cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1, weight,
                                            aux_vars=aux_vars, autograd_mode="dense")
    objective = th.Objective(dtype=k.dtype)
    objective.device = device
    objective.add(cost_function)
    optimizer = th.LevenbergMarquardt(
        objective,
        linear_solver_cls=th.BaspachoSparseSolver,
        linearization_cls=th.SparseLinearization,
    )
    theseus_layer = th.TheseusLayer(optimizer)
    theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
        "verbose": True,
        "adaptive_damping": True})

if __name__ == "__main__":
    main()
Traceback (most recent call last):
  File "/home/huangkun/Git/lora_slam/test/reproduce.py", line 98, in <module>
    main()
  File "/home/huangkun/Git/lora_slam/test/reproduce.py", line 86, in main
    optimizer = th.LevenbergMarquardt(
  File "/home/huangkun/Git/theseus/theseus/optimizer/nonlinear/levenberg_marquardt.py", line 69, in __init__
    super().__init__(
  File "/home/huangkun/Git/theseus/theseus/optimizer/nonlinear/nonlinear_least_squares.py", line 90, in __init__
    self.linear_solver = linear_solver_cls(
  File "/home/huangkun/Git/theseus/theseus/optimizer/linear/baspacho_sparse_solver.py", line 45, in __init__
    self.reset()
  File "/home/huangkun/Git/theseus/theseus/optimizer/linear/baspacho_sparse_solver.py", line 111, in reset
    self.symbolic_decomposition = SymbolicDecomposition(
RuntimeError: Expected device == "cpu" || device == "cuda" to be true, but got false.  (Could this error message be improved?  If so, please report an enhancement request to PyTorch.)

baspacho_sparse_solver only accepts the device name "cpu" or "cuda", but mine is "cuda:0".
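
A possible workaround on my side (sketch, not verified against the solver internals) is to pass only the bare device type:

import torch

# Hypothetical workaround: strip the device index so the Baspacho check
# sees "cuda" instead of "cuda:0".
device = torch.device("cuda:0")
bare_device = device.type   # "cuda"
# ...and then create all tensors/variables with device=bare_device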

EXing commented 1 year ago

After disabling the device check by commenting out line 775 in objective.py, autograd_mode="dense" works. But vmap still fails with the same error: RuntimeError: sym_strides() called on an undefined Tensor.

import torch
import theseus as th
import torchlie.functional as lieF

def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
                       d1: th.Vector, d2: th.Vector,
                       image1: th.Variable, image2: th.Variable,
                       p_unit_sphere_or_plane: th.Variable,
                       k: th.Variable) -> torch.Tensor:
    """note: undistort and crop image ahead, since mask not supported."""
    batch_size = image1.shape[0]
    h = image1.shape[2]
    w = image1.shape[3]

    # bidirectional projection: 2 -> 1
    T_c1c2 = T_wc1.inverse().compose(T_wc2)
    p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
    p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
    err12 = image2_in_1 - image2.tensor

    # bidirectional projection: 1 -> 2
    T_c2c1 = T_c1c2.inverse()
    p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
    p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
    err21 = image1_in_2 - image1.tensor

    # the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
    err = torch.cat((err21, err12), dim=1)
    err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
                                         reduction="none", delta=0.5)
    return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1)  # (batch_size, 1)

def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
    d1: th.Vector = optim_vars[0]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    d2: th.Vector = optim_vars[1]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)

    image1: th.Variable = aux_vars[0]  # (batch_size, 1, h, w) {normalized_intensity}
    image2: th.Variable = aux_vars[1]  # (batch_size, 1, h, w) {normalized_intensity}
    p_unit_sphere_or_plane: th.Variable = aux_vars[2]  # (batch_size=1, h, w, 3)
    k: th.Variable = aux_vars[3]  # (batch_size=1, 4) {fx, fy, cx, cy}
    T_wc1: th.SE3 = aux_vars[4]
    T_wc2: th.SE3 = aux_vars[5]

    return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)

def main():
    device = torch.device("cuda")
    dtype = torch.float32
    h = 10
    w = 10
    image1 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
    image2 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
    depth1 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
    depth2 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
    T_wc1 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
    T_wc2 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)

    k = th.Variable(torch.tensor([481.20, -480.00, 319.50, 239.50], dtype=dtype, device=device).view(1, 4))
    with torch.no_grad():
        x_coords = torch.linspace(0, w - 1, w, device=k.device)
        y_coords = torch.linspace(0, h - 1, h, device=k.device)
        fx, fy, cx, cy = k[0, 0], k[0, 1], k[0, 2], k[0, 3]
        x_normalized = (x_coords - cx) / fx  # (w)
        y_normalized = (y_coords - cy) / fy  # (h)
        p_unit_sphere_or_plane = torch.empty((1, h, w, 3), dtype=k.dtype, device=k.device)
        p_unit_sphere_or_plane[..., 0] = x_normalized.view(1, 1, w)
        p_unit_sphere_or_plane[..., 1] = y_normalized.view(1, h, 1)
        p_unit_sphere_or_plane[..., 2] = 1
    p_unit_sphere_or_plane = th.Variable(p_unit_sphere_or_plane)  # (batch_size=1, h, w, 3)

    optim_vars = depth1, depth2
    aux_vars = image1, image2, p_unit_sphere_or_plane, k, T_wc1, T_wc2
    weight = th.ScaleCostWeight(torch.tensor(1., dtype=dtype, device=device))
    cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1, weight,
                                            aux_vars=aux_vars)
    objective = th.Objective(dtype=k.dtype)
    objective.to(device=device)
    objective.add(cost_function)
    optimizer = th.LevenbergMarquardt(
        objective,
        linear_solver_cls=th.BaspachoSparseSolver,
        linearization_cls=th.SparseLinearization,
    )
    theseus_layer = th.TheseusLayer(optimizer)
    theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
        "verbose": True,
        "adaptive_damping": True})

if __name__ == "__main__":
    main()
luisenp commented 1 year ago

Thanks!

I played a bit with this and it seems that torch.vmap doesn't like this operation inside _photometric_error --> p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1) (particularly, the view function). You'll need to rewrite this line in some way without using view if you want to use vmap. Note that this autograd mode is only useful if your batch size is > 1.
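
For example, one possible rewrite inside _photometric_error (an untested sketch) that avoids referencing batch_size in view:

# Untested sketch: reshape d2 without an explicit batch_size, which tends to play
# nicer with vmap's batching rules.
d2_img = d2.tensor.unflatten(-1, (h, w)).unsqueeze(-1)   # (..., h, w, 1)
p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2_img)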

I also ran with autograd mode dense, with a smaller image size (48 x 64), and the code also works. However, for your full image size I'm not sure you'll be able to run our optimizers. You have two optimization variables of size 480 x 640 = 307,200, so you'll end up having to solve a dense linear system with a matrix of size 614,400 x 614,400, which at float32 precision is something like 1.5 TB of memory.

Maybe you need to revisit how you are formulating this problem?

luisenp commented 1 year ago

Oh sorry, I missed your most recent messages. I'll open an issue for the Baspacho bug, thanks for catching this!

EXing commented 1 year ago

Thanks. Do you have any suggestions about the choice of linear solver and linearization for this problem?

luisenp commented 1 year ago

For problems with many cost functions and sparse connectivity, Baspacho is usually the best choice. On the other hand, if you have a single cost function, then there is no advantage to using a sparse solver, because the linear system will be dense; in that case CholeskyDenseSolver is probably better. When there are only a few cost functions, it's hard to say; it might require some experimentation.
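
For a single large cost function like yours, the dense setup would look roughly like this (same constructor pattern as in your repro):

optimizer = th.LevenbergMarquardt(
    objective,
    linear_solver_cls=th.CholeskyDenseSolver,
    linearization_cls=th.DenseLinearization,
)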

That being said, for your current formulation the main issue is the extremely large number of optimization variables. Are you able to reformulate this problem in some way so that the dimension of the optimization variables is lower?

EXing commented 1 year ago

Thanks, I will try a sparse depth encoding. BTW, the docs say the optimization variable needs to be a subclass of Manifold, but I find there is no matrix-type Manifold.
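
(For now I guess a matrix can be stored flattened in a th.Vector and reshaped inside the cost function, roughly like this:)

import theseus as th

# Rough sketch: an n x m matrix stored as a flattened th.Vector (Euclidean manifold),
# reshaped back to (batch, n, m) inside the error function.
n, m = 3, 5
M_var = th.Vector.rand(1, n * m)        # dof = n * m
M = M_var.tensor.unflatten(-1, (n, m))  # (batch, n, m)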

luisenp commented 1 year ago

@EXing We don't have a Matrix class, unfortunately, as we prioritized other features. We do welcome community contributions and pull requests, so, if you are interested we can give you some guidance on how to get started.

EXing commented 1 year ago

I will try, but I don't have enough time to do this. BTW, it seems vmap supports the view() function:

import torch

# Define a sample tensor
batch_size = 5
h = 4
w = 4
tensor = torch.rand((batch_size, h * w))

# Define a function to reshape each tensor in the batch
def reshape_tensor(tensor):
    return tensor.view(h, w)

# Use vmap to apply the reshape function to each tensor in the batch
result = torch.func.vmap(reshape_tensor)(tensor)

print(result.shape)  # Output: (5, 4, 4)
EXing commented 1 year ago

By downsampling the depth to (60, 80) outside the cost function and upsampling it back to (480, 640) inside, the optimization is able to run (see the sketch below). But it's very slow and memory-consuming; any suggestions?
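
Roughly, the scheme is (sketch):

import torch
import torch.nn.functional as F

# Sketch of the scheme above: the optimization variable holds a coarse (60, 80) depth map,
# and the cost function upsamples it to (480, 640) before reprojection.
depth_coarse = torch.rand(1, 60 * 80)                            # tensor of the th.Vector variable
depth_img = depth_coarse.unflatten(-1, (60, 80)).unsqueeze(1)    # (batch, 1, 60, 80)
depth_full = F.interpolate(depth_img, size=(480, 640),
                           mode="bilinear", align_corners=False)  # (batch, 1, 480, 640)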

EXing commented 1 year ago

I found the dense solver is faster, but CholeskyDenseSolver fails with an error; only LUDenseSolver works:

RuntimeError: There was an error while running the linear optimizer. Original error message: linalg.cholesky: (Batch element 0): The factorization could not be completed because the input is not positive-definite (the leading minor of order 641 is not positive-definite).

luisenp commented 1 year ago

The problem may not necessarily be the use of view, but some combination of view with the way the tensor is created, or with other operations. What I know is that when I replaced the operation p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1) with p_unit_sphere_or_plane.tensor, I was able to run with vmap.

That's strange about LUDenseSolver working but CholeskyDenseSolver not. Do you have a repro?

EXing commented 1 year ago

LUDenseSolver only works with LevenbergMarquardt

luisenp commented 1 year ago

If you are using GaussNewton, then that's not too surprising, because we don't use damping when solving the linear system, and you can get the kind of error you reported above in some cases.

EXing commented 1 year ago

The problem may not necessarily be the use of view, but some combination of view with the way the tensor is created, or with other operations. What I know is that when I replaced the operation p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1) with p_unit_sphere_or_plane.tensor, I was able to run with vmap.

That's strange about LUDenseSolver working but CholeskyDenseSolver not. Do you have a repro?

Still the same code:

import torch
import theseus as th
import torchlie.functional as lieF

def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
                       d1: th.Vector, d2: th.Vector,
                       image1: th.Variable, image2: th.Variable,
                       p_unit_sphere_or_plane: th.Variable,
                       k: th.Variable) -> torch.Tensor:
    """note: undistort and crop image ahead, since mask not supported."""
    batch_size = image1.shape[0]
    h = image1.shape[2]
    w = image1.shape[3]

    # bidirectional projection: 2 -> 1
    T_c1c2 = T_wc1.inverse().compose(T_wc2)
    p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
    p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
    err12 = image2_in_1 - image2.tensor

    # bidirectional projection: 1 -> 2
    T_c2c1 = T_c1c2.inverse()
    p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
    p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None]  # (batch_size, h, w, 2)
    p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
    image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
    err21 = image1_in_2 - image1.tensor

    # the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
    err = torch.cat((err21, err12), dim=1)
    err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
                                         reduction="none", delta=0.5)
    return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1)  # (batch_size, 1)

def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
    d1: th.Vector = optim_vars[0]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
    d2: th.Vector = optim_vars[1]  # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)

    image1: th.Variable = aux_vars[0]  # (batch_size, 1, h, w) {normalized_intensity}
    image2: th.Variable = aux_vars[1]  # (batch_size, 1, h, w) {normalized_intensity}
    p_unit_sphere_or_plane: th.Variable = aux_vars[2]  # (batch_size=1, h, w, 3)
    k: th.Variable = aux_vars[3]  # (batch_size=1, 4) {fx, fy, cx, cy}
    T_wc1: th.SE3 = aux_vars[4]
    T_wc2: th.SE3 = aux_vars[5]

    return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)

def main():
    device = "cuda:0"
    dtype = torch.float32
    h = int(480/8)
    w = int(640/8)
    image1 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
    image2 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
    depth1 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
    depth2 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
    T_wc1 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
    T_wc2 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)

    k = th.Variable(torch.tensor([481.20, -480.00, 319.50, 239.50], dtype=dtype, device=device).view(1, 4))
    with torch.no_grad():
        x_coords = torch.linspace(0, w - 1, w, device=k.device)
        y_coords = torch.linspace(0, h - 1, h, device=k.device)
        fx, fy, cx, cy = k[0, 0], k[0, 1], k[0, 2], k[0, 3]
        x_normalized = (x_coords - cx) / fx  # (w)
        y_normalized = (y_coords - cy) / fy  # (h)
        p_unit_sphere_or_plane = torch.empty((1, h, w, 3), dtype=k.dtype, device=k.device)
        p_unit_sphere_or_plane[..., 0] = x_normalized.view(1, 1, w)
        p_unit_sphere_or_plane[..., 1] = y_normalized.view(1, h, 1)
        p_unit_sphere_or_plane[..., 2] = 1
    p_unit_sphere_or_plane = th.Variable(p_unit_sphere_or_plane)  # (batch_size=1, h, w, 3)

    optim_vars = depth1, depth2
    aux_vars = image1, image2, p_unit_sphere_or_plane, k, T_wc1, T_wc2
    weight = th.ScaleCostWeight(torch.tensor(1., dtype=dtype, device=device))
    cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1, weight,
                                            aux_vars=aux_vars, autograd_mode="dense")
    objective = th.Objective(dtype=k.dtype)
    objective.to(device=device)
    objective.add(cost_function)
    optimizer = th.LevenbergMarquardt(
        objective,
        linear_solver_cls=th.CholeskyDenseSolver,
        linearization_cls=th.DenseLinearization,
    )
    theseus_layer = th.TheseusLayer(optimizer)
    theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
        "verbose": True,
        "adaptive_damping": True})

if __name__ == "__main__":
    main()
luisenp commented 1 year ago

Something seems off about your optimization problem. The Jacobian matrix of size 1 x 9600 is mostly zeros, except for 5 elements, as shown below, resulting in a rank-deficient matrix that's not possible to solve.

[Image: visualization of the 1 x 9600 Jacobian, mostly zeros]
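
A quick way to check this on your side (a diagnostic sketch; cost_function is the AutoDiffCostFunction from your repro):

jacs, err = cost_function.jacobians()                          # list of (batch, 1, dof) tensors and the error
J = torch.cat([j.flatten(start_dim=1) for j in jacs], dim=1)   # here (1, 9600)
print(J.shape, int((J != 0).sum()), torch.linalg.matrix_rank(J))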
EXing commented 1 year ago

Something seems off about your optimization problem. The Jacobian matrix of size 1 x 9600 is mostly zeros, except for 5 elements, as shown below, resulting in a rank-deficient matrix that's not possible to solve.

[Image: visualization of the 1 x 9600 Jacobian, mostly zeros]

That's true. Thanks. By the way, should we manually set requires_grad=True for opt variables?

luisenp commented 1 year ago

That's not necessary, even when using AutoDiffCostFunction.
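
i.e., constructing them plainly is enough (using the same constructors as in your repro):

# No requires_grad needed; Theseus manages differentiation of optimization variables.
depth1 = th.Vector.rand(1, h * w, dtype=dtype, device=device)
T_wc1 = th.SE3.rand(1, dtype=dtype, device=device)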

luisenp commented 1 year ago

It's been a while so I'll close this issue. Feel free to reopen if this exact problem still persists.