Closed EXing closed 1 year ago
Do you get an error if you use autograd_mode="dense"
for the AutoDiffCostFunction
?
This is a blind guess, but are you accessing the underlying C pointer for any tensors? I couldn't see what is happening inside _photometric_error()
.
error for autograd_mode="dense":
RuntimeError: There was an error while running the linear optimizer. Original error message: [enforce fail at alloc_cpu.cpp:75] err == 0. DefaultCPUAllocator: can't allocate memory: you tried to allocate 3019898880000 bytes. Error code 12 (Cannot allocate memory). Backward pass will not work. To obtain the best solution seen before the error, run with torch.no_grad()
def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
d1: th.Vector, d2: th.Vector,
image1: th.Variable, image2: th.Variable,
p_unit_sphere_or_plane: th.Variable,
k: th.Variable) -> torch.Tensor:
"""note: undistort and crop image ahead, since mask not supported."""
batch_size = image1.shape[0]
h = image1.shape[2]
w = image1.shape[3]
# bidirectional projection: 2 -> 1
T_c1c2 = T_wc1.inverse().compose(T_wc2)
p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None] # (batch_size, h, w, 2)
p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
err12 = image2_in_1 - image2.tensor
# bidirectional projection: 1 -> 2
T_c2c1 = T_c1c2.inverse()
p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None] # (batch_size, h, w, 2)
p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
err21 = image1_in_2 - image1.tensor
# the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
err = torch.cat((err21, err12), dim=1)
err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
reduction="none", delta=0.5)
return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1) # (batch_size, 1)
That's A LOT of memory that was trying to be allocated. What's the size of your optimization problem? In particular:
Also, I didn't see anything too strange inside _photometric_error
. In general, the easiest way for us to help would be if you can isolate the error and provide a short repro script that doesn't rely on loading any external data.
@luisenp repro script here:
import torch
import theseus as th
import torchlie.functional as lieF
def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
d1: th.Vector, d2: th.Vector,
image1: th.Variable, image2: th.Variable,
p_unit_sphere_or_plane: th.Variable,
k: th.Variable) -> torch.Tensor:
"""note: undistort and crop image ahead, since mask not supported."""
batch_size = image1.shape[0]
h = image1.shape[2]
w = image1.shape[3]
# bidirectional projection: 2 -> 1
T_c1c2 = T_wc1.inverse().compose(T_wc2)
p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None] # (batch_size, h, w, 2)
p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
err12 = image2_in_1 - image2.tensor
# bidirectional projection: 1 -> 2
T_c2c1 = T_c1c2.inverse()
p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None] # (batch_size, h, w, 2)
p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
err21 = image1_in_2 - image1.tensor
# the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
err = torch.cat((err21, err12), dim=1)
err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
reduction="none", delta=0.5)
return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1) # (batch_size, 1)
def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
d1: th.Vector = optim_vars[0] # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
d2: th.Vector = optim_vars[1] # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
image1: th.Variable = aux_vars[0] # (batch_size, 1, h, w) {normalized_intensity}
image2: th.Variable = aux_vars[1] # (batch_size, 1, h, w) {normalized_intensity}
p_unit_sphere_or_plane: th.Variable = aux_vars[2] # (batch_size=1, h, w, 3)
k: th.Variable = aux_vars[3] # (batch_size=1, 4) {fx, fy, cx, cy}
T_wc1: th.SE3 = aux_vars[4]
T_wc2: th.SE3 = aux_vars[5]
return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)
def main():
device = "cuda:0"
dtype = torch.float32
h = 480
w = 640
image1 = th.Variable(torch.rand(1, 1, 480, 640, dtype=dtype, device=device))
image2 = th.Variable(torch.rand(1, 1, 480, 640, dtype=dtype, device=device))
depth1 = th.Vector.rand(1, 480 * 640, dtype=dtype, device=device, requires_grad=True)
depth2 = th.Vector.rand(1, 480 * 640, dtype=dtype, device=device, requires_grad=True)
T_wc1 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
T_wc2 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
k = th.Variable(torch.tensor([481.20, -480.00, 319.50, 239.50], dtype=dtype, device=device).view(1, 4))
with torch.no_grad():
x_coords = torch.linspace(0, w - 1, w, device=k.device)
y_coords = torch.linspace(0, h - 1, h, device=k.device)
fx, fy, cx, cy = k[0, 0], k[0, 1], k[0, 2], k[0, 3]
x_normalized = (x_coords - cx) / fx # (w)
y_normalized = (y_coords - cy) / fy # (h)
p_unit_sphere_or_plane = torch.empty((1, h, w, 3), dtype=k.dtype, device=k.device)
p_unit_sphere_or_plane[..., 0] = x_normalized.view(1, 1, w)
p_unit_sphere_or_plane[..., 1] = y_normalized.view(1, h, 1)
p_unit_sphere_or_plane[..., 2] = 1
p_unit_sphere_or_plane = th.Variable(p_unit_sphere_or_plane) # (batch_size=1, h, w, 3)
optim_vars = depth1, depth2
aux_vars = image1, image2, p_unit_sphere_or_plane, k, T_wc1, T_wc2
weight = th.ScaleCostWeight(torch.tensor(1., dtype=dtype, device=device))
cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1, weight,
aux_vars=aux_vars)
objective = th.Objective(dtype=k.dtype)
objective.add(cost_function)
optimizer = th.LevenbergMarquardt(
objective,
linear_solver_cls=th.BaspachoSparseSolver,
linearization_cls=th.SparseLinearization,
)
theseus_layer = th.TheseusLayer(optimizer)
theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
"verbose": True,
"adaptive_damping": True})
if __name__ == "__main__":
main()
By setting smaller w and h, try autograd_mode="dense"
, I find it's a bug for theseus:
import torch
import theseus as th
import torchlie.functional as lieF
def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
d1: th.Vector, d2: th.Vector,
image1: th.Variable, image2: th.Variable,
p_unit_sphere_or_plane: th.Variable,
k: th.Variable) -> torch.Tensor:
"""note: undistort and crop image ahead, since mask not supported."""
batch_size = image1.shape[0]
h = image1.shape[2]
w = image1.shape[3]
# bidirectional projection: 2 -> 1
T_c1c2 = T_wc1.inverse().compose(T_wc2)
p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None] # (batch_size, h, w, 2)
p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
err12 = image2_in_1 - image2.tensor
# bidirectional projection: 1 -> 2
T_c2c1 = T_c1c2.inverse()
p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None] # (batch_size, h, w, 2)
p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
err21 = image1_in_2 - image1.tensor
# the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
err = torch.cat((err21, err12), dim=1)
err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
reduction="none", delta=0.5)
return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1) # (batch_size, 1)
def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
d1: th.Vector = optim_vars[0] # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
d2: th.Vector = optim_vars[1] # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
image1: th.Variable = aux_vars[0] # (batch_size, 1, h, w) {normalized_intensity}
image2: th.Variable = aux_vars[1] # (batch_size, 1, h, w) {normalized_intensity}
p_unit_sphere_or_plane: th.Variable = aux_vars[2] # (batch_size=1, h, w, 3)
k: th.Variable = aux_vars[3] # (batch_size=1, 4) {fx, fy, cx, cy}
T_wc1: th.SE3 = aux_vars[4]
T_wc2: th.SE3 = aux_vars[5]
return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)
def main():
device = "cuda:0"
dtype = torch.float32
h = 10
w = 10
image1 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
image2 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
depth1 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
depth2 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
T_wc1 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
T_wc2 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
k = th.Variable(torch.tensor([481.20, -480.00, 319.50, 239.50], dtype=dtype, device=device).view(1, 4))
with torch.no_grad():
x_coords = torch.linspace(0, w - 1, w, device=k.device)
y_coords = torch.linspace(0, h - 1, h, device=k.device)
fx, fy, cx, cy = k[0, 0], k[0, 1], k[0, 2], k[0, 3]
x_normalized = (x_coords - cx) / fx # (w)
y_normalized = (y_coords - cy) / fy # (h)
p_unit_sphere_or_plane = torch.empty((1, h, w, 3), dtype=k.dtype, device=k.device)
p_unit_sphere_or_plane[..., 0] = x_normalized.view(1, 1, w)
p_unit_sphere_or_plane[..., 1] = y_normalized.view(1, h, 1)
p_unit_sphere_or_plane[..., 2] = 1
p_unit_sphere_or_plane = th.Variable(p_unit_sphere_or_plane) # (batch_size=1, h, w, 3)
optim_vars = depth1, depth2
aux_vars = image1, image2, p_unit_sphere_or_plane, k, T_wc1, T_wc2
weight = th.ScaleCostWeight(torch.tensor(1., dtype=dtype, device=device))
cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1, weight,
aux_vars=aux_vars, autograd_mode="dense")
objective = th.Objective(dtype=k.dtype)
objective.device = device
objective.add(cost_function)
optimizer = th.LevenbergMarquardt(
objective,
linear_solver_cls=th.BaspachoSparseSolver,
linearization_cls=th.SparseLinearization,
)
theseus_layer = th.TheseusLayer(optimizer)
theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
"verbose": True,
"adaptive_damping": True})
if __name__ == "__main__":
main()
Traceback (most recent call last):
File "/home/huangkun/Git/lora_slam/test/reproduce.py", line 98, in <module>
main()
File "/home/huangkun/Git/lora_slam/test/reproduce.py", line 86, in main
optimizer = th.LevenbergMarquardt(
File "/home/huangkun/Git/theseus/theseus/optimizer/nonlinear/levenberg_marquardt.py", line 69, in __init__
super().__init__(
File "/home/huangkun/Git/theseus/theseus/optimizer/nonlinear/nonlinear_least_squares.py", line 90, in __init__
self.linear_solver = linear_solver_cls(
File "/home/huangkun/Git/theseus/theseus/optimizer/linear/baspacho_sparse_solver.py", line 45, in __init__
self.reset()
File "/home/huangkun/Git/theseus/theseus/optimizer/linear/baspacho_sparse_solver.py", line 111, in reset
self.symbolic_decomposition = SymbolicDecomposition(
RuntimeError: Expected device == "cpu" || device == "cuda" to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
baspacho_sparse_solver only accept device name as "cpu" or "cuda", but mine is "cuda:0"
After disabling the device check by comment line 775 in objective.py, 'autograd_mode="dense"' works. But still the same error for vmap. 'RuntimeError: sym_strides() called on an undefined Tensor'
import torch
import theseus as th
import torchlie.functional as lieF
def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
d1: th.Vector, d2: th.Vector,
image1: th.Variable, image2: th.Variable,
p_unit_sphere_or_plane: th.Variable,
k: th.Variable) -> torch.Tensor:
"""note: undistort and crop image ahead, since mask not supported."""
batch_size = image1.shape[0]
h = image1.shape[2]
w = image1.shape[3]
# bidirectional projection: 2 -> 1
T_c1c2 = T_wc1.inverse().compose(T_wc2)
p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None] # (batch_size, h, w, 2)
p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
err12 = image2_in_1 - image2.tensor
# bidirectional projection: 1 -> 2
T_c2c1 = T_c1c2.inverse()
p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None] # (batch_size, h, w, 2)
p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
err21 = image1_in_2 - image1.tensor
# the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
err = torch.cat((err21, err12), dim=1)
err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
reduction="none", delta=0.5)
return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1) # (batch_size, 1)
def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
d1: th.Vector = optim_vars[0] # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
d2: th.Vector = optim_vars[1] # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
image1: th.Variable = aux_vars[0] # (batch_size, 1, h, w) {normalized_intensity}
image2: th.Variable = aux_vars[1] # (batch_size, 1, h, w) {normalized_intensity}
p_unit_sphere_or_plane: th.Variable = aux_vars[2] # (batch_size=1, h, w, 3)
k: th.Variable = aux_vars[3] # (batch_size=1, 4) {fx, fy, cx, cy}
T_wc1: th.SE3 = aux_vars[4]
T_wc2: th.SE3 = aux_vars[5]
return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)
def main():
device = torch.device("cuda")
dtype = torch.float32
h = 10
w = 10
image1 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
image2 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
depth1 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
depth2 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
T_wc1 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
T_wc2 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
k = th.Variable(torch.tensor([481.20, -480.00, 319.50, 239.50], dtype=dtype, device=device).view(1, 4))
with torch.no_grad():
x_coords = torch.linspace(0, w - 1, w, device=k.device)
y_coords = torch.linspace(0, h - 1, h, device=k.device)
fx, fy, cx, cy = k[0, 0], k[0, 1], k[0, 2], k[0, 3]
x_normalized = (x_coords - cx) / fx # (w)
y_normalized = (y_coords - cy) / fy # (h)
p_unit_sphere_or_plane = torch.empty((1, h, w, 3), dtype=k.dtype, device=k.device)
p_unit_sphere_or_plane[..., 0] = x_normalized.view(1, 1, w)
p_unit_sphere_or_plane[..., 1] = y_normalized.view(1, h, 1)
p_unit_sphere_or_plane[..., 2] = 1
p_unit_sphere_or_plane = th.Variable(p_unit_sphere_or_plane) # (batch_size=1, h, w, 3)
optim_vars = depth1, depth2
aux_vars = image1, image2, p_unit_sphere_or_plane, k, T_wc1, T_wc2
weight = th.ScaleCostWeight(torch.tensor(1., dtype=dtype, device=device))
cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1, weight,
aux_vars=aux_vars)
objective = th.Objective(dtype=k.dtype)
objective.to(device=device)
objective.add(cost_function)
optimizer = th.LevenbergMarquardt(
objective,
linear_solver_cls=th.BaspachoSparseSolver,
linearization_cls=th.SparseLinearization,
)
theseus_layer = th.TheseusLayer(optimizer)
theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
"verbose": True,
"adaptive_damping": True})
if __name__ == "__main__":
main()
Thanks!
I played a bit with this and it seems that torch.vmap
doesn't like this operation inside _photometric_error
--> p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1)
(particularly, the view function). You'll need to rewrite this line in some way without using view if you want to use vmap
. Note that this autograd mode is only useful if your batch size is > 1.
I also ran with autograd mode dense, with a smaller image size (48 64), and the code also works. However for your full image size I'm not sure you'll be able to run our optimizers. You have two optimization variables of size 480 640 = 307200, so you'll end having to solve a dense linear system with matrix size of 614000 * 614000, which is something like 1.5TB of memory.
Maybe you need to revisit how you are formulating this problem?
Oh sorry, I missed your most recent messages. I'll open an issue for the Baspacho bug, thanks for catching this!
Thanks, Do you have any suggestions about the choice of linear_solver and linearization for this problem?
For problems with many cost functions and sparse connectivity, Baspacho is usually the best choice. On the other hand, if you have a single cost function, then there is no advantage from using a sparse solver, because the linear systems will be dense; in that case CholeskyDenseSolver is probably better. When there are only a few cost functions, it's hard to say, might require some experimentation.
That being said, for your current formulation the main issue is the extremely large number of optimization variables. Are you able to reformulate this problem in some way so that the dimension of the optimization variables is lower?
Thanks, I will try sparse depth encoding. BTW, the opt variable needs to be a subclass of Manifold as said by the doc. But I find there is no Matrix-type for Manifold.
@EXing We don't have a Matrix class, unfortunately, as we prioritized other features. We do welcome community contributions and pull requests, so, if you are interested we can give you some guidance on how to get started.
I will try but don't have enough time to do this. BTW, it seems vmap support view() function
import torch
# Define a sample tensor
batch_size = 5
h = 4
w = 4
tensor = torch.rand((batch_size, h * w))
# Define a function to reshape each tensor in the batch
def reshape_tensor(tensor):
return tensor.view(h, w)
# Use vmap to apply the reshape function to each tensor in the batch
result = torch.func.vmap(reshape_tensor)(tensor)
print(result.shape) # Output: (5, 4, 4)
By downsampling depth (60, 80) outside and upsampling (480, 640) inside cost function, the optimization is able to run. But it's very slow and memory-consuming, any suggestions?
I found out the dense solver is faster. But
CholeskyDenseSolver with error, only LUDenseSolver works.
RuntimeError: There was an error while running the linear optimizer. Original error message: linalg.cholesky: (Batch element 0): The factorization could not be completed because the input is not positive-definite (the leading minor of order 641 is not positive-definite).
The problem my not be necessarily the use of view but some combination of view with the way the tensor is created, or with other operations. What I know is that when I replaced the operation p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1)
with p_unit_sphere_or_plane.tensor
I was able to run with vmap
.
That's strange about LUDenseSolver working but CholeskyDenseSolver not. Do you have a repro?
LUDenseSolver only works with LevenbergMarquardt
If you are using GaussNewton, then that's not too surprising, because we don't use damping when solving the linear system, and you can get the kind of error you reported above in some cases.
The problem my not be necessarily the use of view but some combination of view with the way the tensor is created, or with other operations. What I know is that when I replaced the operation
p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1)
withp_unit_sphere_or_plane.tensor
I was able to run withvmap
.That's strange about LUDenseSolver working but CholeskyDenseSolver not. Do you have a repro?
still the same code
import torch
import theseus as th
import torchlie.functional as lieF
def _photometric_error(T_wc1: th.SE3, T_wc2: th.SE3,
d1: th.Vector, d2: th.Vector,
image1: th.Variable, image2: th.Variable,
p_unit_sphere_or_plane: th.Variable,
k: th.Variable) -> torch.Tensor:
"""note: undistort and crop image ahead, since mask not supported."""
batch_size = image1.shape[0]
h = image1.shape[2]
w = image1.shape[3]
# bidirectional projection: 2 -> 1
T_c1c2 = T_wc1.inverse().compose(T_wc2)
p_c1 = lieF.SE3.transform(T_c1c2.tensor, p_unit_sphere_or_plane.tensor * d2.tensor.view(batch_size, h, w, 1))
p1 = p_c1[:, :, :, :2] / p_c1[:, :, :, 2, None] # (batch_size, h, w, 2)
p1 = p1 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image2_in_1 = torch.nn.functional.grid_sample(image1.tensor, p1, padding_mode="border")
err12 = image2_in_1 - image2.tensor
# bidirectional projection: 1 -> 2
T_c2c1 = T_c1c2.inverse()
p_c2 = lieF.SE3.transform(T_c2c1.tensor, p_unit_sphere_or_plane.tensor * d1.tensor.view(batch_size, h, w, 1))
p2 = p_c2[:, :, :, :2] / p_c2[:, :, :, 2, None] # (batch_size, h, w, 2)
p2 = p2 * k.tensor[:, :2].unsqueeze(1).unsqueeze(2) + k.tensor[:, 2:4].unsqueeze(1).unsqueeze(2)
image1_in_2 = torch.nn.functional.grid_sample(image2.tensor, p2, padding_mode="border")
err21 = image1_in_2 - image1.tensor
# the original error dim is too big, make it impossible for Jacobian (batch_size, err_dim, var_dof)
err = torch.cat((err21, err12), dim=1)
err = torch.nn.functional.huber_loss(err, torch.zeros(1, dtype=err.dtype, device=err.device),
reduction="none", delta=0.5)
return torch.sum(err, dim=(1, 2, 3)).unsqueeze(1) # (batch_size, 1)
def photometric_error_fix_pose(optim_vars, aux_vars) -> torch.Tensor:
d1: th.Vector = optim_vars[0] # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
d2: th.Vector = optim_vars[1] # unknown scale, > 0, better rescale ahead, (batch_size, h x w x 1)
image1: th.Variable = aux_vars[0] # (batch_size, 1, h, w) {normalized_intensity}
image2: th.Variable = aux_vars[1] # (batch_size, 1, h, w) {normalized_intensity}
p_unit_sphere_or_plane: th.Variable = aux_vars[2] # (batch_size=1, h, w, 3)
k: th.Variable = aux_vars[3] # (batch_size=1, 4) {fx, fy, cx, cy}
T_wc1: th.SE3 = aux_vars[4]
T_wc2: th.SE3 = aux_vars[5]
return _photometric_error(T_wc1, T_wc2, d1, d2, image1, image2, p_unit_sphere_or_plane, k)
def main():
device = "cuda:0"
dtype = torch.float32
h = int(480/8)
w = int(640/8)
image1 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
image2 = th.Variable(torch.rand(1, 1, h, w, dtype=dtype, device=device))
depth1 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
depth2 = th.Vector.rand(1, h * w, dtype=dtype, device=device, requires_grad=True)
T_wc1 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
T_wc2 = th.SE3.rand(1, dtype=dtype, device=device, requires_grad=True)
k = th.Variable(torch.tensor([481.20, -480.00, 319.50, 239.50], dtype=dtype, device=device).view(1, 4))
with torch.no_grad():
x_coords = torch.linspace(0, w - 1, w, device=k.device)
y_coords = torch.linspace(0, h - 1, h, device=k.device)
fx, fy, cx, cy = k[0, 0], k[0, 1], k[0, 2], k[0, 3]
x_normalized = (x_coords - cx) / fx # (w)
y_normalized = (y_coords - cy) / fy # (h)
p_unit_sphere_or_plane = torch.empty((1, h, w, 3), dtype=k.dtype, device=k.device)
p_unit_sphere_or_plane[..., 0] = x_normalized.view(1, 1, w)
p_unit_sphere_or_plane[..., 1] = y_normalized.view(1, h, 1)
p_unit_sphere_or_plane[..., 2] = 1
p_unit_sphere_or_plane = th.Variable(p_unit_sphere_or_plane) # (batch_size=1, h, w, 3)
optim_vars = depth1, depth2
aux_vars = image1, image2, p_unit_sphere_or_plane, k, T_wc1, T_wc2
weight = th.ScaleCostWeight(torch.tensor(1., dtype=dtype, device=device))
cost_function = th.AutoDiffCostFunction(optim_vars, photometric_error_fix_pose, 1, weight,
aux_vars=aux_vars, autograd_mode="dense")
objective = th.Objective(dtype=k.dtype)
objective.to(device=device)
objective.add(cost_function)
optimizer = th.LevenbergMarquardt(
objective,
linear_solver_cls=th.CholeskyDenseSolver,
linearization_cls=th.DenseLinearization,
)
theseus_layer = th.TheseusLayer(optimizer)
theseus_outputs, info = theseus_layer.forward(optimizer_kwargs={
"verbose": True,
"adaptive_damping": True})
if __name__ == "__main__":
main()
Something seems off about your optimization problem. The Jacobians matrix of size 1 x 9600 is mostly zeros, except for 5 elements, as shown below, resulting in a rank deficient matrix that's not possible to solve.
Something seems off about your optimization problem. The Jacobians matrix of size 1 x 9600 is mostly zeros, except for 5 elements, as shown below, resulting in a rank deficient matrix that's not possible to solve.
That's true. Thanks. By the way, should we manually set requires_grad=True for opt variables?
That's not necessary, even when using AutoDiffCostFunction
.
It's been a while so I'll close this issue. Feel free to reopen if this exact problem still persists.
❓ Questions and Help
Hi, do you have any idea about this issue?
The code has two kinds of loss to do pose-graph bundle adjustment: