Hi, I am trying to reimplement Unsupervised Learning of Depth and Ego-Motion from Video with Adaptive Loss function.
The pytorch code for Sfm Learner from https://github.com/ClementPinard/SfmLearner-Pytorch
if you take a look at their photometric_reconstruction_loss from the file loss_function.py as the following:
def photometric_reconstruction_loss(tgt_img, ref_imgs, intrinsics,
depth, explainability_mask, pose,
rotation_mode='euler', padding_mode='zeros'):
def one_scale(depth, explainability_mask):
assert(explainability_mask is None or depth.size()[2:] == explainability_mask.size()[2:])
assert(pose.size(1) == len(ref_imgs))
reconstruction_loss = 0
b, _, h, w = depth.size()
downscale = tgt_img.size(2)/h
tgt_img_scaled = F.interpolate(tgt_img, (h, w), mode='area')
ref_imgs_scaled = [F.interpolate(ref_img, (h, w), mode='area') for ref_img in ref_imgs]
intrinsics_scaled = torch.cat((intrinsics[:, 0:2]/downscale, intrinsics[:, 2:]), dim=1)
warped_imgs = []
diff_maps = []
for i, ref_img in enumerate(ref_imgs_scaled):
current_pose = pose[:, i]
ref_img_warped, valid_points = inverse_warp(ref_img, depth[:,0], current_pose,
intrinsics_scaled,
rotation_mode, padding_mode)
diff = (tgt_img_scaled - ref_img_warped) * valid_points.unsqueeze(1).float()
if explainability_mask is not None:
diff = diff * explainability_mask[:,i:i+1].expand_as(diff)
reconstruction_loss += diff.abs().mean()
assert((reconstruction_loss == reconstruction_loss).item() == 1)
warped_imgs.append(ref_img_warped[0])
diff_maps.append(diff[0])
return reconstruction_loss, warped_imgs, diff_maps
warped_results, diff_results = [], []
if type(explainability_mask) not in [tuple, list]:
explainability_mask = [explainability_mask]
if type(depth) not in [list, tuple]:
depth = [depth]
total_loss = 0
for d, mask in zip(depth, explainability_mask):
loss, warped, diff = one_scale(d, mask)
total_loss += loss
warped_results.append(warped)
diff_results.append(diff)
return total_loss, warped_results, diff_results
To use the adaptive loss function, I implement as the following (change the absolute with your loss function):
def photometric_reconstruction_loss(tgt_img, ref_imgs, intrinsics,
depth, explainability_mask, pose,
rotation_mode='euler', padding_mode='zeros'):
def one_scale(depth, explainability_mask):
assert(explainability_mask is None or depth.size()[2:] == explainability_mask.size()[2:])
assert(pose.size(1) == len(ref_imgs))
reconstruction_loss = 0
b, _, h, w = depth.size()
downscale = tgt_img.size(2)/h
tgt_img_scaled = F.interpolate(tgt_img, (h, w), mode='area')
ref_imgs_scaled = [F.interpolate(ref_img, (h, w), mode='area') for ref_img in ref_imgs]
intrinsics_scaled = torch.cat((intrinsics[:, 0:2]/downscale, intrinsics[:, 2:]), dim=1)
warped_imgs = []
diff_maps = []
for i, ref_img in enumerate(ref_imgs_scaled):
adaptive = AdaptiveImageLossFunction(image_size = (ref_img.shape[3], ref_img.shape[2], ref_img.shape[1]), float_dtype=torch.float32, device='cuda:0', color_space='RGB', representation='PIXEL')
current_pose = pose[:, i]
ref_img_warped, valid_points = inverse_warp(ref_img, depth[:,0], current_pose,
intrinsics_scaled,
rotation_mode, padding_mode)
diff = (tgt_img_scaled - ref_img_warped) * valid_points.unsqueeze(1).float()
if explainability_mask is not None:
diff = diff * explainability_mask[:,i:i+1].expand_as(diff)
#Original loss
#reconstruction_loss += diff.abs().mean()
# Try my loss
diff_temp = diff.permute(0, 3, 2, 1)
reconstruction_loss += adaptive.lossfun(diff_temp).mean()
assert((reconstruction_loss == reconstruction_loss).item() == 1)
warped_imgs.append(ref_img_warped[0])
diff_maps.append(diff[0])
return reconstruction_loss, warped_imgs, diff_maps
warped_results, diff_results = [], []
if type(explainability_mask) not in [tuple, list]:
explainability_mask = [explainability_mask]
if type(depth) not in [list, tuple]:
depth = [depth]
total_loss = 0
for d, mask in zip(depth, explainability_mask):
loss, warped, diff = one_scale(d, mask)
total_loss += loss
warped_results.append(warped)
diff_results.append(diff)
return total_loss, warped_results, diff_results
However, the loss seems always stuck at around 9.5787 and has no signal for convergence. Could you have any suggestions for the implementation? Thanks so much
Hi, I am trying to reimplement Unsupervised Learning of Depth and Ego-Motion from Video with Adaptive Loss function. The pytorch code for Sfm Learner from https://github.com/ClementPinard/SfmLearner-Pytorch
if you take a look at their photometric_reconstruction_loss from the file loss_function.py as the following:
To use the adaptive loss function, I implement as the following (change the absolute with your loss function):
However, the loss seems always stuck at around 9.5787 and has no signal for convergence. Could you have any suggestions for the implementation? Thanks so much