Thanks for your great work! In BaseTracker.track function, the returned results are mask, logit, painted_iamge. But currently, the code returns final_mask two times as below:
@torch.no_grad()
def track(self, frame, first_frame_annotation=None):
"""
Input:
frames: numpy arrays (H, W, 3)
logit: numpy array (H, W), logit
Output:
mask: numpy arrays (H, W)
logit: numpy arrays, probability map (H, W)
painted_image: numpy array (H, W, 3)
"""
if first_frame_annotation is not None: # first frame mask
# initialisation
mask, labels = self.mapper.convert_mask(first_frame_annotation)
mask = torch.Tensor(mask).to(self.device)
self.tracker.set_all_labels(list(self.mapper.remappings.values()))
else:
mask = None
labels = None
# prepare inputs
frame_tensor = self.im_transform(frame).to(self.device)
# track one frame
probs, _ = self.tracker.step(frame_tensor, mask, labels) # logits 2 (bg fg) H W
# # refine
# if first_frame_annotation is None:
# out_mask = self.sam_refinement(frame, logits[1], ti)
# convert to mask
out_mask = torch.argmax(probs, dim=0)
out_mask = (out_mask.detach().cpu().numpy()).astype(np.uint8)
final_mask = np.zeros_like(out_mask)
# map back
for k, v in self.mapper.remappings.items():
final_mask[out_mask == v] = k
num_objs = final_mask.max()
painted_image = frame
for obj in range(1, num_objs+1):
if np.max(final_mask==obj) == 0:
continue
painted_image = mask_painter(painted_image, (final_mask==obj).astype('uint8'), mask_color=obj+1)
# print(f'max memory allocated: {torch.cuda.max_memory_allocated()/(2**20)} MB')
return final_mask, final_mask, painted_image
Should it be return final_mask, probs, painted_image?
Hi,
Thanks for your great work! In BaseTracker.track function, the returned results are mask, logit, painted_iamge. But currently, the code returns
final_mask
two times as below:Should it be
return final_mask, probs, painted_image
?