However, when it iterates to 2000+ images, it throws an error:
File "mmdetection/mmdet/datasets/custom.py", line 220, in __getitem__
data = self.prepare_train_img(idx)
File "mmdetection/mmdet/datasets/custom.py", line 243, in prepare_train_img
return self.pipeline(results)
File "mmdetection/mmdet/datasets/pipelines/compose.py", line 41, in __call__
data = t(data)
File "mmdetection/mmdet/datasets/pipelines/transforms.py", line 575, in __call__
results[key] = results[key].translate(
File "mmdetection/mmdet/core/mask/structures.py", line 420, in translate
translated_masks = mmcv.imtranslate(
File "/data1/opt/.cache/matlab/anaconda3/lib/python3.8/site-packages/mmcv/image/geometric.py", line 719, in imtranslate
translated = cv2.warpAffine(
cv2.error: OpenCV(4.5.4-dev) /tmp/pip-req-build-h45n7_hz/opencv/modules/imgproc/src/imgwarp.cpp:2595: error: (-215:Assertion failed) src.cols > 0 && src.rows > 0 in function 'warpAffine
it was in FixedShift function. Here is the code, just modified from RandomShift.
@PIPELINES.register_module()
class FixedShift:
"""Shift the image and box given shift pixels and probability.
Args:
shift_ratio (float): Probability of shifts. Default 0.5.
max_shift_px (int): The max pixels for shifting. Default 32.
filter_thr_px (int): The width and height threshold for filtering.
The bbox and the rest of the targets below the width and
height threshold will be filtered. Default 1.
"""
def __init__(self, shift_ratio=1, fixed_size=512, max_shift_px=32, filter_thr_px=1):
assert 0 <= shift_ratio <= 1
assert max_shift_px >= 0
self.shift_ratio = shift_ratio
self.max_shift_px = max_shift_px
self.filter_thr_px = int(filter_thr_px)
self.fixed_size = fixed_size
# The key correspondence from bboxes to labels.
self.bbox2label = {
'gt_bboxes': 'gt_labels',
'gt_bboxes_ignore': 'gt_labels_ignore'
}
def __call__(self, results):
"""Call function to random shift images, bounding boxes.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Shift results.
"""
# if random.random() < self.shift_ratio:
# import ipdb; ipdb.set_trace()
ori_img_h, ori_img_w = results['img_shape'][:2]
trans_x = 0
trans_y = 0
if ori_img_h > ori_img_w:
trans_x = (self.fixed_size - ori_img_w) // 2
else:
trans_y = (self.fixed_size - ori_img_h) // 2
img_shape = results['img'].shape[:2]
random_shift_x = trans_x
random_shift_y = trans_y
new_x = max(0, random_shift_x)
orig_x = max(0, -random_shift_x)
new_y = max(0, random_shift_y)
orig_y = max(0, -random_shift_y)
# TODO: support mask and semantic segmentation maps.
for key in results.get('bbox_fields', []):
bboxes = results[key].copy()
bboxes[..., 0::2] += random_shift_x
bboxes[..., 1::2] += random_shift_y
# clip border
bboxes[..., 0::2] = np.clip(bboxes[..., 0::2], 0, img_shape[1])
bboxes[..., 1::2] = np.clip(bboxes[..., 1::2], 0, img_shape[0])
# remove invalid bboxes
bbox_w = bboxes[..., 2] - bboxes[..., 0]
bbox_h = bboxes[..., 3] - bboxes[..., 1]
valid_inds = (bbox_w > self.filter_thr_px) & (
bbox_h > self.filter_thr_px)
# If the shift does not contain any gt-bbox area, skip this
# image.
if key == 'gt_bboxes' and not valid_inds.any():
return results
bboxes = bboxes[valid_inds]
results[key] = bboxes
# label fields. e.g. gt_labels and gt_labels_ignore
label_key = self.bbox2label.get(key)
if label_key in results:
results[label_key] = results[label_key][valid_inds]
for key in results.get('img_fields', ['img']):
img = results[key]
new_img = np.zeros_like(img)
img_h, img_w = img.shape[:2]
new_h = img_h - np.abs(random_shift_y)
new_w = img_w - np.abs(random_shift_x)
new_img[new_y:new_y + new_h, new_x:new_x + new_w] \
= img[orig_y:orig_y + new_h, orig_x:orig_x + new_w]
results[key] = new_img
for key in results.get('mask_fields', []): # gt_masks
if results[key] is None:
continue
if ori_img_h > ori_img_w:
results[key] = results[key].translate(
out_shape=results['pad_shape'][:2],
offset=trans_x,
direction='horizontal',)
else:
results[key] = results[key].translate(
out_shape=results['pad_shape'][:2],
offset=trans_y,
direction='vertical',)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(max_shift_px={self.max_shift_px}, '
return repr_str
After debugging, I found the channel of the BitmapMasks of that image is 556, which is too large. meaning the instances of that image is 556. So I write a script and found the maximum channel can be handled for warpAffine function is 512, images whose channel is larger than 512 will canse an error.
File "mmdetection/mmdet/datasets/custom.py", line 220, in __getitem__
data = self.prepare_train_img(idx)
File "mmdetection/mmdet/datasets/custom.py", line 243, in prepare_train_img
return self.pipeline(results)
File "mmdetection/mmdet/datasets/pipelines/compose.py", line 41, in __call__
data = t(data)
File "mmdetection/mmdet/datasets/pipelines/transforms.py", line 575, in __call__
results[key] = results[key].translate(
File "mmdetection/mmdet/core/mask/structures.py", line 420, in translate
translated_masks = mmcv.imtranslate(
File "/data1/opt/.cache/matlab/anaconda3/lib/python3.8/site-packages/mmcv/image/geometric.py", line 719, in imtranslate
translated = cv2.warpAffine(
cv2.error: OpenCV(4.5.4-dev) /tmp/pip-req-build-h45n7_hz/opencv/modules/imgproc/src/imgwarp.cpp:2595: error: (-215:Assertion failed) src.cols > 0 && src.rows > 0 in function 'warpAffine
Additional information
I use opencv-python==4.5.4-dev. I think it's a bug of OpenCV, but in mmcv, how can we solve it? Looking forward to your reply.
Prerequisite
Task
I'm using the official example scripts/configs for the officially supported tasks/models/datasets.
Branch
master branch https://github.com/open-mmlab/mmdetection
Environment
Python: 3.8.5 (default, Sep 4 2020, 07:30:14) [GCC 7.3.0] CUDA available: True GPU 0,1,2,3,4,5,6,7: NVIDIA A100-PCIE-40GB CUDA_HOME: /usr/local/cuda NVCC: Build cuda_11.1.TC455_06.29069683_0 GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0 PyTorch: 1.7.1 PyTorch compiling details: PyTorch built with:
TorchVision: 0.8.2 OpenCV: 4.5.4-dev MMCV: 1.3.16 MMCV Compiler: GCC 7.3 MMCV CUDA Compiler: 11.0 MMDetection: 2.18.0+db256a1
Reproduces the problem - code sample
I use the following config to build Lvis dataset and do something (iterate to extract the bbox and masks from the annotation)
However, when it iterates to 2000+ images, it throws an error:
it was in
FixedShift
function. Here is the code, just modified fromRandomShift
.After debugging, I found the channel of the BitmapMasks of that image is 556, which is too large. meaning the instances of that image is 556. So I write a script and found the maximum channel can be handled for
warpAffine
function is 512, images whose channel is larger than 512 will canse an error.Reproduces the problem - command or script
Reproduces the problem - error message
Additional information
I use opencv-python==4.5.4-dev. I think it's a bug of OpenCV, but in mmcv, how can we solve it? Looking forward to your reply.