crop a patch from image

liuqk3 commented 6 years ago

I wrote a simple test (code 1) as follows. I first generate a 3D ndarray randomly and add a new dimension to represent the batch_size dimension. The box is set to [0, 0, 3, 3], and the cropped width and height of RoIAlign are both set to 3. The output is what I want.

code 1

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
from roialign.roi_align.roi_align import RoIAlign
import cv2

def to_varabile(arr, requires_grad=False, is_cuda=True):
    """Wrap a NumPy array in an autograd ``Variable``.

    Args:
        arr: NumPy array to convert with ``torch.from_numpy``.
        requires_grad: Passed through to ``Variable``.
        is_cuda: When true, the tensor is moved to the GPU with ``.cuda()``
            before it is wrapped.

    Returns:
        The ``Variable`` built from ``arr``.
    """
    data = torch.from_numpy(arr)
    return Variable(data.cuda() if is_cuda else data, requires_grad=requires_grad)

# Keep everything on the CPU; to_varabile only calls .cuda() when this is True.
is_cuda = False

# --------------- image data generation ---------------------------
# Random values drawn with randn and scaled by 100, laid out as (3, 7, 5).
image_data = np.random.randn(3, 7, 5) * 100
image_data = np.asarray(image_data, dtype=np.float32)
# Prepend a leading axis for the batch dimension -> (1, 3, 7, 5).
image_data = image_data[np.newaxis]
# -------------- end of image data generation---------------------

# A single box [0, 0, 3, 3]; box_index presumably selects the batch
# element each box is cropped from (here element 0) -- confirm against RoIAlign.
boxes_data = np.asarray([[0, 0, 3, 3]], dtype=np.float32)
box_index_data = np.asarray([0], dtype=np.int32)

# Only the image requires gradients; boxes and indices are plain inputs.
image_torch = to_varabile(image_data, requires_grad=True, is_cuda=is_cuda)
boxes = to_varabile(boxes_data, requires_grad=False, is_cuda=is_cuda)
box_index = to_varabile(box_index_data, requires_grad=False, is_cuda=is_cuda)

# Per the author's note: setting transform_fpcoor to False gives crop_and_resize behaviour.
roi_align = RoIAlign(crop_width=3, crop_height=3, transform_fpcoor=True)
croped = roi_align(image_torch, boxes, box_index)

# Print the input and the 3x3 crop so they can be compared by eye.
print(image_torch, '\n')
print(croped)

output 1

tensor([[[[ 155.2937,   27.9904,   74.7080,   66.1174,   34.2396],
          [-140.2568,  164.4559,  -88.8006,  -37.5217,  156.6059],
          [  44.5218,    1.7729,  146.5069,  149.6736,    5.5269],
          [-222.4462,   65.9821,  -52.1707, -145.4467,  -49.2179],
          [ -96.1867,  -40.4554,  -25.5354,   75.1842,  -69.6537],
          [  12.1659,  -15.9438,    0.2947,  -55.0050,    9.0175],
          [ -23.8114,   -9.1514,  -11.3899,  -33.1432, -120.8428]],

         [[  78.6541,   -1.6860,  -81.5798,  -46.7906,  -41.1748],
          [-106.7531,  -40.6971,   15.0387,   50.8834, -122.1978],
          [ -52.3712,   -2.0634,   80.5198,   92.6046,   30.9877],
          [ -44.0200,   34.4229,   83.7537,  -53.1896,   68.1574],
          [  11.3319, -117.5049,  -28.6529,   52.9562,  -59.5388],
          [ 104.2405,  148.5067,  -51.5808,  -82.6794,  104.9984],
          [  46.2745, -140.3886, -134.5971, -106.4377,  146.3157]],

         [[ -12.9685,  -22.6475,   42.1217,  -71.6224,  153.7339],
          [  -1.0168,  -76.6128,    4.3941, -157.5561,  -93.8834],
          [   7.0985,   21.7129,   -2.5151,  272.5159,  -74.3853],
          [  35.7283,  106.1746, -112.5265,  -13.9350,   26.7033],
          [ -36.1484,  -51.4258,  -78.5966,  123.6539,  -83.3441],
          [  60.9276,   63.7939,  -42.0873,  207.5641,   58.5911],
          [  28.2906,   25.9808,   95.6436,  211.1584, -151.0635]]]]) 

tensor([[[[ 155.2937,   27.9904,   74.7080],
          [-140.2568,  164.4559,  -88.8006],
          [  44.5218,    1.7729,  146.5069]],

         [[  78.6541,   -1.6860,  -81.5798],
          [-106.7531,  -40.6971,   15.0387],
          [ -52.3712,   -2.0634,   80.5198]],

         [[ -12.9685,  -22.6475,   42.1217],
          [  -1.0168,  -76.6128,    4.3941],
          [   7.0985,   21.7129,   -2.5151]]]])

Next, I modified code 1 to get code 2. The only change is how the image data is generated: instead of generating it randomly, I load an image from disk and pick a small patch from it. But something goes wrong. Code 2 and its output are as follows:

code 2

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
from roialign.roi_align.roi_align import RoIAlign
import cv2

def to_varabile(arr, requires_grad=False, is_cuda=True):
    """Wrap a NumPy array in an autograd ``Variable``.

    Args:
        arr: NumPy array to convert with ``torch.from_numpy``.
        requires_grad: Passed through to ``Variable``.
        is_cuda: When true, the tensor is moved to the GPU with ``.cuda()``
            before it is wrapped.

    Returns:
        The ``Variable`` built from ``arr``.
    """
    data = torch.from_numpy(arr)
    return Variable(data.cuda() if is_cuda else data, requires_grad=requires_grad)

# Keep everything on the CPU; to_varabile only calls .cuda() when this is True.
is_cuda = False

# ------------------------- image data generation ---------------------------------
frame_path = '/data0/liuqk/MOTChallenge/2DMOT2015/train/TUD-Campus/img1/000068.jpg'
image_data = plt.imread(frame_path) # HxWxC
# Take a 7x5 patch (rows 200-206, columns 200-204) with all channels.
image_data = image_data[200:207, 200:205, :]
image_data = np.transpose(image_data, (2, 0, 1)) # CxHxW
# NOTE(review): the transposed array is likely not C-contiguous, and np.asarray
# may keep that memory layout when converting the dtype. According to the
# maintainer's reply in this thread, RoIAlign does not check contiguity, which
# produces the wrong crop; np.ascontiguousarray avoids it.
image_data = np.asarray(image_data, dtype=np.float32)
# Prepend a leading axis for the batch dimension -> (1, C, 7, 5).
image_data = image_data[np.newaxis]
# ------------------------- end of image data generation --------------------------

# image_data = image_data[np.newaxis, np.newaxis]
# A single box [0, 0, 3, 3]; box_index presumably selects the batch
# element each box is cropped from (here element 0) -- confirm against RoIAlign.
boxes_data = np.asarray([[0, 0, 3, 3]], dtype=np.float32)
box_index_data = np.asarray([0], dtype=np.int32)

# Only the image requires gradients; boxes and indices are plain inputs.
image_torch = to_varabile(image_data, requires_grad=True, is_cuda=is_cuda)
boxes = to_varabile(boxes_data, requires_grad=False, is_cuda=is_cuda)
box_index = to_varabile(box_index_data, requires_grad=False, is_cuda=is_cuda)

# Per the author's note: setting transform_fpcoor to False gives crop_and_resize behaviour.
roi_align = RoIAlign(crop_width=3, crop_height=3, transform_fpcoor=True)
croped = roi_align(image_torch, boxes, box_index)

# Print the input and the 3x3 crop so they can be compared by eye.
print(image_torch, '\n')
print(croped)

output 2

tensor([[[[ 67.,  67.,  66.,  66.,  64.],
          [ 67.,  67.,  66.,  64.,  64.],
          [ 67.,  67.,  65.,  64.,  65.],
          [ 67.,  65.,  65.,  65.,  65.],
          [ 64.,  64.,  64.,  64.,  64.],
          [ 63.,  63.,  63.,  63.,  63.],
          [ 62.,  62.,  62.,  62.,  62.]],

         [[ 65.,  65.,  64.,  64.,  64.],
          [ 64.,  64.,  63.,  64.,  64.],
          [ 64.,  64.,  65.,  64.,  65.],
          [ 64.,  65.,  65.,  65.,  65.],
          [ 64.,  64.,  64.,  64.,  64.],
          [ 63.,  63.,  63.,  63.,  63.],
          [ 62.,  62.,  62.,  62.,  62.]],

         [[ 53.,  53.,  52.,  52.,  52.],
          [ 55.,  55.,  54.,  54.,  54.],
          [ 55.,  55.,  55.,  54.,  55.],
          [ 55.,  55.,  55.,  55.,  55.],
          [ 54.,  54.,  54.,  54.,  54.],
          [ 53.,  53.,  53.,  53.,  53.],
          [ 52.,  52.,  52.,  52.,  52.]]]]) 

tensor([[[[ 67.,  65.,  53.],
          [ 53.,  66.,  64.],
          [ 64.,  52.,  64.]],

         [[ 55.,  65.,  65.],
          [ 64.,  54.,  65.],
          [ 67.,  64.,  55.]],

         [[ 64.,  54.,  64.],
          [ 63.,  63.,  53.],
          [ 53.,  63.,  63.]]]])

As you can see, the output is not what I want, and I cannot figure out how RoIAlign works here. Why is this happening? Can anyone tell me, please?

longcw commented 6 years ago

The model didn't check if a tensor is contiguous. This will result in wrong crops as you mentioned.

The way to fix this:

# Option 1: make the tensor contiguous after conversion.
# Replace `tensor = torch.from_numpy(arr)` with:
tensor = torch.from_numpy(arr).contiguous()

# OR
# Option 2: make the NumPy array contiguous before conversion.
# Replace `image_data = np.asarray(image_data, dtype=np.float32)` with:
image_data = np.ascontiguousarray(image_data, dtype=np.float32)

Then I got the right crop:

tensor([[[[67., 67., 66., 66., 64.],
          [67., 67., 66., 64., 64.],
          [67., 67., 65., 64., 65.],
          [67., 65., 65., 65., 65.],
          [64., 64., 64., 64., 64.],
          [63., 63., 63., 63., 63.],
          [62., 62., 62., 62., 62.]],

         [[65., 65., 64., 64., 64.],
          [64., 64., 63., 64., 64.],
          [64., 64., 65., 64., 65.],
          [64., 65., 65., 65., 65.],
          [64., 64., 64., 64., 64.],
          [63., 63., 63., 63., 63.],
          [62., 62., 62., 62., 62.]],

         [[53., 53., 52., 52., 52.],
          [55., 55., 54., 54., 54.],
          [55., 55., 55., 54., 55.],
          [55., 55., 55., 55., 55.],
          [54., 54., 54., 54., 54.],
          [53., 53., 53., 53., 53.],
          [52., 52., 52., 52., 52.]]]], requires_grad=True) 

tensor([[[[67., 67., 66.],
          [67., 67., 66.],
          [67., 67., 65.]],

         [[65., 65., 64.],
          [64., 64., 63.],
          [64., 64., 65.]],

         [[53., 53., 52.],
          [55., 55., 54.],
          [55., 55., 55.]]]], grad_fn=<CropAndResizeFunction>)

Sorry for the mistakes.

liuqk3 commented 6 years ago

@longcw Thank you very much! You have solved my problem :)

longcw / RoIAlign.pytorch