A weird problem I found when testing on the VOC 2007 dataset

def get_conv_image_descriptor_for_image(image, model):
    """Return the output of ``model.layers[31]`` for a single image.

    The image is resized to 224x224, its channel axis is reversed
    ('RGB'->'BGR'), the per-channel mean pixel is subtracted, and the result
    is passed to the model as a batch of one in (1, 3, 224, 224) layout.

    Args:
        image: Image array in (height, width, 3) layout, as accepted by
            ``cv2.resize``.
        model: Keras model whose layer at index 31 provides the descriptor.
            NOTE(review): the input is always fed channels-first, so the
            model presumably has to be built/configured for channels-first
            input -- confirm against the model definition and keras.json.

    Returns:
        Whatever the Keras backend function built from
        ``[model.layers[31].output]`` returns (presumably a one-element list
        holding the layer activations -- confirm).
    """
    im = cv2.resize(image, (224, 224)).astype(np.float32)
    print('resized: {}'.format(im.shape))

    # The array is still (height, width, channel) here regardless of the
    # backend's dim ordering, so the channel flip and the mean subtraction
    # must both act on the LAST axis. The former 'th' branch indexed axis 0,
    # which flipped the image vertically and shifted its first three pixel
    # rows instead of its three channels.
    # NOTE(review): descriptors produced under 'th' before this fix differ
    # from the ones produced now; anything trained on the old descriptors
    # may need to be re-checked.
    mean_pixel = np.array([103.939, 116.779, 123.68], dtype=np.float32)
    # 'RGB'->'BGR', then zero-center by mean pixel (broadcast over H and W).
    im = im[:, :, ::-1] - mean_pixel

    # (height, width, channel) -> (channel, height, width)
    im = im.transpose((2, 0, 1))
    print('transposed: {}'.format(im.shape))
    # Add the leading batch axis: (1, channel, height, width).
    im = np.expand_dims(im, axis=0)
    print('expanded: {}'.format(im.shape))

    # The learning-phase placeholder is the first input; it is fed 0 below
    # (test mode in Keras).
    inputs = [K.learning_phase()] + model.inputs
    _convout1_f = K.function(inputs, [model.layers[31].output])
    return _convout1_f([0] + [im])

def get_state(image, history_vector, model_vgg):
    """Stack the image descriptor and the action history into one column.

    Args:
        image: Image array; only its ``shape`` is read here before it is
            handed to ``get_conv_image_descriptor_for_image``.
        history_vector: Array-like reshaped to
            ``(number_of_actions * actions_of_history, 1)``.
        model_vgg: Model forwarded to
            ``get_conv_image_descriptor_for_image``.

    Returns:
        A 2-D column array: the descriptor reshaped to
        ``(visual_descriptor_size, 1)`` on top of the reshaped history.

    Raises:
        ValueError: Raised by NumPy when the descriptor or the history does
            not contain exactly the number of elements required above.
    """
    print('--------b4: {}'.format(image.shape))
    features = np.array(
        get_conv_image_descriptor_for_image(image, model_vgg))
    print('--------aft: {}'.format(features.shape))

    # The descriptor is reshaped first so that a size mismatch in the
    # visual features surfaces before the history is touched.
    feature_column = features.reshape((visual_descriptor_size, 1))
    history_rows = number_of_actions * actions_of_history
    history_column = np.reshape(history_vector, (history_rows, 1))
    return np.vstack((feature_column, history_column))

When running the functions above, the output printed on screen was as follows:

--------b4: (272, 500, 3)
resized: (224, 224, 3)
transposed: (3, 224, 224)
expanded: (1, 3, 224, 224)
--------aft: (1, 1, 5376)

The final output from the 31st layer of VGG16 should have a length of 25088, but in my code its length is 5376. Inference therefore cannot continue because of this mismatch between the two vectors: the descriptor should have 25088 elements, but has 5376 instead.

imatge-upc / detection-2016-nipsws

A weird problem I found when testing on the VOC 2007 dataset #20