BVLC / caffe

Caffe: a fast open framework for deep learning.
http://caffe.berkeleyvision.org/
Other
33.96k stars 18.72k forks source link

Failed inference with nyud-fcn32s-hha #7064

Open wangxudong-cq opened 1 year ago

wangxudong-cq commented 1 year ago

Important - read before submitting

Please read the guidelines for contributing before submitting this issue!

Please do not post installation, build, usage, or modeling questions, or other requests for help to Issues. Use the caffe-users list instead. This helps developers maintain a clear, uncluttered, and efficient view of the state of Caffe.

Issue summary

When I use the model nyud-fcn32s-hha-heavy.caffemodel to run inference on images from the NYU Depth Dataset V2, the result is wrong. The inference script is below.

import numpy as np
from PIL import Image

import os 
import sys
# Resolve this script's directory as an absolute path. A bare
# os.path.dirname(__file__) is '' when __file__ is a relative filename with
# no directory part, which would turn current_path + '/deploy.prototxt'
# into '/deploy.prototxt' (rooted at the filesystem root).
current_path = os.path.dirname(os.path.abspath(__file__))
# The parent directory is added to sys.path so the imports that follow
# can be resolved from there.
project_path = os.path.dirname(current_path)
sys.path.append(project_path)

import caffe
import vis

def zero_multi_padding(in_array, padding_size=0):
    """Return a copy of *in_array* with a zero border around each channel.

    Args:
        in_array: 3-D array indexed as (channels, height, width).
        padding_size: Number of zero rows/columns added on every side of
            the two trailing axes. Must be non-negative.

    Returns:
        A new array of shape
        (channels, height + 2 * padding_size, width + 2 * padding_size)
        with the same dtype as *in_array*; the input is not modified.

    Raises:
        ValueError: If *padding_size* is negative.
    """
    if padding_size < 0:
        raise ValueError("padding_size must be non-negative")
    in_channels, h, w = in_array.shape
    padding_array = np.zeros(
        [in_channels, h + 2 * padding_size, w + 2 * padding_size],
        dtype=in_array.dtype,
    )
    # One slice assignment copies the whole interior at once instead of
    # visiting every element in three nested Python loops.
    padding_array[
        :,
        padding_size:padding_size + h,
        padding_size:padding_size + w,
    ] = in_array
    return padding_array

# the demo image is "2007_000129" from PASCAL VOC

# load image, switch to BGR, subtract mean, and make dims C x H x W for Caffe
# im = Image.open(project_path+'/demo/image.jpg')
# in_ = np.array(im, dtype=np.float32)
# print(in_.shape)
# in_ = in_[:,:,::-1]
# in_ -= np.array((104.00698793,116.66876762,122.67891434))
# in_ = in_.transpose((2,0,1))
# in_pad = zero_multi_padding(in_, 99)
# print(in_pad.shape)
# print(in_.shape)

# Load the test image from disk.
im = Image.open('/home/azure002/my_worksapce/fcn.berkeleyvision.org/nyu_images/169.jpg')
# im = Image.open('../demo/image.jpg')
# Save a 1024x1024 bicubic-resized copy next to the script for inspection.
# NOTE(review): `new_image` is never used again -- the network input below is
# built from the original, un-resized `im`. Confirm that is intended.
new_image = im.resize((1024,1024), Image.Resampling.BICUBIC)
new_image.save(current_path+'/test_init.png')

# Build the network input: float32 array, last axis reversed (RGB -> BGR,
# assuming the file decodes as RGB), per-channel mean subtracted, then
# transposed from H x W x C to C x H x W.
# NOTE(review): the channel flip and mean values are the ones used by the
# PASCAL VOC colour-image demo. The nyud-fcn32s-hha model presumably expects
# HHA-encoded depth input with its own mean and no channel flip -- verify
# against the NYUD data layer in the FCN reference code (nyud_layers.py).
in_ = np.array(im, dtype=np.float32)
in_ = in_[:,:,::-1]
in_ -= np.array((104.00698793,116.66876762,122.67891434))
in_ = in_.transpose((2,0,1))

# load net (definition and weights are read from the script's directory)
net = caffe.Net(current_path+'/deploy.prototxt', current_path+'/nyud-fcn32s-hha-heavy.caffemodel', caffe.TEST)
# # shape for input (data blob is N x C x H x W), set data
# net.blobs['None'].reshape(1, *in_.shape)
# net.blobs['None'].data[...] = in_
# net.blobs['data'].reshape(1, *in_pad.shape)
# net.blobs['data'].data[...] = in_pad
# Reshape the 'data' blob to a batch of one image of this size, then copy
# the preprocessed image into it.
net.blobs['data'].reshape(1, *in_.shape)
net.blobs['data'].data[...] = in_
# run net and take argmax for prediction
net.forward()
# argmax over axis 0 of the first item in the 'score' blob gives one class
# index per pixel.
out = net.blobs['score'].data[0].argmax(axis=0)

# Save the raw label map as a grayscale figure without axes.
import matplotlib.pyplot as plt
plt.imshow(out,cmap='gray')
plt.axis('off')
plt.savefig(current_path+'/test.png')

# Colourise the label map with a 40-entry palette and save both the
# colour-coded segmentation and an overlay on the input image.
# (The variable is still named `voc_palette` from the PASCAL VOC demo.)
voc_palette = vis.make_palette(40)
print(voc_palette)
out_im = Image.fromarray(vis.color_seg(out, voc_palette))
out_im.save(current_path+'/output.png')
masked_im = Image.fromarray(vis.vis_seg(im, out, voc_palette))
masked_im.save(current_path+'/visualization.jpg')

IMAGE: PROCESS: image

Steps to reproduce

Run the Python script above.

Tried solutions

System configuration

Issue checklist