facebookarchive / caffe2

Caffe2 is a lightweight, modular, and scalable deep learning framework.
https://caffe2.ai
Apache License 2.0
8.42k stars 1.95k forks source link

Error for GPU prediction #1290

Open rohith14 opened 6 years ago

rohith14 commented 6 years ago

I am running into a couple of errors with inference (prediction) performance on GPU using the AlexNet CNN. This is the code:

""" Usage: python alexnet_pretrain.py [batch size] [number of GPUs] """

CAFFE2_ROOT = '/local/caffe2/caffe2' CAFFE_MODELS = '/local/caffe2/build/caffe2/python/models'

from caffe2.proto import caffe2_pb2 import numpy as np import skimage.io import skimage.transform from matplotlib import pyplot import os from caffe2.python import core, workspace import urllib2 import time import math import caffe2.python._import_c_extension as C import sys print 'Required modules imported.'

change path to location of cifar10Test folder

IMAGE_FOLDER = '/local/Deep_Learning_Benchmarking/cifar10Test' BATCH_SIZE = int(sys.argv[1]) print 'BATCH SIZE: ', BATCH_SIZE BENCHMARKING = True NUM_GPUS = int(sys.argv[2])

imgFileList = []

puts images from 1 category in a list

category = 'automobile' for imgFile in os.listdir(os.path.join(IMAGE_FOLDER, category)): imgFileList.append(os.path.join(IMAGE_FOLDER, category, imgFile))

MODEL = 'bvlc_alexnet', 'exec_net.pb', 'predict_net.pb', 'ilsvrc_2012_mean.npy', 227

codes for image ID

codes = "https://gist.githubusercontent.com/aaronmarkham/cd3a6b6ac071eca6f7b4a6e40e6038aa/raw/9edb4038a37da6b5a44c3b5bc52e448ff09bfe5b/alexnet_codes" print "Config set!"

# crop out the edges of the image

def crop_center(img, cropx, cropy): y,x,c = img.shape startx = x//2-(cropx//2) starty = y//2-(cropy//2) return img[starty:starty+cropy, startx:startx+cropx]

# rescale the image to the given height and width

def rescale(img, input_height, input_width):
    """Resize an h,w,c image toward the model's input size.

    Aspect ratio is preserved for non-square images (the longer side
    overshoots the target); the caller is expected to crop_center() the
    result down to the exact input size afterwards.

    Args:
        img: image array in h,w,c order.
        input_height: model input height in pixels.
        input_width: model input width in pixels.

    Returns:
        The rescaled image array.
    """
    print("original image shape: " + str(img.shape) + ' and remember it should be in h,w,c!')
    aspect = img.shape[1] / float(img.shape[0])

    if aspect > 1:
        # landscape image: widen to keep the aspect ratio
        res = int(aspect * input_height)
        imgScaled = skimage.transform.resize(img, (input_width, res))
    elif aspect < 1:
        # portrait image
        res = int(input_width / aspect)
        # BUG FIX: was `skimage.tansform.resize`, which raised
        # AttributeError for every portrait input.
        imgScaled = skimage.transform.resize(img, (res, input_height))
    else:
        # square image
        imgScaled = skimage.transform.resize(img, (input_width, input_height))

    return imgScaled

print 'Functions set.'

CAFFE2_ROOT = os.path.expanduser(CAFFE2_ROOT) CAFFE_MODELS = os.path.expanduser(CAFFE_MODELS)

search for necessary files

MEAN_FILE = os.path.join(CAFFE_MODELS, MODEL[0], MODEL[3]) if not os.path.exists(MEAN_FILE): mean = 128 else: mean = np.load(MEAN_FILE).mean(1).mean(1) mean = mean[:, np.newaxis, np.newaxis]

mean = 128

print 'Mean was set to: ', mean

INPUT_IMAGE_SIZE = MODEL[4]

if not os.path.exists(CAFFE2_ROOT): print "CAFFE2_ROOT doesn't exist!"

INIT_NET = os.path.join(CAFFE_MODELS, MODEL[0], MODEL[1]) print 'INIT_NET = ', INIT_NET PREDICT_NET = os.path.join(CAFFE_MODELS, MODEL[0], MODEL[2]) print 'PREDICT_NET = ', PREDICT_NET

if not os.path.exists(INIT_NET): print INIT_NET, ' not found!' else: print 'found ', INIT_NET, '...now looking for ', PREDICT_NET if not os.path.exists(PREDICT_NET): print "Caffe model file, ", PREDICT_NET, 'was not found!' else: print 'all needed files found! loading the model in the next block'

# load and transform images

batches = [] nBatches = int(math.ceil(1000.0/BATCH_SIZE)) i = 0 print 'Number of batches: ', nBatches

print 'Pre-processing images...'

create batches of images

for j in range(0, nBatches): batch = np.zeros([BATCH_SIZE, 3, 227, 227]).astype(np.float32)

##populate each batch
for k in range(0, BATCH_SIZE):
    if i < len(imgFileList):
        imgFile = imgFileList[i]
        imgTemp = skimage.img_as_float(skimage.io.imread(imgFile)).astype(np.float32)
        imgTemp = rescale(imgTemp, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE)
        imgTemp = crop_center(imgTemp, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE)
        ##print 'after crop: ', imgTemp.shape

        ##switch to chw
        imgTemp = imgTemp.swapaxes(1, 2).swapaxes(0, 1)
        ##switch to bgr
        imgTemp = imgTemp[(2,1,0), :, :]

        ##remove mean for better results
        imgTemp = imgTemp * 255 - mean

        ##add to batch
        batch[k] = imgTemp
        ##print 'nchw: ', img.shape

    i+=1

batches.append(batch)

print 'Done pre-processing, initializing net...'

gpus = []

# Provides option for multiple GPUs
for gpu_id in range(NUM_GPUS):
    gpus.append(gpu_id)

# NOTE(review): the device is hard-coded to GPU id 1 regardless of the gpus
# list built above -- confirm this is the intended device (0 is the usual
# first GPU).
device_opts = core.DeviceOption(caffe2_pb2.CUDA, 1)

# initialize the neural net

""" Version 1: This is the way Caffe2 recommends you perform prediction (with the Predictor interface). However, by default, as of June 2017, the Predictor interface does not utilize GPU computation. So, it is much slower to run prediction in this way. """

with open(INIT_NET, 'rb') as f:

init_net = f.read()

with open(PREDICT_NET, 'rb') as f:

predict_net = f.read()

p = workspace.Predictor(init_net, predict_net)

print 'Running net...'

tic = time.time()

for batch in batches:

p.run([batch])

toc = time.time() - tic

""" Version 2: This is another way to perform prediction that does utilize the GPU (the GPU usage is specified by the previously defined device_opts). The network initilaization takes much longer, but the net itself runs much faster. """ init_def = caffe2_pb2.NetDef() with open(INIT_NET, 'rb') as f: init_def.ParseFromString(f.read()) init_def.device_option.CopyFrom(device_opts) workspace.RunNetOnce(init_def.SerializeToString())

net_def = caffe2_pb2.NetDef() with open(PREDICT_NET, 'rb') as f: net_def.ParseFromString(f.read()) net_def.device_option.CopyFrom(device_opts) workspace.CreateNet(net_def.SerializeToString())

print 'Running net...'

timing

tic = time.time()

`#run the net and return prediction

for batch in batches: workspace.FeedBlob('data', batch, device_option=device_opts) workspace.RunNet('AlexNet', 1)

toc = time.time() - tic

results = workspace.FetchBlob('prob')

print 'Time elapsed running net: ', toc, ' seconds' print 'Images per second: ', 1000.0/toc

Error1 :

BATCH SIZE: 1 Config set! Functions set. Traceback (most recent call last): File "alexnet_pretrain.py", line 87, in mean = np.load(MEAN_FILE).mean(1).mean(1) File "/usr/lib/python2.7/dist-packages/numpy/lib/npyio.py", line 401, in load "Failed to interpret file %s as a pickle" % repr(file)) IOError: Failed to interpret file '/local/caffe2/build/caffe2/python/models/bvlc_alexnet/ilsvrc_2012_mean.npy' as a pickle

Error2:

BATCH SIZE: 1 Config set! Functions set. Mean was set to: 128 INIT_NET = /local/caffe2/build/caffe2/python/models/bvlc_alexnet/exec_net.pb PREDICT_NET = /local/caffe2/build/caffe2/python/models/bvlc_alexnet/predict_net.pb found /local/caffe2/build/caffe2/python/models/bvlc_alexnet/exec_net.pb ...now looking for /local/caffe2/build/caffe2/python/models/bvlc_alexnet/predict_net.pb all needed files found! loading the model in the next block Number of batches: 1000 Pre-processing images... Done pre-processing, initializing net... Traceback (most recent call last): File "alexnet_pretrain.py", line 193, in init_def.ParseFromString(f.read()) File "/usr/lib/python2.7/dist-packages/google/protobuf/message.py", line 186, in ParseFromString self.MergeFromString(serialized) File "/usr/lib/python2.7/dist-packages/google/protobuf/internal/python_message.py", line 841, in MergeFromString if self._InternalParse(serialized, 0, length) != length: File "/usr/lib/python2.7/dist-packages/google/protobuf/internal/python_message.py", line 866, in InternalParse new_pos = local_SkipField(buffer, new_pos, end, tag_bytes) File "/usr/lib/python2.7/dist-packages/google/protobuf/internal/decoder.py", line 827, in SkipField return WIRETYPE_TO_SKIPPER[wire_type](buffer, pos, end) File "/usr/lib/python2.7/dist-packages/google/protobuf/internal/decoder.py", line 797, in _RaiseInvalidWireType raise _DecodeError('Tag had invalid wire type.')

Got the AlexNet model from here https://github.com/caffe2/models/tree/master/bvlc_alexnet

Any help is appreciated, Thanks

pengfeix commented 6 years ago

For error 1, I think you can remove ilsvrc_2012_mean.npy from the models folder.