Open oujieww opened 5 years ago
What FPS are you getting? In the paper it's mentioned:
The speed is calculated as the average time of processing 100 pictures with batch size 1. We run the 100-picture processing 10 times separately and average the time. The paper also uses FP16 instead of FP32 to achieve the reported FPS. It would be great if the author could confirm this.
I wrote a webcam demo for this, but I cannot get the 120 FPS reported in the paper — can anyone help? ('cap read frame time : ', 0.03454303741455078) ('detect time: ', 0.1441190242767334)
# --- demo code (from the issue author) ---
import os
import time

import caffe
import cv2
import matplotlib.pyplot as plt
import numpy as np
from caffe.proto import caffe_pb2
from google.protobuf import text_format

# Run inference on the GPU (the reported FPS numbers assume GPU mode).
caffe.set_mode_gpu()

# Load PASCAL VOC labels (class id -> display name) from the prototxt label map.
labelmap_file = 'model/voc/labelmap_voc.prototxt'
labelmap = caffe_pb2.LabelMap()
# Use a context manager so the file handle is closed (the original bound the
# handle to `file`, shadowing the builtin, and never closed it).
with open(labelmap_file, 'r') as f:
    text_format.Merge(f.read(), labelmap)
def get_labelname(labelmap, labels):
    """Map numeric class labels to their display names.

    Args:
        labelmap: a caffe_pb2.LabelMap; each entry in ``labelmap.item`` has
            ``.label`` (int id) and ``.display_name`` (string).
        labels: a single label id, or a list of label ids.

    Returns:
        List of display-name strings, in the same order as ``labels``.

    Raises:
        ValueError: if a label id is not present in the label map.
    """
    # Preserve the original's exact wrapping rule (only a non-list is wrapped).
    if type(labels) is not list:
        labels = [labels]
    labelnames = []
    for label in labels:
        # `range` instead of Python-2-only `xrange`; works on both versions.
        for item in labelmap.item:
            if label == item.label:
                labelnames.append(item.display_name)
                break
        else:
            # The original used `assert found == True`, which is stripped
            # under `python -O`; raise an explicit error instead.
            raise ValueError("unknown label: %r" % (label,))
    return labelnames
# Paths to the merged Pelee deployment model.
model_def = 'model/voc/deploy_merged.prototxt'
model_weights = 'model/voc/pelee_merged.caffemodel'

# Build the network in TEST phase (e.g., dropout is disabled): the prototxt
# defines the structure, the caffemodel supplies the trained weights.
net = caffe.Net(model_def, model_weights, caffe.TEST)

# Input preprocessing: 'data' is the name of the input blob == net.inputs[0].
# Configure the caffe.io.Transformer for the 'data' blob:
#   HWC -> CHW transpose, input scale 0.017, per-channel mean subtraction,
#   raw scale 255 (model expects [0,255] inputs), RGB -> BGR channel swap.
# NOTE(review): this paste collapsed several statements onto one line, and the
# body of open_cam_onboard() is missing from the issue text — it cannot be
# reconstructed here. Only its `def` header and a leading comment survive.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) transformer.set_transpose('data', (2, 0, 1)) transformer.set_input_scale('data', 0.017) transformer.set_mean('data', np.array([103.94,116.78,123.68])) # mean pixel transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1] transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB font = cv2.FONT_HERSHEY_SIMPLEX def open_cam_onboard(width, height):
# On versions of L4T previous to L4T 28.1, flip-method=2
# NOTE(review): do_detect() is truncated by the paste — only the preprocessing
# and blob assignment survive. There is no return statement here, yet the main
# loop below does `res = do_detect(...)` and shows `res`; the missing portion
# presumably ran net.forward() and drew detections onto img2 — cannot confirm
# from this paste. The reshape below also looks like module-level setup that
# was folded into this region by the formatting, not part of the function body.
def do_detect(image,img2): transformed_image = transformer.preprocess('data', image) net.blobs['data'].data[...] = transformed_image
# set net to batch size of 1
image_resize = 304 net.blobs['data'].reshape(1,3,image_resize,image_resize)
# Main capture/detect loop; prints per-frame grab and inference timings.
capture = open_cam_onboard(800, 600)
try:
    while True:
        time0 = time.time()
        ret, frame = capture.read()
        # Guard against a failed grab (original dereferenced `frame`
        # unconditionally and would crash on a dropped frame / closed stream).
        if not ret:
            break
        # Convert BGR -> RGB and normalize to [0,1] for the transformer.
        frame1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame1 = frame1 / 255.
        print("cap read frame time : ", time.time() - time0)
        time1 = time.time()
        res = do_detect(frame1, frame)
        print("detect time: ", time.time() - time1)
        cv2.imshow('frame', res)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Release the camera and close the preview window even on error/quit.
    capture.release()
    cv2.destroyAllWindows()