Robert-JunWang / Pelee

Pelee: A Real-Time Object Detection System on Mobile Devices
Apache License 2.0
885 stars 254 forks source link

How can I get 120 FPS on the NVIDIA TX2? Please help me #72

Open oujieww opened 5 years ago

oujieww commented 5 years ago

I wrote a webcam demo for this, but I cannot get the 120 FPS reported in the paper. Can anyone help me? ('cap read frame time : ', 0.03454303741455078) ('detect time: ', 0.1441190242767334)

this is my code import numpy as np import matplotlib.pyplot as plt import time

import os import caffe import cv2 from google.protobuf import text_format from caffe.proto import caffe_pb2 caffe.set_mode_gpu()

load PASCAL VOC labels

# Load the PASCAL VOC label map from its prototxt definition.
labelmap_file = 'model/voc/labelmap_voc.prototxt'
labelmap = caffe_pb2.LabelMap()
# Context manager closes the file handle (the original leaked it and
# shadowed the builtin name `file`); `str()` around `.read()` was a no-op.
with open(labelmap_file, 'r') as f:
    text_format.Merge(f.read(), labelmap)

def get_labelname(labelmap, labels):
    """Map numeric detection labels to human-readable display names.

    Args:
        labelmap: a caffe_pb2.LabelMap (any object with an `item` sequence
            whose entries carry `label` and `display_name`).
        labels: a single numeric label or a list of numeric labels. Float
            labels (as produced by `detections[...].tolist()`) match their
            integer counterparts.

    Returns:
        A list of display-name strings, one per input label.

    Raises:
        ValueError: if a label is not present in the label map. (The
            original used a bare `assert`, which is stripped under -O.)
    """
    if not isinstance(labels, list):
        labels = [labels]
    # Build the lookup table once: O(n + m) instead of a linear scan of
    # the map for every label. Also replaces Python-2-only `xrange`.
    name_by_label = {item.label: item.display_name for item in labelmap.item}
    labelnames = []
    for label in labels:
        if label not in name_by_label:
            raise ValueError('label %r not found in labelmap' % (label,))
        labelnames.append(name_by_label[label])
    return labelnames

# Paths to the merged (BN-folded) Pelee SSD model.
model_def = 'model/voc/deploy_merged.prototxt'
model_weights = 'model/voc/pelee_merged.caffemodel'

# Instantiate the network in TEST phase (inference mode: no dropout etc.).
net = caffe.Net(
    model_def,      # defines the structure of the model
    model_weights,  # contains the trained weights
    caffe.TEST,
)

input preprocessing: 'data' is the name of the input blob == net.inputs[0]

# Input preprocessing for the 'data' blob: HWC->CHW, scale by 0.017 after
# mean subtraction, operate in [0, 255], and swap RGB -> BGR channel order.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_input_scale('data', 0.017)
transformer.set_mean('data', np.array([103.94, 116.78, 123.68]))  # mean pixel (BGR)
transformer.set_raw_scale('data', 255)  # reference model works in [0,255], not [0,1]
transformer.set_channel_swap('data', (2, 1, 0))  # reference model is BGR, not RGB

font = cv2.FONT_HERSHEY_SIMPLEX


def open_cam_onboard(width, height):
    """Open the Jetson onboard camera through a GStreamer pipeline.

    The sensor is captured at a fixed 800x600@5fps and converted to BGRx at
    the requested `width` x `height` before being handed to OpenCV.
    """
    # On versions of L4T previous to L4T 28.1, flip-method=2
    pipeline = (
        "nvcamerasrc ! "
        "video/x-raw(memory:NVMM), width=(int)800, height=(int)600, "
        "format=(string)I420, framerate=(fraction)5/1 ! "
        "nvvidconv ! video/x-raw, width=(int){}, height=(int){}, "
        "format=(string)BGRx ! "
        "videoconvert ! appsink"
    ).format(width, height)
    return cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)

def do_detect(image, img2, conf_thresh=0.4):
    """Run SSD detection on `image` and draw the results onto `img2`.

    Args:
        image: preprocessed RGB float image fed through `transformer` into
            the network's 'data' blob.
        img2: image (typically the original BGR frame) that rectangles and
            labels are drawn on.
        conf_thresh: minimum confidence for a detection to be kept
            (was hard-coded to 0.4; default preserves that behavior).

    Returns:
        `img2` with detection boxes and "label:score" text drawn on it.
    """
    net.blobs['data'].data[...] = transformer.preprocess('data', image)

    # Forward pass. SSD's 'detection_out' rows are
    # [image_id, label, confidence, xmin, ymin, xmax, ymax] (coords in [0,1]).
    detections = net.forward()['detection_out']

    det_label = detections[0, 0, :, 1]
    det_conf = detections[0, 0, :, 2]
    det_xmin = detections[0, 0, :, 3]
    det_ymin = detections[0, 0, :, 4]
    det_xmax = detections[0, 0, :, 5]
    det_ymax = detections[0, 0, :, 6]

    # Keep detections at or above the confidence threshold.
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_labels = get_labelname(labelmap, top_label_indices)
    top_xmin = det_xmin[top_indices]
    top_ymin = det_ymin[top_indices]
    top_xmax = det_xmax[top_indices]
    top_ymax = det_ymax[top_indices]

    # Hoist the loop-invariant frame dimensions; use range (xrange is
    # Python-2-only) to scale normalized coords to pixels.
    img_h, img_w = image.shape[0], image.shape[1]
    for i in range(top_conf.shape[0]):
        xmin = int(round(top_xmin[i] * img_w))
        ymin = int(round(top_ymin[i] * img_h))
        xmax = int(round(top_xmax[i] * img_w))
        ymax = int(round(top_ymax[i] * img_h))
        score = top_conf[i]
        label_name = top_labels[i]
        img2 = cv2.rectangle(img2, (xmin, ymin), (xmax, ymax), (0, 255, 0))
        img2 = cv2.putText(img2, label_name + ':' + str(score),
                           (xmin, ymin - 5), font, 2, (0, 0, 255), 1)
    return img2

set net to batch size of 1

# Set the network to batch size 1 at the model's 304x304 input resolution.
image_resize = 304
net.blobs['data'].reshape(1, 3, image_resize, image_resize)

capture = open_cam_onboard(800, 600)
try:
    while True:
        time0 = time.time()
        ret, frame = capture.read()
        if not ret:
            # Camera read failed: `frame` would be None and cvtColor would
            # crash; the original never checked `ret`.
            break
        frame1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame1 = frame1 / 255.
        print("cap read frame time : ", time.time() - time0)
        time1 = time.time()
        res = do_detect(frame1, frame)
        print("detect time: ", time.time() - time1)
        cv2.imshow('frame', res)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Release the camera and GUI resources (the original leaked both).
    capture.release()
    cv2.destroyAllWindows()

sparshgarg23 commented 3 years ago

What FPS are you getting? In the paper it's mentioned:

The speed is calculated as the average time of processing 100 pictures with batch size 1. We run the 100-picture processing 10 times separately and average the time. The paper also uses FP16 instead of FP32 to achieve the desired FPS. It would be great if the author could confirm this.