MhLiao / TextBoxes_plusplus

TextBoxes++: A Single-Shot Oriented Scene Text Detector
Other
954 stars 279 forks source link

I changed demo.py to evaluate the results, but it takes about 0.4 s per image even though the model is loaded only once #100

Closed AzeroGYH closed 5 years ago

AzeroGYH commented 5 years ago

I changed demo.py to evaluate the results, but it takes about 0.4 s per image even though the model is loaded only once. Is there anything wrong with it? Thanks for your reply.

# This script only includes detection.
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# %matplotlib inline
import time
import math
from nms import nms
from crop_image import crop_image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Make sure that caffe is on the python path:
caffe_root = './'  # this file is expected to be in {caffe_root}/examples
import os
# Change to the caffe root so the relative model/image paths below resolve.
os.chdir(caffe_root)
import sys
# Prepend caffe's bundled python bindings to the import path.
sys.path.insert(0, 'python')

import caffe
# Run inference on GPU 0; requires a CUDA-enabled caffe build.
caffe.set_device(0)
caffe.set_mode_gpu()

# Script configuration: model files, I/O locations, and detection knobs.
config = {
    # Network architecture definition (deploy-time prototxt).
    'model_def' : './models/deploy.prototxt',
    # Trained weights; the absolute path below overrides the bundled model.
    'model_weights':'/data2/myfile/TextBoxes_plusplus/models/VGGNet/text/text_polygon_precise_fix_order_384x384/VGG_text_text_polygon_precise_fix_order_384x384_iter_50000.caffemodel',   
    #'model_weights' : './models/model_icdar15.caffemodel',
    # Directory of images to run detection on.
    'img_dir' : '/data4/imgs2',
    # Output locations: rendered figures, per-image result text files, crops.
    'det_visu_path' : './test_images/result_images/',
    'det_save_dir' : './test_images/detection_result/',
    'crop_dir' : './test_images/crops/',
    # Network input resolution (images are resized to this by the transformer).
    'input_height' : 768,
    'input_width' : 768,
    # NMS overlap threshold and minimum detection confidence.
    'overlap_threshold' : 0.2,
    'det_score_threshold' : 0.6,
    # Whether to draw and save the detection visualizations (slows things down).
    'visu_detection' : True,
}

def prepare_network(config):
    """Build the Caffe test-mode net and a matching input transformer.

    Returns the ``(net, transformer)`` pair, with the net's input blob
    already reshaped to the configured resolution.
    """
    input_shape = (1, 3, config['input_height'], config['input_width'])

    # Load the architecture together with its trained weights in TEST mode
    # (i.e. no dropout).
    net = caffe.Net(config['model_def'], config['model_weights'], caffe.TEST)

    # Preprocessing pipeline: HWC -> CHW, subtract the per-channel mean,
    # rescale [0,1] images to [0,255], and swap RGB -> BGR as the reference
    # model expects.
    transformer = caffe.io.Transformer({'data': input_shape})
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', np.array([104, 117, 123]))
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))

    net.blobs['data'].reshape(*input_shape)
    return net, transformer

def prepare_picture(net, transformer, image_name):
    """Load one image, preprocess it, and copy it into the net's input blob.

    Returns the raw (un-preprocessed) image so the caller can recover the
    original dimensions when mapping detections back to pixel space.
    """
    img_path = os.path.join(config['img_dir'], image_name)
    raw_image = caffe.io.load_image(img_path)
    net.blobs['data'].data[...] = transformer.preprocess('data', raw_image)
    return raw_image

def extract_detections(detections, det_score_threshold, image_height, image_width):
    """Convert raw network output into pixel-space quadrilateral boxes.

    Parameters
    ----------
    detections : numpy array shaped (1, 1, N, 15); column 2 is the
        confidence, columns 7..14 are the normalized corner coordinates
        (x1, y1, x2, y2, x3, y3, x4, y4) of each candidate quad.
    det_score_threshold : float
        Minimum confidence for a detection to be kept.
    image_height, image_width : int
        Size of the original image in pixels.

    Returns
    -------
    list of [x1, y1, x2, y2, x3, y3, x4, y4, score] with integer pixel
    coordinates clamped to [1, dim - 1].
    """
    det_conf = detections[0, 0, :, 2]
    # Columns 7..14 hold the four normalized (x, y) corner pairs.
    quads = detections[0, 0, :, 7:15]

    # Keep only detections at or above the configured confidence threshold
    # (the original comment hard-coded "0.6"; the threshold is a parameter).
    keep = det_conf >= det_score_threshold
    top_conf = det_conf[keep]
    top_quads = quads[keep]

    bboxes = []
    # BUG FIX: 'xrange' is Python-2-only; 'range' behaves identically here
    # and works on both Python 2 and 3. The eight near-identical coordinate
    # blocks are also collapsed into one loop.
    for i in range(top_conf.shape[0]):
        coords = []
        for j in range(8):
            # Even columns are x (scale by width), odd are y (by height).
            dim = image_width if j % 2 == 0 else image_height
            v = int(round(top_quads[i, j] * dim))
            # Clamp into the valid pixel range [1, dim - 1].
            coords.append(max(1, min(v, dim - 1)))
        bboxes.append(coords + [top_conf[i]])
    return bboxes

def apply_quad_nms(bboxes, overlap_threshold):
    """Run quadrilateral NMS and return the surviving, de-duplicated boxes.

    Boxes are ordered by descending confidence (element 8) before the nms
    flags are computed; only flagged boxes that have not already been kept
    make it into the result.
    """
    ordered = sorted(bboxes, key=lambda box: -float(box[8]))
    keep_flags = nms(ordered, overlap_threshold)
    kept = []
    for flag, box in zip(keep_flags, ordered):
        if flag and box not in kept:
            kept.append(box)
    return kept

def save_and_visu(image, results, config, image_name):
    """Write detection quads to a text file and optionally render them.

    Each result is ``[x1, y1, x2, y2, x3, y3, x4, y4, score]``; one
    CRLF-terminated comma-separated line is written per quad to
    ``config['det_save_dir']``. When ``config['visu_detection']`` is true,
    the quads are also drawn over *image* and the figure is saved under
    ``config['det_visu_path']``.
    """
    # splitext is robust for file names containing extra dots
    # (the original 'name.split(".")[0]' truncated e.g. 'a.b.jpg' to 'a').
    stem = os.path.splitext(image_name)[0]
    det_save_path = os.path.join(config['det_save_dir'], stem + '.txt')
    visualize = config['visu_detection']

    if visualize:
        plt.clf()
        plt.imshow(image)
        current_axis = plt.gca()

    # 'with' guarantees the file is closed even if drawing raises.
    with open(det_save_path, 'wt') as det_fid:
        for result in results:
            coords = result[:8]
            score = result[-1]
            det_fid.write(','.join(str(v) for v in coords + [score]) + '\r\n')
            if visualize:
                quad = np.array(coords, dtype=float).reshape(4, 2)
                current_axis.add_patch(
                    plt.Polygon(quad, fill=False, edgecolor='r', linewidth=2))

    if visualize:
        plt.axis('off')
        # BUG FIX: this assignment was over-indented in the original, which
        # is a Python IndentationError; it belongs at this level.
        det_url = config['det_visu_path'] + image_name
        plt.savefig(det_url, dpi=300)

# detection: run the network over every image in img_dir, timing the
# network portion separately from result saving/visualization.
num_images = 0
net, transformer = prepare_network(config)
start_time = time.time()
save_time = 0.0  # time spent writing/drawing results, excluded from avg
# BUG FIX: the loop variable was named 'file', shadowing the builtin.
for fname in os.listdir(config['img_dir']):
    num_images += 1
    image_name = os.path.basename(fname)

    # Preprocess the image and fill the net's input blob.
    image = prepare_picture(net, transformer, image_name)
    image_height, image_width, channels = image.shape

    # Forward pass; 'detection_out' holds the raw candidate boxes.
    detections = net.forward()['detection_out']

    bboxes = extract_detections(detections, config['det_score_threshold'],
                                image_height, image_width)
    # Apply non-maximum suppression on the quadrilaterals.
    results = apply_quad_nms(bboxes, config['overlap_threshold'])

    save_start_time = time.time()
    save_and_visu(image, results, config, image_name)
    save_time += time.time() - save_start_time

# BUG FIX: guard against an empty image directory (ZeroDivisionError),
# and use str.format instead of a tuple-printing Python-2 'print'.
if num_images:
    avg = (time.time() - start_time - save_time) / num_images
    print('detection finished. avg_time = {0}'.format(avg))
else:
    print('no images found in ' + config['img_dir'])
MhLiao commented 5 years ago

You can delete the visualization code for higher speed. By the way, what GPU do you use?

AzeroGYH commented 5 years ago

@MhLiao Thank you for your quick reply. I will try it. I am using a Tesla M40 GPU — do you know which is faster here, the Tesla M40 or the Titan Xp? Also, is there a C++ version of the test code in this project?

MhLiao commented 5 years ago

Here is a comparison between Titan Xp and Tesla M40. It seems that Titan Xp is faster. There is no C++ version test code in this repo currently.