balancap / SSD-Tensorflow

Single Shot MultiBox Detector in TensorFlow
4.11k stars 1.89k forks source link

tensorflow ssd video #87

Open zxq309 opened 7 years ago

zxq309 commented 7 years ago

Can tensorflow-ssd be tested on videos, and what speed does it reach? Thank you.

jet309 commented 7 years ago

Yes, it works the same way as for pictures.

zxq309 commented 7 years ago

Thanks very much for your answer. Can you provide your video test code?

123chengbo commented 7 years ago

You need to load a video, then run detection on it frame by frame.

for example: ssd_test_image.py

test on some demo image and ........(modify from this)

# Read a video file frame by frame and prepare each frame for detection.
cap = cv2.VideoCapture('1.avi')  # first, you should import cv2
while cap.isOpened():
    # get a frame
    ret, img = cap.read()
    if not ret:
        # No frame was returned (end of file or read error): stop instead of
        # passing None on to the colour conversion below.
        break
    # Swap channels 1 and 3: cv2 delivers BGR, which differs from the RGB
    # order used by plt.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # ......... then use this img for detection
cap.release()
zxq309 commented 7 years ago

@123chengbo Thanks very much for your answer. Another question: when testing on videos, the SSD algorithm fails to detect anything in some frames. Why does this happen?

123chengbo commented 7 years ago

You should give more details: pictures, your code, ...

mingfengwuye commented 7 years ago

image

zxq309 commented 7 years ago

@123chengbo When testing SSD on videos, an object is detected in one frame, but sometimes it is not detected in another frame. Why does this happen?

lfxx commented 7 years ago

请问你解决这个问题了吗,我现在可以检测视频,但是出来的结果却是将视频分割成图片一帧帧的显示,我想要的是播放视频的同时在视频上实时显示检测结果啊,请问可以实现吗

Sarwar1000 commented 6 years ago

You should change a few things.

In visualization.py, change the function in the "Matplotlib show..." section to:

def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Draw detection boxes on a frame and show it in an OpenCV window.

    Largely inspired by SSD-MXNET!

    Args:
        img: Frame to draw on; indexed as (height, width, ...). It is
            modified in place.
        classes: Array of class ids, one per detection; negative ids are
            skipped.
        scores: Array of detection scores, parallel to `classes`.
        bboxes: Array with one row per detection, read as
            (ymin, xmin, ymax, xmax) and scaled by the frame height/width.
        figsize: Unused; kept so existing callers keep working.
        linewidth: Unused; kept so existing callers keep working.
    """
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for i in range(classes.shape[0]):
        cls_id = int(classes[i])
        if cls_id < 0:
            continue
        score = scores[i]
        if cls_id not in colors:
            # OpenCV colours on 8-bit frames are 0-255 per channel; 0-1
            # floats (as used for matplotlib) render as almost black.
            colors[cls_id] = tuple(random.randint(0, 255) for _ in range(3))
        ymin = int(bboxes[i, 0] * height)
        xmin = int(bboxes[i, 1] * width)
        ymax = int(bboxes[i, 2] * height)
        xmax = int(bboxes[i, 3] * width)
        tl = (xmin, ymin)
        br = (xmax, ymax)
        # The drawing calls modify `img` in place.
        cv2.rectangle(img, tl, br, colors[cls_id], 2)
        class_name = str(cls_id)
        cv2.putText(img, '{:s} | {:.3f}'.format(class_name, score), tl,
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

    # Show the frame once per call, including frames without detections.
    cv2.imshow('img', img)

Then create a new Python file like this:

import os
import math
import random
import sys
import time

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# The project packages imported below are looked up one directory above the
# current working directory.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

slim = tf.contrib.slim

# TensorFlow session shared by every call to process_image().
gpu_options = tf.GPUOptions(allow_growth=False)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Network input size and tensor layout.
net_shape = (300, 300)
data_format = 'NHWC'

# Placeholder for one uint8 image of arbitrary height/width with 3 channels.
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Reuse the variables when `ssd_net` already exists (e.g. the code is re-run
# in the same interpreter).
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(
        image_4d, is_training=False, reuse=reuse)

# Restore the trained weights from the checkpoint.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

ssd_anchors = ssd_net.anchors(net_shape)


def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and post-process its detections.

    Args:
        img: Image fed to the `img_input` placeholder (uint8, H x W x 3).
        select_threshold: Passed to `np_methods.ssd_bboxes_select`.
        nms_threshold: Passed to `np_methods.bboxes_nms`.
        net_shape: Passed to `np_methods.ssd_bboxes_select` as `img_shape`.

    Returns:
        Tuple `(rclasses, rscores, rbboxes)` as returned by the
        `np_methods` post-processing chain.
    """
    # Run the network; the preprocessed image itself is not needed here.
    _, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Select detections from the raw network outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)

    # Clip, keep at most 400 sorted boxes, apply NMS, then resize the boxes
    # relative to `rbbox_img` (helper semantics live in np_methods).
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes

# Run detection on a video file, processing only every Nth frame.
capture = cv2.VideoCapture('video.avi')
ii = 0
frame_rate_divider = 12  # frame-skipping factor; author's formula: 60 / fps
while capture.isOpened():
    ret, frame = capture.read()  # ret is False once no frame could be read
    if not ret:
        # End of video or read failure: stop rather than feed None to the
        # network.
        break
    if ii % frame_rate_divider == 0:
        # OpenCV decodes frames as BGR; convert to RGB for the network and
        # keep the BGR frame for drawing/display.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        rclasses, rscores, rbboxes = process_image(rgb_frame)
        visualization.plt_bboxes(frame, rclasses, rscores, rbboxes)
    ii += 1
    if cv2.waitKey(1) & 0xFF == ord('q'):  # pressing "q" leaves the loop
        break

capture.release()
cv2.destroyAllWindows()

cborelc commented 6 years ago

Here is the code that worked for me based on the above posts

webcam.py:

import os
import math
import random
import sys
import time

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# The project packages imported below are looked up one directory above the
# current working directory.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

slim = tf.contrib.slim

# TensorFlow session used for all inference calls.
gpu_options = tf.GPUOptions(allow_growth=False)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Network input size and tensor layout.
net_shape = (300, 300)
data_format = 'NHWC'

# Placeholder for one uint8 image of arbitrary height/width with 3 channels.
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Reuse the variables when `ssd_net` already exists (e.g. the code is re-run
# in the same interpreter).
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()

with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(
        image_4d, is_training=False, reuse=reuse)

# Restore the trained weights from the checkpoint.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

ssd_anchors = ssd_net.anchors(net_shape)

def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and post-process its detections.

    Args:
        img: Image fed to the `img_input` placeholder.
        select_threshold: Passed to `np_methods.ssd_bboxes_select`.
        nms_threshold: Passed to `np_methods.bboxes_nms`.
        net_shape: Passed to `np_methods.ssd_bboxes_select` as `img_shape`.

    Returns:
        Tuple `(rclasses, rscores, rbboxes)` as returned by the
        `np_methods` post-processing chain.
    """
    # Run the network; the preprocessed image itself is not needed here.
    _, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Select detections from the raw network outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)

    # Clip, keep at most 400 sorted boxes, apply NMS, then resize the boxes
    # relative to `rbbox_img` (helper semantics live in np_methods).
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes

# Run detection on every frame from capture device 0 until "q" is pressed.
capture = cv2.VideoCapture(0)
while capture.isOpened():
    ret, img = capture.read()  # ret is False when no frame could be read
    if not ret:
        # No frame available: stop rather than feed None to the network.
        break
    # OpenCV delivers BGR frames; convert to RGB for the network and keep the
    # BGR frame for drawing/display.
    rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    rclasses, rscores, rbboxes = process_image(rgb_img)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # pressing "q" leaves the loop
        break

capture.release()
cv2.destroyAllWindows()

visualization.py:

def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Draw labelled detection boxes on a frame and show it with OpenCV.

    Largely inspired by SSD-MXNET!

    Args:
        img: Frame to draw on; indexed as (height, width, ...). It is
            modified in place.
        classes: Array of class ids, one per detection; negative ids are
            skipped.
        scores: Array of detection scores, parallel to `classes`.
        bboxes: Array with one row per detection, read as
            (ymin, xmin, ymax, xmax) and scaled by the frame height/width.
        figsize: Unused; kept so existing callers keep working.
        linewidth: Unused; kept so existing callers keep working.
    """
    cnames = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
              'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
              'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
              'train', 'tvmonitor']

    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for i in range(classes.shape[0]):
        cls_id = int(classes[i])
        if cls_id < 0:
            continue
        score = scores[i]
        if cls_id not in colors:
            # OpenCV colours on 8-bit frames are 0-255 per channel; 0-1
            # floats (as used for matplotlib) render as almost black.
            colors[cls_id] = tuple(random.randint(0, 255) for _ in range(3))
        ymin = int(bboxes[i, 0] * height)
        xmin = int(bboxes[i, 1] * width)
        ymax = int(bboxes[i, 2] * height)
        xmax = int(bboxes[i, 3] * width)
        tl = (xmin, ymin)
        br = (xmax, ymax)
        # The drawing calls modify `img` in place.
        cv2.rectangle(img, tl, br, colors[cls_id], 2)

        # Look the name up only after the bounds check, so an id outside
        # `cnames` gets an unlabelled box instead of raising IndexError.
        if cls_id < len(cnames):
            cv2.putText(img, '%s' % cnames[cls_id] + str(score), tl,
                        cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

    # Show the frame once per call, including frames without detections, so
    # video playback does not stall on empty frames.
    cv2.imshow('img', img)
EdwinChien commented 6 years ago

@cborelc Hi! Thank you for your code. I'm a student in Taiwan who is just learning about image recognition. I have tried your code several times, but there is one error which I can't understand. Could you please help me solve this problem? Sorry for asking you about this; thank you with all my heart. default

china56321 commented 4 years ago

请问你解决这个问题了吗,我现在可以检测视频,但是出来的结果却是将视频分割成图片一帧帧的显示,我想要的是播放视频的同时在视频上实时显示检测结果啊,请问可以实现吗

请问你现在解决这个问题了吗?可以参考下你的代码吗?