YunYang1994 / tensorflow-yolov3

🔥 TensorFlow Code for technical report: "YOLOv3: An Incremental Improvement"
https://yunyang1994.gitee.io/2018/12/28/YOLOv3-算法的一点理解/
MIT License

Trying to save the video #319

Status: Open · He-haitao opened this issue 5 years ago

He-haitao commented 5 years ago

Testing with road.mp4 and trying to save the result as a video: each frame is processed and then written into a video file, but every run ends with an exception, the "No image" message written in the code, and I'm not sure why.

```python
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('./docs/images/output.avi', fourcc, 20, (416, 416))

with tf.Session(graph=graph) as sess:
    vid = cv2.VideoCapture(video_path)
    while vid.isOpened():  # add by me
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)

            frame_size = frame.shape[:2]
            image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
            image_data = image_data[np.newaxis, ...]
            prev_time = time.time()

            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})

            pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)

            bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            image = utils.draw_bbox(frame, bboxes)

            # curr_time = time.time()
            # exec_time = curr_time - prev_time
            result = np.asarray(image)
            # info = "time: %.2f ms" % (1000 * exec_time)
            # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            out.write(result)  # add by me
            # cv2.imshow("result", result)
        else:
            raise ValueError("No image!")

        if cv2.waitKey(1) & 0xFF == ord('q'): break

    vid.release()
    out.release()
    cv2.destroyAllWindows()
    print('video conversion finished')
```

Opening the video on its own, reading each frame and saving it works fine, so why does the output no longer show up once the frames go through the box-generation step? Also it runs very slowly, but that is probably my machine, since I'm running this on a laptop.

TsingWei commented 5 years ago

It's a cv2 issue. When writing each frame img into the video, the resolution requirement is quite strict: for example, the 416*416 resolution you defined at the start may not match the output img. It is also possible that your OpenCV build does not have the corresponding XVID encoder installed.
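A minimal sketch along those lines (the isOpened check, the cv2.resize call, the MJPG fallback, and the video path are my additions/assumptions, not code from this repo): every frame passed to out.write() has to match the size the writer was created with, and the end of the file should break the loop instead of raising.

```python
import cv2

width, height = 416, 416                   # must match the size passed to VideoWriter below
fourcc = cv2.VideoWriter_fourcc(*'XVID')   # try *'MJPG' if the XVID encoder is unavailable
out = cv2.VideoWriter('./docs/images/output.avi', fourcc, 20, (width, height))
assert out.isOpened(), "VideoWriter failed to open (codec or path problem)"

vid = cv2.VideoCapture("road.mp4")         # path assumed, as in the original post
while vid.isOpened():
    return_value, frame = vid.read()
    if not return_value:
        break                              # end of file: stop instead of raising "No image!"
    # ... detection and utils.draw_bbox would go here, producing a BGR frame ...
    result = cv2.resize(frame, (width, height))  # force every frame to the declared size
    out.write(result)

vid.release()
out.release()
```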

Byronnar commented 5 years ago

I solved the video-saving problem here, and it also supports batch image testing and is very fast: https://github.com/Byronnar/tensorflow-serving-yolov3

changfanfan commented 4 years ago

```python
import cv2
import time
import numpy as np
import core.utils as utils
import tensorflow as tf
from PIL import Image, ImageGrab


def video_demo(return_elements, pb_file, vid, num_classes, input_size, storable):
    # tf.Graph() defines the computation graph.
    # The graph only describes the network; it does not perform any actual computation.
    graph = tf.Graph()
    # Import the computation graph from the pb file into the current default graph
    return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)
    with tf.Session(graph=graph) as sess:
        while True:
            # Read the video frame by frame; vid.read() returns two values:
            # return_value is a bool, True if the frame was read correctly and False at end of file;
            # frame is a 3-D array holding the image of that frame.
            return_value, frame = vid.read()
            if return_value:
                # cv2.VideoCapture() reads images in BGR format;
                # convert each BGR frame to RGB for processing.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Convert the array to a PIL image
                image = Image.fromarray(frame)
            else:
                # raise ValueError("No image!")
                break
            # Get the image size
            frame_size = frame.shape[:2]
            # Preprocess the image; nothing actually changes here
            # because target_size is the same as frame.shape[:2]
            image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
            # Add a batch dimension (batch_size defaults to 1)
            image_data = image_data[np.newaxis, ...]
            # Timestamp before processing this frame
            prev_time = time.time()
            # Get the three kinds of bounding boxes
            pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                [return_tensors[1], return_tensors[2], return_tensors[3]],
                feed_dict={return_tensors[0]: image_data})
            # Stack the predictions into one matrix
            pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                        np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)
            # Filter and rescale the boxes with a score threshold of 0.3
            bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
            # Non-maximum suppression with an IoU threshold of 0.45
            bboxes = utils.nms(bboxes, 0.45, method='nms')
            # Draw the boxes; the result is one image per frame
            image = utils.draw_bbox(frame, bboxes)
            # Timestamp after processing this frame
            curr_time = time.time()
            # Processing time for this frame
            exec_time = curr_time - prev_time
            # result = np.asarray(image)
            # Print the per-frame processing time
            print("time: %.2f ms" % (1000 * exec_time))
            # Window title
            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            # Convert RGB back to BGR so cv2 can display it
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            # Show the image
            cv2.imshow("result", result)
            # Save the frame
            if storable:
                videoWriter.write(result)
            # Wait 1 ms for a key press; 'q' quits
            if cv2.waitKey(1) & 0xFF == ord('q'): break


if __name__ == "__main__":
    return_elements = ["input/input_data:0", "pred_sbbox/concat_2:0", "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"]
    # Path of the model pb file
    pb_file = "./yolov3_coco.pb"
    # Video file path
    # video_path = ""
    # Camera input
    video_path = 0
    # Path of the saved video
    save_path = "./result.avi"
    # Whether to save the detection result as a video
    storable = True
    # Total number of detection classes
    num_classes = 80
    # Input image size
    input_size = 416
    # Load the video from video_path:
    # video_path = 0 opens the camera, video_path = "str" opens the video at that path
    vid = cv2.VideoCapture(video_path)
    # Get the fps
    fps = vid.get(cv2.CAP_PROP_FPS)
    # Get the frame size of vid
    size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # Declare the output path, codec, fps and frame size for the saved video
    videoWriter = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('I', '4', '2', '0'), fps, size)

    video_demo(return_elements, pb_file, vid, num_classes, input_size, storable)
```
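Note that this version avoids both problems from the original post: the writer is created with the capture's actual frame size (CAP_PROP_FRAME_WIDTH/HEIGHT) and its real fps, so every written frame matches the declared resolution, and the read loop breaks at the end of the file instead of raising "No image!".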