PaddlePaddle / PaddleOCR

Awesome multilingual OCR toolkits based on PaddlePaddle (practical ultra lightweight OCR system, support 80+ languages recognition, provide data annotation and synthesis tools, support training and deployment among server, mobile, embedded and IoT devices)
https://paddlepaddle.github.io/PaddleOCR/
Apache License 2.0
44.32k stars 7.83k forks source link

在识别大量图片过程中,某一区间的识别率会骤降,这是bug吗? #13341

Closed Test-Jim closed 4 months ago

Test-Jim commented 4 months ago

问题描述 / Problem Description

如图所示 1720662503056_EC28CAA2-B76B-4f32-AB69-BA66367F26CD 识别出来的语义乱了,后面又好了

运行环境 / Runtime Environment

复现代码 / Reproduction Code

完整报错 / Complete Error Message

可能解决方案 / Possible solutions

附件 / Appendix

GreatV commented 4 months ago

最好可以提供一个可复现的demo

Test-Jim commented 4 months ago

最好可以提供一个可复现的demo

你好,这是demo,可以直接执行。

import cv2  # 导入OpenCV库
import difflib
from skimage.metrics import structural_similarity  # 导入结构相似度指标
from paddleocr import PaddleOCR
import pysrt
def image_extract_title(img):
    """Run OCR on one image and return the subtitle text found in it.

    Uses the module-level ``ocr`` object (created in the ``__main__``
    section) and calls ``ocr.ocr(img, cls=False)``.

    Rules applied to the result:

    * nothing recognised -> ``""``
    * two or more text lines on the first page -> the first two texts
      concatenated, but only if the first line's score is above 0.89;
      otherwise ``""``
    * exactly one text line -> that line's text

    :param img: image handed unchanged to ``ocr.ocr`` (the caller in this
        file passes a cropped video frame).
    :return: the extracted subtitle text, possibly an empty string.
    """
    # Each recognised line is indexed below as [box, (text, score)]; the box
    # corners are top-left, top-right, bottom-right, bottom-left.
    ocr_results = ocr.ocr(img, cls=False)

    # The original check only covered ``[None]``. None, [] and [[]] are
    # handled as "no text" too, so an empty result never reaches the
    # indexing below and raises TypeError/IndexError.
    if not ocr_results or not ocr_results[0]:
        return ""

    first_page = ocr_results[0]
    if len(first_page) >= 2:
        print(r'多条字幕:',ocr_results)
        # Only the first line's confidence gates the two-line join.
        if first_page[0][1][1] > 0.89:
            return first_page[0][1][0] + first_page[1][1][0]
        return ""

    # Single line: keep the first text of the last non-empty page, which is
    # what the original loop produced, while skipping empty pages safely.
    title = ""
    for page in ocr_results:
        if page:
            title = page[0][1][0]
    return title

def video_extra_title(video_path, output_srt_path,y1,y2):
    """Extract burned-in subtitles from a horizontal band of a video into an SRT file.

    Every ``subsampling``-th frame is cropped to rows ``y1:y2``. While no
    subtitle is active the crop is passed to ``image_extract_title``; once
    text is found, later sampled crops are compared with the reference crop
    using ``structural_similarity`` and the subtitle is considered to last
    until the similarity drops to ``similarity_thresh`` or below.

    Depends on module-level state created in the ``__main__`` section:
    ``subs`` (a ``pysrt.SubRipFile`` that receives the items) and ``index``
    (the next subtitle number, incremented here).

    :param video_path: path or URL given to ``cv2.VideoCapture``.
    :param output_srt_path: destination of the SRT file (written as UTF-8).
    :param y1: first row (inclusive) of the subtitle band.
    :param y2: last row (exclusive) of the subtitle band.
    :raises IOError: if the video cannot be opened.
    :raises ValueError: if the reported frame rate is not positive, since
        frame numbers could then not be converted to timestamps.
    """
    subsampling = 5  # analyse one frame out of every 5
    similarity_thresh = 0.8  # above this, two crops count as the same subtitle

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail loudly instead of silently writing an empty SRT file.
        cap.release()
        raise IOError('Cannot open video: {}'.format(video_path))

    try:
        w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = cap.get(cv2.CAP_PROP_FPS)
        print('Video info w: {}, h: {}, count: {}, fps: {}'.format(w, h, count, fps))
        if fps <= 0:
            # Checked up front; otherwise this surfaces as a ZeroDivisionError
            # only when the first subtitle is written.
            raise ValueError('Invalid frame rate reported for video: {}'.format(fps))

        cur = 0  # number of frames read so far
        detected = False  # True while a subtitle is being tracked
        content = ''  # text of the subtitle being tracked
        start = 0  # frame number at which the tracked subtitle was first seen
        ref_gray_image = None  # grayscale crop of the frame where it was first seen

        def _to_srt_time(frame_no):
            """Convert a frame number into a ``pysrt.SubRipTime`` using ``fps``."""
            total_seconds = frame_no / fps
            whole_seconds = int(total_seconds)
            return pysrt.SubRipTime(
                hours=whole_seconds // 3600,
                minutes=(whole_seconds % 3600) // 60,
                seconds=whole_seconds % 60,
                milliseconds=int((total_seconds - whole_seconds) * 1000),
            )

        def _add_subs(end):
            """Store the tracked subtitle, ending at frame ``end``.

            If the text is more than 80% similar (``quick_ratio``) to the
            previously stored item, that item's end time is extended instead
            of appending a near-duplicate.

            :param end: frame number at which the subtitle ends.
            """
            global index

            end_time = _to_srt_time(end)
            stripped = content.strip()

            if len(subs) > 0:
                if difflib.SequenceMatcher(None, subs[-1].text, stripped).quick_ratio() > 0.8:
                    subs[-1].end = end_time
                    return

            # NOTE(review): presumably compensates for the OCR misreading
            # "了" as "7" - confirm; it also rewrites genuine digit 7s.
            text = stripped.replace('7', '了')
            sub = pysrt.SubRipItem(index=index, start=_to_srt_time(start), end=end_time, text=text)
            print('写入:',end_time,text)
            subs.append(sub)
            index += 1

        # Main loop over the video frames.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream: flush the subtitle still being tracked.
                if detected:
                    _add_subs(cur)
                break
            frame = frame[y1:y2, :]  # keep only the subtitle band
            cur += 1
            if cur % subsampling != 0:
                continue
            if detected:
                # Compare the current crop with the crop where the subtitle started.
                hyp_gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                similarity = structural_similarity(hyp_gray_image, ref_gray_image)
                if similarity > similarity_thresh:
                    continue  # still the same subtitle
                # The subtitle changed: it ended at the previous sampled frame.
                _add_subs(cur - subsampling)
                detected = False
            else:
                content = image_extract_title(frame)
                if content == "":
                    detected = False
                else:
                    detected = True
                    start = cur
                    ref_gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    finally:
        # Release the capture even if OCR or the similarity check raises.
        cap.release()

    # Write the collected subtitles to the SRT file.
    subs.save(output_srt_path, encoding='utf-8')

if __name__ == '__main__':
    import logging

    # Turn off log records at WARNING level and below.
    logging.disable(logging.WARNING)

    # Module-level state read by the functions above: the OCR engine, the
    # subtitle container, and the number given to the next subtitle item.
    subs = pysrt.SubRipFile()
    index = 1
    ocr = PaddleOCR(
        use_angle_cls=False,
        lang="ch",
        use_gpu=True,
        show_log=False,
    )

    # Input video and output SRT file.
    video_path = r'http://60.12.15.236:9803/download/%E8%B6%85%E7%BA%A7%E9%BE%99%E5%A9%BF.mp4'
    output_srt_path = 'longxu_zh_.srt'

    # Row range of the subtitle band. An earlier variant used
    # find_word_location(video_path) (not part of this script), and another
    # run used the band 1291, 1553.
    y1, y2 = 1109, 1261

    # Generate the subtitles.
    video_extra_title(video_path, output_srt_path, y1, y2)
Test-Jim commented 4 months ago

最好可以提供一个可复现的demo

cfd8097a4d08e5fe38f27208ab57263

GreatV commented 4 months ago

可以检查一下是不是截取的字幕有问题

Test-Jim commented 4 months ago

可以检查一下是不是截取的字幕有问题

没有问题呢。这个是视频地址,您可以根据上面出错的时间点对着视频看一下,字幕本身是没有错的。要错就会一起错,不可能一段对、一段错。

video_path = r'http://60.12.15.236:9803/download/%E8%B6%85%E7%BA%A7%E9%BE%99%E5%A9%BF.mp4'

GreatV commented 4 months ago

应该是字幕位置没对上

image

810_WEE

GreatV commented 4 months ago

字幕正确是没有问题的

image

1372_你知道该怎么做了吧

Test-Jim commented 4 months ago

字幕正确是没有问题的

image

1372_你知道该怎么做了吧

谢谢