Closed Test-Jim closed 4 months ago
最好可以提供一个可复现的demo
最好可以提供一个可复现的demo
你好,这是demo,可以直接执行。
import cv2 # 导入OpenCV库
import difflib
from skimage.metrics import structural_similarity # 导入结构相似度指标
from paddleocr import PaddleOCR
import pysrt
def image_extract_title(img):
    """Run OCR on one (cropped) frame and return the recognised subtitle text.

    Uses the module-level ``ocr`` object and calls ``ocr.ocr(img, cls=False)``.
    The result is indexed as ``results[page][line] == [box, (text, score)]``,
    where ``box`` lists the corners in the order top-left, top-right,
    bottom-right, bottom-left.

    :param img: image handed straight to ``ocr.ocr``.
    :return: the recognised text, or ``""`` when nothing was recognised.
    """
    ocr_results = ocr.ocr(img, cls=False)

    # Nothing recognised at all. The original code only handled ``[None]``;
    # ``None`` / ``[]`` are guarded as well so an empty result never raises.
    if not ocr_results:
        return ""

    first_page = ocr_results[0]
    if first_page and len(first_page) >= 2:
        # More than one text line was found in the subtitle band.
        print('多条字幕:', ocr_results)
        # Only the score of the first line is checked; when it is high enough
        # the first two lines are concatenated into a single subtitle.
        # NOTE(review): the source lost its indentation, so it is assumed that
        # a low-confidence multi-line result falls through to the loop below
        # instead of returning "" -- confirm against the original script.
        if first_page[0][1][1] > 0.89:
            return first_page[0][1][0] + first_page[1][1][0]

    # Otherwise keep the text of the first line of each page; empty pages
    # (``None`` or ``[]``) are skipped instead of raising.
    title = ""
    for result in ocr_results:
        if result:
            title = result[0][1][0]
    return title
def video_extra_title(video_path, output_srt_path, y1, y2):
    """Extract burnt-in subtitles from a video and save them as an SRT file.

    Every ``subsampling``-th frame is cropped to rows ``y1:y2``. While no
    subtitle is active the crop is sent to ``image_extract_title``; once text
    is found, later sampled crops are compared with the reference crop using
    ``structural_similarity`` and the subtitle is closed as soon as the
    similarity drops to ``similarity_thresh`` or below.

    Relies on the module-level ``subs`` (a ``pysrt.SubRipFile``) and ``index``
    (number of the next subtitle), both of which are updated in place.

    :param video_path: path or URL passed to ``cv2.VideoCapture``.
    :param output_srt_path: file the collected subtitles are saved to (UTF-8).
    :param y1: first row of the subtitle band (inclusive).
    :param y2: last row of the subtitle band (exclusive).
    """
    subsampling = 5           # process one frame out of every five
    similarity_thresh = 0.8   # SSIM above this means "same subtitle"

    cap = cv2.VideoCapture(video_path)
    try:
        w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = cap.get(cv2.CAP_PROP_FPS)
        print('Video info w: {}, h: {}, count: {}, fps: {}'.format(w, h, count, fps))

        cur = 0                # number of frames read so far
        detected = False       # True while a subtitle is on screen
        content = ''           # text of the subtitle currently on screen
        start = 0              # frame number at which that subtitle appeared
        ref_gray_image = None  # grayscale crop of the frame that started it

        def _frame_to_srt_time(frame_no):
            """Convert a frame number to a ``pysrt.SubRipTime`` using ``fps``."""
            # Work in whole milliseconds: splitting a float into seconds and a
            # fractional part can lose one millisecond to rounding error.
            total_ms = int(frame_no * 1000 / fps)
            hours, rest = divmod(total_ms, 3600000)
            minutes, rest = divmod(rest, 60000)
            seconds, milliseconds = divmod(rest, 1000)
            return pysrt.SubRipTime(hours=hours, minutes=minutes,
                                    seconds=seconds, milliseconds=milliseconds)

        def _add_subs(end):
            """
            Record the current subtitle, or extend the previous one.

            :param end: frame number at which the subtitle ends
            """
            global index
            # NOTE(review): presumably works around the OCR misreading '了' as
            # '7'; it also rewrites genuine digits -- confirm this is wanted.
            text = content.strip().replace('7', '了')
            end_time = _frame_to_srt_time(end)

            # Near-duplicate of the previous entry: just move its end time.
            # The comparison uses the normalised text, i.e. the same form that
            # was stored for the previous entry.
            if len(subs) > 0:
                matcher = difflib.SequenceMatcher(None, subs[-1].text, text)
                if matcher.quick_ratio() > 0.8:
                    subs[-1].end = end_time
                    return

            sub = pysrt.SubRipItem(index=index,
                                   start=_frame_to_srt_time(start),
                                   end=end_time,
                                   text=text)
            print('写入:', end_time, text)
            subs.append(sub)
            index += 1

        # Main loop: walk through the video frame by frame.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or read failure): flush a pending subtitle.
                if detected:
                    _add_subs(cur)
                break

            frame = frame[y1:y2, :]
            cur += 1
            if cur % subsampling != 0:
                continue

            if detected:
                # Compare the current crop with the crop that started the
                # subtitle.
                hyp_gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                similarity = structural_similarity(hyp_gray_image, ref_gray_image)
                if similarity > similarity_thresh:
                    continue
                # The picture changed: the subtitle ended at the previous
                # sampled frame.
                _add_subs(cur - subsampling)
                detected = False
            else:
                content = image_extract_title(frame)
                if content == "":
                    detected = False
                else:
                    detected = True
                    start = cur
                    ref_gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    finally:
        # Release the capture even when OCR or the comparison raises.
        cap.release()

    # Write the collected subtitles to the SRT file.
    subs.save(output_srt_path, encoding='utf-8')
if __name__ == '__main__':
    import logging

    # Silence every log record at WARNING level and below.
    logging.disable(logging.WARNING)

    # Module-level state read by image_extract_title / video_extra_title.
    ocr = PaddleOCR(use_angle_cls=False, lang="ch", use_gpu=True, show_log=False)
    subs = pysrt.SubRipFile()
    index = 1

    # Input video and output subtitle file.
    video_path = r'http://60.12.15.236:9803/download/%E8%B6%85%E7%BA%A7%E9%BE%99%E5%A9%BF.mp4'
    output_srt_path = 'longxu_zh_.srt'

    # Row range of the subtitle band; alternatives kept from the original:
    # y1,y2,w,h=find_word_location(video_path)
    # y1,y2=1291, 1553
    y1, y2 = 1109, 1261

    # Generate the subtitle file.
    video_extra_title(video_path, output_srt_path, y1, y2)
最好可以提供一个可复现的demo
可以检查一下是不是截取的字幕有问题
> 可以检查一下是不是截取的字幕有问题 —— 没有问题。下面是视频地址,您可以根据上面出错的时间点对照视频查看,字幕本身是没有错的。要是有错就会全部一起错,不可能一段对、一段错。 video_path = r'http://60.12.15.236:9803/download/%E8%B6%85%E7%BA%A7%E9%BE%99%E5%A9%BF.mp4'
应该是字幕位置没对上
字幕正确是没有问题的
字幕正确是没有问题的
谢谢
问题描述 / Problem Description
如图所示,识别出来的语义乱了,后面又恢复正常了。
运行环境 / Runtime Environment
复现代码 / Reproduction Code
完整报错 / Complete Error Message
可能解决方案 / Possible solutions
附件 / Appendix