OpenTalker / SadTalker

[CVPR 2023] SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation
https://sadtalker.github.io/
Other
11.12k stars 2.08k forks source link

音画不同步 #917

Open haomole opened 1 month ago

haomole commented 1 month ago
eoffermann commented 1 month ago

Translation: The picture is normal but the sound is delayed.

haomole commented 4 weeks ago

I have fixed this issue:

src\utils\videoio.py:

import os
import shutil
import subprocess
import uuid

import cv2

def load_video_to_cv2(input_path):
    """Decode every frame of a video file into memory.

    Args:
        input_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        A ``(frames, fps)`` tuple. ``frames`` is a list holding each decoded
        frame after conversion from BGR to RGB channel order; ``fps`` is the
        value the capture reports for ``cv2.CAP_PROP_FPS``.
    """
    capture = cv2.VideoCapture(input_path)
    fps = capture.get(cv2.CAP_PROP_FPS)

    rgb_frames = []
    ok, bgr_frame = capture.read()
    while ok:
        # OpenCV decodes to BGR; callers receive RGB.
        rgb_frames.append(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))
        ok, bgr_frame = capture.read()

    # The first failed read marks the end of the stream.
    capture.release()
    return rgb_frames, fps

def change_frame_rate(input_path, output_path, fps):
    """Re-encode a video at a different frame rate with ffmpeg.

    ffmpeg writes to a uniquely named temporary ``.mp4`` in the current
    working directory, which is then moved to ``output_path``. This allows
    ``output_path`` to be the same file as ``input_path``.

    Args:
        input_path: Source video path.
        output_path: Destination path for the re-timed video.
        fps: Target frame rate. Fractional rates (e.g. ``29.97``) are passed
            to ffmpeg unchanged.

    Raises:
        FileNotFoundError: If the ffmpeg executable cannot be found, or if
            ffmpeg produced no output file to move.
    """
    temp_file = str(uuid.uuid4()) + '.mp4'
    # An argument list (no shell) keeps paths containing quotes, spaces, `$`
    # or backticks from being misparsed or interpreted by a shell.
    cmd = [
        'ffmpeg', '-y', '-hide_banner', '-loglevel', 'error',
        '-i', str(input_path),
        # str() preserves fractional rates; formatting with '%d' truncated
        # 29.97 to 29, which itself desynchronises audio and video.
        '-r', str(fps),
        '-qscale', '0',
        temp_file,
    ]
    try:
        # ffmpeg's exit status is not checked here: a failed run surfaces
        # through shutil.move when the temporary file is missing.
        subprocess.run(cmd)
        shutil.move(temp_file, output_path)
    finally:
        # After a successful move the temp file is gone; this only fires
        # when the move failed and a partial file was left behind.
        if os.path.exists(temp_file):
            os.remove(temp_file)

def save_video_with_watermark(video, audio, save_path, watermark=False, target_fps=30):
    """Mux a video with an audio track and optionally overlay the logo.

    Steps:
      1. Re-time ``video`` to ``target_fps`` via ``change_frame_rate``.
      2. Encode the re-timed video (libx264) together with ``audio`` (AAC,
         stereo, 44.1 kHz) into a temporary file.
      3. Either move that file to ``save_path`` or, when ``watermark`` is
         true, run a second ffmpeg pass that overlays the SadTalker logo in
         the top-right corner and writes ``save_path``.

    Errors raised during steps 2-3 are caught and printed (best effort);
    errors from step 1 propagate to the caller. Temporary files are removed
    in every case.

    Args:
        video: Path of the silent source video.
        audio: Path of the audio file to attach.
        save_path: Destination path of the final video.
        watermark: Whether to overlay the logo image.
        target_fps: Frame rate the video is converted to before muxing.
    """
    temp_video = str(uuid.uuid4()) + '_video.mp4'
    change_frame_rate(video, temp_video, target_fps)

    temp_file = str(uuid.uuid4()) + '.mp4'
    # Argument lists (no shell) keep unusual characters in paths from being
    # misparsed or interpreted by a shell.
    mux_cmd = [
        'ffmpeg', '-i', temp_video, '-i', str(audio),
        '-c:v', 'libx264', '-b:v', '5000k', '-maxrate', '5000k', '-bufsize', '10000k',
        '-c:a', 'aac', '-b:a', '122k', '-ac', '2', '-ar', '44100',
        '-strict', 'experimental',
        temp_file,
    ]

    try:
        subprocess.run(mux_cmd)

        if not watermark:
            shutil.move(temp_file, save_path)
        else:
            try:
                # check if stable-diffusion-webui
                import webui  # noqa: F401 -- imported only to detect the host app
                from modules import paths
                watermark_path = paths.script_path + "/extensions/SadTalker/docs/sadtalker_logo.png"
            except ImportError:
                # get the root path of sadtalker
                dir_path = os.path.dirname(os.path.realpath(__file__))
                watermark_path = dir_path + "/../../docs/sadtalker_logo.png"

            overlay_cmd = [
                'ffmpeg', '-y', '-hide_banner', '-loglevel', 'error',
                '-i', temp_file, '-i', watermark_path,
                '-filter_complex',
                '[1]scale=100:-1[wm];[0][wm]overlay=(main_w-overlay_w)-10:10',
                # The overlay filter produces new frames, so the video has to
                # be encoded: ffmpeg refuses to combine filtering with
                # '-c:v copy' and would write no output at all.
                '-c:v', 'libx264',
                '-c:a', 'aac', '-b:a', '128k', '-ar', '16000',
                '-shortest',
                str(save_path),
            ]
            subprocess.run(overlay_cmd)
    except Exception as e:
        # Deliberate best effort: report the problem instead of raising.
        print("Error:", e)
    finally:
        # Remove whichever intermediates still exist; a missing file must
        # not raise from the cleanup path.
        for leftover in (temp_file, temp_video):
            if os.path.exists(leftover):
                os.remove(leftover)