MStypulkowski / diffused-heads

Official repository for Diffused Heads: Diffusion Models Beat GANs on Talking-Face Generation
Other
471 stars 33 forks source link

unable to generate inference #28

Open shahidmuneer-skku opened 5 months ago

shahidmuneer-skku commented 5 months ago

I tried to run inference, but the generated output file is zero bytes. What is the issue?

s272134690 commented 2 months ago

I encountered the same situation.

qinb commented 2 months ago

您好,我已收到!

akarinmoe commented 4 days ago

Hi, I encountered the same problem and solved it as follows. I checked the code and discovered a conflict between FFmpeg (installed via apt) and ffmpeg-python in my environment. To address it, replace the original default encoder, libopenh264, with libx264. Make sure the FFmpeg call looks like this:

# Build the ffmpeg-python output node that muxes the prepared input streams
# into the file at `path`. `inputs` and `path` come from the surrounding
# save_video code and are not defined in this snippet.
out = ffmpeg.output(
    *inputs,
    path,
    loglevel="info",
    # Request libx264 explicitly instead of relying on the default
    # libopenh264 encoder, which is what conflicted in this environment.
    vcodec='libx264',
    # Encode the audio stream as AAC.
    acodec='aac'
).overwrite_output()  # overwrite an existing file at `path`

Alternatively, you can follow these steps. First, run:

conda install -c conda-forge ffmpeg openh264

Then update the save_video function in utils.py:

def save_video(path, video, fps=25, scale=2, audio=None, audio_rate=16000, overlay_pts=None, ffmpeg_experimental=False):
    """Write ``video`` (and optionally ``audio``) to an H.264 file at ``path``.

    The frames are first written with OpenCV to a temporary ``temp_video.mp4``
    next to ``path``; FFmpeg then re-encodes that file (muxing in the audio
    track when given) into the final output. Progress and every failure are
    printed so that an empty or missing output file can be diagnosed.

    Args:
        path: Destination file. A bare filename (no directory part) is
            written to the current working directory.
        video: A torch tensor (squeezed and moved to CPU) or an object with
            ``.shape`` and ``.copy()`` such as a NumPy array. Frames are
            iterated along the first axis. A 3-dimensional array is treated
            as grayscale; otherwise each frame is taken to be channel-first
            RGB and converted to BGR for OpenCV.
        fps: Frame rate passed to the OpenCV writer.
        scale: Multiplier applied to the last two dimensions of ``video`` to
            get the output frame size; frames are resized when it is not 1.
        audio: Optional audio data handed to ``save_audio``; ``None`` means
            a video-only file.
        audio_rate: Sample rate handed to ``save_audio``.
        overlay_pts: Optional per-frame sequence of ``(x, y)`` points drawn
            as small filled black circles (coordinates are scaled).
        ffmpeg_experimental: When true, pass ``strict='-2'`` to FFmpeg.

    Returns:
        ``True`` if the final file was produced, ``False`` on any failure.
        Exceptions are reported via ``print`` and never propagate.
    """
    # Tracked outside the try block so the finally clause can always clean up,
    # including on early returns and unexpected exceptions.
    temp_files = []
    writer = None
    try:
        # dirname() is "" for a bare filename and os.makedirs("") raises, so
        # fall back to the current directory. exist_ok avoids the race between
        # checking for the directory and creating it.
        out_dir = os.path.dirname(path) or "."
        os.makedirs(out_dir, exist_ok=True)
        print(f"Output directory: {out_dir}")

        success = True
        # OpenCV expects (width, height): last axis first, then second-to-last.
        out_size = (scale * video.shape[-1], scale * video.shape[-2])
        print(f"Output size: {out_size}")

        # Create a temporary video path
        video_path = os.path.join(out_dir, "temp_video.mp4")
        print(f"Temporary video path: {video_path}")

        # Process the input video
        if torch.is_tensor(video):
            vid = video.squeeze().detach().cpu().numpy()
        else:
            vid = video.copy()  # Make a copy so that we don't alter the original object

        print(f"Video shape: {vid.shape}")

        # Bring pixel values into the 0..255 range: data with negative values
        # is shifted and scaled, data not exceeding 1 is scaled by 255.
        if np.min(vid) < 0:
            vid = 127 * vid + 127
        elif np.max(vid) <= 1:
            vid = 255 * vid
        print(f"Video min/max after normalization: {vid.min()}/{vid.max()}")

        # Three dimensions means (frames, H, W), i.e. no channel axis.
        is_color = vid.ndim != 3
        print(f"Is video color: {is_color}")

        # Initialize the VideoWriter. Register the temp file first so it is
        # removed even if the writer fails part-way.
        temp_files.append(video_path)
        writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*"mp4v"), float(fps), out_size, isColor=is_color)
        if not writer.isOpened():
            print("Error: VideoWriter failed to initialize.")
            return False

        # Write each frame
        for i, frame in enumerate(vid):
            print(f"Processing frame {i+1}/{len(vid)}")
            if is_color:
                # Channel-first RGB -> channel-last BGR, as OpenCV expects.
                frame = cv2.cvtColor(np.rollaxis(frame, 0, 3), cv2.COLOR_RGB2BGR)

            if scale != 1:
                frame = cv2.resize(frame, out_size)

            write_frame = frame.astype('uint8')

            if overlay_pts is not None:
                for pt in overlay_pts[i]:
                    cv2.circle(write_frame, (int(scale * pt[0]), int(scale * pt[1])), 2, (0, 0, 0), -1)

            writer.write(write_frame)

        # The file must be finalized before FFmpeg reads it.
        writer.release()
        writer = None
        print(f"Temporary video file saved at: {video_path}")

        # Check the size of the temporary file
        if not os.path.isfile(video_path) or os.path.getsize(video_path) == 0:
            print("Error: Temporary video file is empty.")
            return False
        print(f"Temporary video file size: {os.path.getsize(video_path)} bytes")

        # Add audio (optional)
        inputs = [ffmpeg.input(video_path)['v']]
        if audio is not None:
            audio_path = os.path.splitext(video_path)[0] + "_audio.wav"
            temp_files.append(audio_path)
            save_audio(audio_path, audio, audio_rate)
            if not os.path.isfile(audio_path) or os.path.getsize(audio_path) == 0:
                print("Error: Audio file is empty.")
                return False
            print(f"Audio file saved at: {audio_path}, size: {os.path.getsize(audio_path)} bytes")
            inputs += [ffmpeg.input(audio_path)['a']]

        # Combine the final video file
        try:
            output_options = {"loglevel": "info", "vcodec": 'h264'}
            if ffmpeg_experimental:
                output_options["strict"] = '-2'
            out = ffmpeg.output(*inputs, path, **output_options).overwrite_output()

            print("Running FFmpeg...")
            out.run(quiet=False)
            print(f"Final video saved at: {path}")
        except Exception as e:
            # Best-effort: report the encoder failure and fall through to the
            # cleanup instead of crashing the caller.
            print(f"Error during FFmpeg processing: {str(e)}")
            success = False

        return success
    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return False
    finally:
        # Clean up temporary files on every exit path.
        if writer is not None:
            writer.release()
        for temp_file in temp_files:
            try:
                if os.path.isfile(temp_file):
                    os.remove(temp_file)
            except OSError as e:
                # A failed cleanup must not mask the real result.
                print(f"Warning: could not remove temporary file {temp_file}: {str(e)}")

The updated save_video function reports the error, which is how I found the problem.