ruediger / VobSub2SRT

Converts VobSub subtitles (.idx/.sub format) into .srt subtitles.
GNU General Public License v3.0
293 stars 65 forks source link

I've written a script that does VobSub extraction and conversion using VobSub2SRT #105

Open guibregolin opened 5 months ago

guibregolin commented 5 months ago

I'd like to contribute it to the project. Here's the script in full, just in case:

import argparse
import subprocess
import os
import re

def read_language_from_idx(idx_file_path):
    """Return the language code of the first ``id:`` line in a VobSub index file.

    The file is decoded as ISO-8859-1 and scanned line by line for a line that
    starts with ``id: <word>,``. The ``<word>`` part of the first such line is
    returned; ``None`` is returned when no line matches.
    """
    with open(idx_file_path, 'r', encoding='ISO-8859-1') as handle:
        for line in handle:
            # re.match anchors at the start of the line, like '^' in MULTILINE mode.
            found = re.match(r'id: (\w+),', line)
            if found is not None:
                return found.group(1)
    return None

def extract_subtitles_with_mkvextract(video_file, output_folder):
    """Extract the subtitle tracks of ``video_file`` into ``output_folder``.

    ``mkvmerge -i`` is run to list the tracks, and every track reported as
    ``Track ID <n>: subtitles`` is extracted with ``mkvextract`` to
    ``subtitles_track<n>.idx`` inside ``output_folder``.

    Args:
        video_file: Path of the video file to inspect and extract from.
        output_folder: Directory that receives the extracted files.

    Returns:
        A list of ``(idx_path, sub_path)`` tuples, one per extracted track for
        which both files exist after extraction.

    Raises:
        subprocess.CalledProcessError: If ``mkvmerge`` or ``mkvextract`` exits
            with a non-zero status.
    """
    cmd_info = ["mkvmerge", "-i", video_file]
    result = subprocess.run(cmd_info, capture_output=True, text=True, check=True)
    subtitle_tracks = re.findall(r"Track ID (\d+): subtitles", result.stdout)

    subtitle_files = []
    for track_id in subtitle_tracks:
        # Derive both file names from one base path. Rewriting '.idx' with
        # str.replace would also alter any '.idx' inside the folder name.
        base_path = os.path.join(output_folder, f"subtitles_track{track_id}")
        output_idx = f"{base_path}.idx"
        output_sub = f"{base_path}.sub"

        cmd_extract = ["mkvextract", video_file, "tracks", f"{track_id}:{output_idx}"]
        subprocess.run(cmd_extract, check=True)

        # Only keep tracks that produced a complete .idx/.sub pair.
        # NOTE(review): presumably non-VobSub subtitle tracks yield no .sub
        # file and are dropped here -- confirm against mkvextract behaviour.
        if os.path.exists(output_idx) and os.path.exists(output_sub):
            subtitle_files.append((output_idx, output_sub))

    return subtitle_files

def filter_subtitles_by_language(subtitle_files, languages):
    """Tag each subtitle pair with its language and keep only the wanted ones.

    Args:
        subtitle_files: Iterable of ``(idx_path, sub_path)`` tuples.
        languages: Container of accepted language codes, or ``None`` to
            accept every pair.

    Returns:
        A list of ``(idx_path, sub_path, language)`` tuples, where ``language``
        is whatever ``read_language_from_idx`` returned for the ``.idx`` file.
    """
    kept = []
    for idx_path, sub_path in subtitle_files:
        code = read_language_from_idx(idx_path)
        # Guard clause: drop the pair only when a filter is set and it misses.
        if languages is not None and code not in languages:
            continue
        kept.append((idx_path, sub_path, code))
    return kept

def convert_subtitles_to_srt(subtitle_files, output_folder):
    """Run ``vobsub2srt`` on each extracted subtitle pair.

    Args:
        subtitle_files: Iterable of ``(idx_path, sub_path, language)`` tuples.
        output_folder: Unused; kept so the call signature stays unchanged.
            The ``.srt`` file is looked up next to the ``.idx`` file.

    Returns:
        A list of ``(srt_path, language)`` tuples for the tracks that were
        converted successfully.
    """
    srt_files_with_lang = []
    for idx_file, _sub_file, lang in subtitle_files:
        # vobsub2srt is given the shared base name, without the extension.
        base_name = os.path.splitext(idx_file)[0]
        result = subprocess.run(["vobsub2srt", base_name], check=False)

        # A failed conversion must not abort the whole batch, but it must not
        # be reported as a success either: an .srt left over from an earlier
        # run would otherwise be picked up as if it were fresh.
        if result.returncode != 0:
            print(f"Warning: vobsub2srt exited with status {result.returncode} for {base_name}; skipping this track.")
            continue

        output_srt = f"{base_name}.srt"
        if os.path.exists(output_srt):
            srt_files_with_lang.append((output_srt, lang))  # Keep track of language
    return srt_files_with_lang

def mux_subtitles(video_file, subtitle_files, output_folder):
    """Mux SRT subtitle files into ``video_file``, replacing it in place.

    ``mkvmerge`` writes the combined result to ``temp_output.mkv`` inside
    ``output_folder``; on success that file replaces ``video_file``.

    Args:
        video_file: Path of the video file to add subtitles to.
        subtitle_files: Iterable of ``(srt_path, language)`` tuples. A
            ``language`` of ``None`` is passed to mkvmerge as ``und``.
        output_folder: Directory used for the temporary output file.

    Raises:
        subprocess.CalledProcessError: If ``mkvmerge`` exits with a non-zero
            status. The temporary file is removed and ``video_file`` is left
            untouched in that case.
    """
    import shutil  # local import: only needed for the cross-filesystem fallback

    # Nothing to add: do not remux and overwrite the original for no gain.
    if not subtitle_files:
        return

    temp_output_file = os.path.join(output_folder, "temp_output.mkv")

    cmd = ["mkvmerge", "-o", temp_output_file, video_file]
    for srt_file, lang in subtitle_files:  # Expect subtitle_files to include language code
        # Use the language code read from the .idx file if available,
        # default to 'und' (undetermined) otherwise
        lang_code = 'und' if lang is None else lang
        cmd.extend(["--language", f"0:{lang_code}", srt_file])

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError:
        # Do not leave a partial output file behind after a failed mux.
        if os.path.exists(temp_output_file):
            os.remove(temp_output_file)
        raise

    try:
        os.replace(temp_output_file, video_file)
    except OSError:
        # os.replace cannot cross filesystems (e.g. output folder on another
        # mount than the video); shutil.move falls back to copy-and-delete.
        shutil.move(temp_output_file, video_file)

def process_video_files(video_files, output_folder, languages):
    """Extract, convert and re-mux subtitles for each video file.

    For every file the subtitle tracks are extracted, filtered by language,
    converted to SRT and muxed back into the file.

    Args:
        video_files: Iterable of video file paths to process.
        output_folder: Directory for intermediate files; created if missing.
        languages: Comma-separated language codes to convert (e.g.
            ``"en,es"``), or ``None``/empty to convert every language.
    """
    # Parse the filter once. Whitespace around codes ("en, es") is ignored.
    if languages:
        languages_set = {code.strip() for code in languages.split(',') if code.strip()}
    else:
        languages_set = None

    for video_file in video_files:
        print(f"Processing {video_file} for languages: {languages}")

        os.makedirs(output_folder, exist_ok=True)

        extracted_subtitle_files = extract_subtitles_with_mkvextract(video_file, output_folder)

        # Always go through the filter, even without a language list: it also
        # reads each track's language from the .idx file, so the muxed tracks
        # keep their real language instead of being tagged 'und'.
        filtered_subtitle_files = filter_subtitles_by_language(extracted_subtitle_files, languages_set)

        srt_files_with_lang = convert_subtitles_to_srt(filtered_subtitle_files, output_folder)

        # With nothing converted, remuxing would only rewrite the original.
        if not srt_files_with_lang:
            print(f"No subtitles were converted for {video_file}; leaving the file unchanged.")
            continue

        mux_subtitles(video_file, srt_files_with_lang, output_folder)

        # Cleanup step omitted for brevity

        print(f"Finished processing {video_file}. The original file has been updated with new SRT subtitles.")

if __name__ == "__main__":
    # Command-line entry point: parse the arguments, then hand them to
    # process_video_files.
    parser = argparse.ArgumentParser(
        description="Convert VobSub subtitles to SRT for specified languages and mux back into the MKV file.",
    )
    parser.add_argument(
        "video_files",
        nargs='+',
        help="Paths to video files to process",
    )
    parser.add_argument(
        "--output",
        default=".",
        help="Output folder for temporary files",
    )
    parser.add_argument(
        "--languages",
        help="Comma-separated list of ISO 639-1 language codes to convert (e.g., 'en,es'). Processes all languages if omitted.",
    )
    args = parser.parse_args()

    process_video_files(
        args.video_files,
        args.output,
        args.languages,
    )