It works directly in Termux, without proot, yay! Some submodules needed to be compiled by hand, though, or to have their requirements tinkered with.
Some tips, which may be included as options later on:
import pandas as pd
import json
import sys
from colorama import Fore, Style
from colorama import Fore, Back, Style
def color_code(word, score):
    """Return *word* wrapped in a colorama foreground colour picked by *score*.

    The score is tested against descending lower bounds (strictly greater
    than 0.9, 0.8, 0.7, 0.6 and 0.5); the first bound it exceeds selects the
    colour.  A score that exceeds none of them is rendered in red.  The
    style is reset right after the word.
    """
    # (lower bound, colour) pairs, ordered from most to least confident.
    palette = (
        (0.9, Fore.GREEN),            # Green
        (0.8, Fore.LIGHTGREEN_EX),    # Light Green
        (0.7, Fore.YELLOW),           # Yellow
        (0.6, Fore.LIGHTYELLOW_EX),   # Light Yellow
        (0.5, Fore.LIGHTRED_EX),      # Light Red
    )
    for lower_bound, colour in palette:
        if score > lower_bound:
            return colour + word + Style.RESET_ALL
    # Nothing matched: lowest confidence.
    return Fore.RED + word + Style.RESET_ALL  # Red
def process_file(filename):
    """Print each segment of a transcript JSON file with colour-coded words.

    The JSON document must be an object with a ``segments`` list.  Each
    segment's ``words`` list holds objects with a ``word`` string and an
    optional numeric ``score``.  One line is printed per segment, made of the
    segment's words joined by single spaces, each passed through
    ``color_code``.

    Args:
        filename: Path of the JSON file to read.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``segments`` or a word's ``word`` key is missing.
    """
    # Read as UTF-8 explicitly so non-ASCII words do not depend on the
    # platform's default locale encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Loop over each segment in the data
    for segment in data['segments']:
        coloured = []
        for entry in segment.get('words', []):
            # A word may have no score (or a null one); treat it as the
            # lowest confidence instead of failing on the missing key.
            score = entry.get('score')
            if score is None:
                score = 0.0
            coloured.append(color_code(entry['word'], score))
        # Join the words into a sentence
        print(' '.join(coloured))
# Run only when executed as a script, so importing this file has no side effects.
if __name__ == '__main__':
    # Exit with a usage hint instead of a bare IndexError when no path is given.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <file.json>')
    # Get the filename from the command-line arguments
    filename = sys.argv[1]
    # Call the function with the filename as an argument
    process_file(filename)
And its SRT version:
import pandas as pd
import json
import sys
import os
def color_code(word, score):
    """Return *word* wrapped in a ``<font color=...>`` tag picked by *score*.

    The score is tested against descending lower bounds (strictly greater
    than 0.9, 0.8, 0.7, 0.6 and 0.5); the first bound it exceeds selects the
    colour.  A score that exceeds none of them is rendered in red.
    """
    # (lower bound, opening tag) pairs, ordered from most to least confident.
    tags = (
        (0.9, '<font color="#008000">'),  # Green
        (0.8, '<font color="#32CD32">'),  # LimeGreen
        (0.7, '<font color="#ADFF2F">'),  # GreenYellow
        (0.6, '<font color="#FFFF00">'),  # Yellow
        (0.5, '<font color="#FFA500">'),  # Orange
    )
    opening = '<font color="#FF0000">'    # Red, used when nothing matches
    for lower_bound, tag in tags:
        if score > lower_bound:
            opening = tag
            break
    return opening + word + '</font>'
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``hh:mm:ss,mmm``.

    The value is rounded to the nearest millisecond first and then split
    into fields using integer arithmetic only.

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        The timestamp string, e.g. ``'01:01:01,500'`` for ``3661.5``.
    """
    # Round once to whole milliseconds.  Truncating the fractional part
    # (``int((seconds % 1) * 1000)``) loses a millisecond whenever the float
    # sits just below the intended value, e.g. 1.001 would render as ",000".
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    secs, milliseconds = divmod(remainder_ms, 1000)
    return f'{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}'
def process_file(filename):
    """Write a colour-coded ``.srt`` file from a transcript JSON file.

    The JSON document must be an object with a ``segments`` list.  Each
    segment has ``start`` and ``end`` times in seconds and a ``words`` list
    of objects with a ``word`` string and an optional numeric ``score``.
    Every segment becomes one numbered subtitle whose words are wrapped by
    ``color_code``.  The output is written next to the input as
    ``<input without extension>_colorcoded.srt``.

    Args:
        filename: Path of the JSON file to read.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``segments``, ``start``, ``end`` or a word's ``word``
            key is missing.
    """
    # Read as UTF-8 explicitly so non-ASCII words do not depend on the
    # platform's default locale encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Get the base filename without the extension
    base_filename = os.path.splitext(filename)[0]

    # Write as UTF-8 too, so every word that was read can be written back.
    with open(f'{base_filename}_colorcoded.srt', 'w', encoding='utf-8') as f:
        # Subtitle numbering in the output starts at 1.
        for i, segment in enumerate(data['segments'], start=1):
            coloured = []
            for entry in segment.get('words', []):
                # A word may have no score (or a null one); treat it as the
                # lowest confidence instead of failing on the missing key.
                score = entry.get('score')
                if score is None:
                    score = 0.0
                coloured.append(color_code(entry['word'], score))
            sentence = ' '.join(coloured)

            start = format_time(segment['start'])
            end = format_time(segment['end'])
            # Write the subtitle number, time range, and sentence to the .srt file
            f.write(f'{i}\n')
            f.write(f'{start} --> {end}\n')
            f.write(f'{sentence}\n\n')
# Run only when executed as a script, so importing this file has no side effects.
if __name__ == '__main__':
    # Exit with a usage hint instead of a bare IndexError when no path is given.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <file.json>')
    # Get the filename from the command-line arguments
    filename = sys.argv[1]
    # Call the function with the filename as an argument
    process_file(filename)
It works directly in Termux, without proot, yay! Some submodules needed to be compiled by hand, though, or to have their requirements tinkered with.
Some tips, which may be included as options later on:
And its SRT version: