I am hesitant about opening a PR for this, since it would be a breaking change relative to your README, but I think it might help, so I'll just suggest it here:
import json #new
import os
import shutil
import tarfile
from pathlib import Path
from urllib.request import urlretrieve

import requests # new
def get_available_languages(cache_file="languages_cache.json"):
    """Return the MMS TTS languages as a dict of ISO code -> language name.

    The list is served from ``cache_file`` when that file holds a non-empty
    JSON object. Otherwise it is fetched from the MMS language page, parsed,
    and written back to ``cache_file`` (best effort) for the next call.

    Args:
        cache_file: Path of the JSON file used as the cache.

    Returns:
        A dict mapping each ISO code to its language name.

    Raises:
        RuntimeError: If the page cannot be fetched, or if no language
            entries can be parsed from it.
    """
    url = "https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html"

    # Check if the cache file exists
    if os.path.exists(cache_file):
        try:
            with open(cache_file, 'r', encoding="utf-8") as f:
                cached = json.load(f)
        except (OSError, ValueError):
            # Unreadable or corrupt cache: fall through and refetch.
            cached = None
        # An empty or non-dict cache is treated as stale rather than trusted,
        # otherwise every later lookup would report "language not available".
        if isinstance(cached, dict) and cached:
            return cached

    # Fetch the list of available languages
    try:
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to fetch available languages: {str(e)}") from e

    languages = {}
    for line in response.text.splitlines():
        line = line.strip()
        if not (line.startswith("<p>") and line.endswith("</p>")):
            continue
        content = line[3:-4].strip()
        if content.lower().startswith("iso code"):
            continue  # Skip the header
        if "\t" in content:
            # Split on the first tab only, so names containing spaces survive.
            iso_code, _, language = content.partition("\t")
        else:
            # No tab: the code is the first token, the name is the remainder.
            parts = content.split(None, 1)
            if len(parts) != 2:
                continue
            iso_code, language = parts
        iso_code = iso_code.strip()
        language = language.strip()
        if iso_code and language:
            languages[iso_code] = language

    if not languages:
        # Never cache an empty result; it would poison every later call.
        raise RuntimeError(
            f"Error processing languages data: no language entries found at {url}"
        )

    # Cache the languages (best effort: a read-only location must not
    # prevent the caller from getting the freshly fetched list).
    try:
        with open(cache_file, 'w', encoding="utf-8") as f:
            json.dump(languages, f)
    except OSError:
        pass
    return languages
def download(lang, tgt_dir="./"):
    """Download and unpack the MMS TTS model archive for ``lang``.

    The language code is first checked against ``get_available_languages()``.
    If ``<tgt_dir>/<lang>`` already exists it is returned without downloading.

    Args:
        lang: Language code of the model to download.
        tgt_dir: Directory that receives the archive and the extracted model.

    Returns:
        The path ``<tgt_dir>/<lang>``.

    Raises:
        ValueError: If ``lang`` is not among the available languages.
    """
    languages = get_available_languages()
    if lang not in languages:
        raise ValueError(f"Language '{lang}' is not available. Please check the available languages.")

    lang_fn = os.path.join(tgt_dir, lang + '.tar.gz')
    lang_dir = os.path.join(tgt_dir, lang)
    if os.path.exists(lang_dir):
        return lang_dir

    Path(tgt_dir).mkdir(parents=True, exist_ok=True)
    url = f"https://dl.fbaipublicfiles.com/mms/tts/{lang}.tar.gz"
    print(f"Downloading {lang} from {url}")
    urlretrieve(url, lang_fn)

    try:
        with tarfile.open(lang_fn) as file:
            print(f"Extracting {lang} to {lang_dir}")
            if hasattr(tarfile, "data_filter"):
                # Refuse members that would escape tgt_dir (absolute paths,
                # "..", unsafe links) on Pythons with extraction filters.
                file.extractall(tgt_dir, filter="data")
            else:
                file.extractall(tgt_dir)
    except BaseException:
        # lang_dir did not exist before this call, so anything there now is a
        # partial extraction. Remove it so the next call retries instead of
        # treating the directory as a finished download.
        shutil.rmtree(lang_dir, ignore_errors=True)
        raise

    # Created only after a successful extraction, so the returned path always
    # exists without a failed attempt ever looking like a complete download.
    Path(lang_dir).mkdir(parents=True, exist_ok=True)
    print("Done")
    return lang_dir
You could then do:
from ttsmms import TTS, download, get_available_languages
import os
# Function to display available languages
def display_available_languages():
    """Print a heading, then one ``code: name`` line per available language."""
    catalog = get_available_languages()
    print("Available Languages:")
    for code in catalog:
        print(f"{code}: {catalog[code]}")
# Show the user which language codes can be requested
display_available_languages()

# Language code of the model to use
lang_code = "kmr-script_arabic" # Replace with the desired language code

# Root directory under which downloaded models are stored
model_dir = "./mms_models"

# Fetch the model only when its directory is not on disk yet
model_path = os.path.join(model_dir, lang_code)
already_present = os.path.exists(model_path)
if not already_present:
    download(lang_code, model_dir)

# Load the TTS model from the model directory
tts = TTS(model_path)
This saves the end user the rather tricky step of working out the available language codes from a rather hard-to-find web page. Downside: if that page changes, we are stuck. I have a solution, though: just ship a cached copy of the list in this repo (I see it is already there as a txt file; save it as JSON inside the actual module).
I am hesitant about opening a PR for this, since it would be a breaking change relative to your README, but I think it might help, so I'll just suggest it here:
You could then do:
This saves the end user the rather tricky step of working out the available language codes from a rather hard-to-find web page. Downside: if that page changes, we are stuck. I have a solution, though: just ship a cached copy of the list in this repo (I see it is already there as a txt file; save it as JSON inside the actual module).