Open willwade opened 4 months ago
I'm trying to support all the other models, but to do that I could really do with a nicely formatted list of all the models available. So I'm trying this:
import requests
import json
import re
# Archive suffixes that are stripped from asset names and reported as "compressed".
_ARCHIVE_SUFFIXES = ('.tar.bz2', '.tar.gz', '.zip')


def _parse_asset_filename(filename):
    """Split a release asset file name into its metadata fields.

    The name is expected to look like
    ``<model_type>-<developer>-<lang>-<name>-<quality><archive suffix>``, or
    ``<model_type>-zh-<developer>-<name>[-<quality>]<archive suffix>`` for the
    Chinese models. Any field that is missing is reported as ``'unknown'``.

    Args:
        filename: Asset file name, e.g. ``vits-piper-de_DE-thorsten-low.tar.bz2``.

    Returns:
        A dict with the keys ``model_type``, ``developer``, ``language_code``,
        ``name`` and ``quality`` (in that order).
    """
    # Strip the archive suffix only when it is really at the end of the name.
    stem = filename
    for suffix in _ARCHIVE_SUFFIXES:
        if filename.endswith(suffix):
            stem = filename[:-len(suffix)]
            break
    parts = stem.split('-')

    def part(index):
        return parts[index] if len(parts) > index else 'unknown'

    model_type = 'vits' if parts[0] == 'vits' else 'unknown'
    developer = part(1)
    if developer == 'zh':
        # "vits-zh-<developer>-<name>": the second field is the language and
        # the developer follows it. The language must stay 'zh' here and not
        # be overwritten with the developer field.
        lang_code = 'zh'
        developer = part(2)
    else:
        lang_code = part(2).replace('_', '-')
    name = part(3)
    quality = part(4)

    # Kept from the original script: four-part 'vctk' names carry no usable
    # language/name/quality information.
    if len(parts) == 4 and developer == 'vctk':
        lang_code = 'unknown'
        name = 'unknown'
        quality = 'unknown'

    return {
        'model_type': model_type,
        'developer': developer,
        'language_code': lang_code,
        'name': name,
        'quality': quality,
    }


def get_github_release_assets(repo, tag, *, session=None, timeout=30):
    """Return a JSON listing of the assets attached to a GitHub release.

    Args:
        repo: Repository in ``owner/name`` form, e.g. ``"k2-fsa/sherpa-onnx"``.
        tag: Release tag whose assets should be listed.
        session: Optional object with a ``requests``-compatible ``get`` method
            (for example a ``requests.Session`` carrying an auth token). When
            omitted, the module-level ``requests.get`` is used.
        timeout: Timeout in seconds passed to the HTTP request so the call
            cannot block forever.

    Returns:
        A JSON string (indented by 4) containing a list of objects with the
        keys ``model_type``, ``developer``, ``language_code``, ``name``,
        ``quality``, ``url`` and ``compression``.

    Raises:
        Exception: If the release lookup does not answer with HTTP 200.
    """
    headers = {'Accept': 'application/vnd.github.v3+json'}
    # Look the release up by its tag.
    releases_url = f"https://api.github.com/repos/{repo}/releases/tags/{tag}"
    http = requests if session is None else session
    response = http.get(releases_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch release info for tag: {tag}")
    release_info = response.json()

    # NOTE(review): only the assets embedded in this single response are
    # listed; confirm whether very large releases need a paginated request.
    assets = []
    for asset in release_info.get('assets', []):
        filename = asset['name']
        entry = _parse_asset_filename(filename)
        entry['url'] = asset['browser_download_url']
        # The asset counts as compressed when it ends in a known archive suffix.
        entry['compression'] = filename.endswith(_ARCHIVE_SUFFIXES)
        assets.append(entry)

    # Convert the list of assets to JSON
    return json.dumps(assets, indent=4)
# Example usage: fetch the asset listing of the "tts-models" release of
# k2-fsa/sherpa-onnx and print the resulting JSON string.
repo, tag = "k2-fsa/sherpa-onnx", "tts-models"
assets_json = get_github_release_assets(repo=repo, tag=tag)
print(assets_json)
But it's not great, e.g.:
{
"model_type": "vits",
"developer": "mimic3",
"language_code": "gu-IN",
"name": "cmu",
"quality": "indic_low",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-mimic3-gu_IN-cmu-indic_low.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "piper",
"language_code": "de-DE",
"name": "ramona",
"quality": "low",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-de_DE-ramona-low.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "piper",
"language_code": "de-DE",
"name": "thorsten",
"quality": "high",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-de_DE-thorsten-high.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "piper",
"language_code": "de-DE",
"name": "thorsten",
"quality": "low",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-de_DE-thorsten-low.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "piper",
"language_code": "de-DE",
"name": "thorsten",
"quality": "medium",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-de_DE-thorsten-medium.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "piper",
"language_code": "de-DE",
"name": "thorsten_emotional",
"quality": "medium",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-de_DE-thorsten_emotional-medium.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "piper",
"language_code": "el-GR",
"name": "rapunzelina",
"quality": "low",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-el_GR-rapunzelina-low.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "piper",
"language_code": "en-GB",
"name": "alan",
"quality": "low",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-alan-low.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "piper",
"language_code": "en-GB",
"name": "alan",
"quality": "medium",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-alan-medium.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "hf",
"language_code": "hf",
"name": "keqing",
"quality": "unknown",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-keqing.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "hf",
"language_code": "hf",
"name": "theresa",
"quality": "unknown",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-theresa.tar.bz2",
"compression": true
},
{
"model_type": "vits",
"developer": "hf",
"language_code": "hf",
"name": "zenyatta",
"quality": "unknown",
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-zenyatta.tar.bz2",
"compression": true
}
any advice welcome on how those tts releases are formatted :)
any advice welcome on how those tts releases are formatted :)
I suggest that you handle models like
@csukuangfj the models released under your tts tag release. What action generates those? I'm wondering about creating a json file of all models available
@csukuangfj the models released under your tts tag release. What action generates those? I'm wondering about creating a json file of all models available
Please see https://github.com/csukuangfj/models/tree/small-fixes/.github/workflows
All workflows containing vits
in the above link.
Great. I think it's probably best that I take the hackier scraping approach:
https://github.com/willwade/tts-wrapper/blob/main/tts_wrapper/engines/sherpaonnx/createindex.py
NB: Not including the 1107 MMS models
Just a quick FYI..
I've VERY QUICKLY (so bugs beware) added sherpa-onnx to this python tts-wrapper..
https://github.com/willwade/tts-wrapper?tab=readme-ov-file#sherpa-onnx
We do fun things like listing available voices, auto-downloading models and running them; then our abstract class deals with playing, pausing, streaming audio, etc. Many aspects, like pitch, volume control, SSML and word events, we obviously can't handle with sherpa-onnx. But hey..
I'm really just focusing on MMS models right now.. I'd love a lovely JSON file of all TTS models we could use if anyone has it and I can add them.
It needs heaps of testing, and PRs are very welcome, particularly for rewriting the automated tests and checking/improving audio playback. I feel there is a lag somewhere and I can't figure out where.
Use like
then
Take a look at play/pause/resume etc..
https://github.com/willwade/tts-wrapper?tab=readme-ov-file#streaming-and-playback-control