--- Preparing pronunciations for OOV words ...
Traceback (most recent call last):
File "/home/kaldi/kaldi/tools/sequitur-g2p/bin/g2p.py", line 4, in <module>
__import__('pkg_resources').run_script('sequitur==1.0a1', 'g2p.py')
File "/usr/lib/python2.7/dist-packages/pkg_resources/__init__.py", line 719, in run_script
self.require(requires)[0].run_script(script_name, ns)
File "/usr/lib/python2.7/dist-packages/pkg_resources/__init__.py", line 1504, in run_script
exec(code, namespace, namespace)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/EGG-INFO/scripts/g2p.py", line 268, in <module>
tool.run(main, options, args)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/tool.py", line 63, in run
status = runMain(main, options, args)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/tool.py", line 99, in runMain
status = main(options, args)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/EGG-INFO/scripts/g2p.py", line 214, in main
mainApply(translator, options)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/EGG-INFO/scripts/g2p.py", line 164, in mainApply
for word, left in words:
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/EGG-INFO/scripts/g2p.py", line 100, in readApply
for line in gOpenIn(fname, defaultEncoding):
File "/usr/lib/python2.7/codecs.py", line 630, in next
line = self.readline()
File "/usr/lib/python2.7/codecs.py", line 545, in readline
data = self.read(readsize, firstline=True)
File "/usr/lib/python2.7/codecs.py", line 488, in read
newdata = self.stream.read(size)
File "/usr/lib/python2.7/encodings/ascii.py", line 26, in decode
return codecs.ascii_decode(input, self.errors)[0]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd0 in position 12: ordinal not in range(128)
The file is UTF-8 encoded and contains Russian Cyrillic characters. I tried using the --encoding parameter, but it did not change the result.
When I run ./run.sh for training, I see:
--- Preparing pronunciations for OOV words ...
Traceback (most recent call last):
File "/home/kaldi/kaldi/tools/sequitur-g2p/bin/g2p.py", line 4, in <module>
__import__('pkg_resources').run_script('sequitur==1.0a1', 'g2p.py')
File "/usr/lib/python2.7/dist-packages/pkg_resources/__init__.py", line 719, in run_script
self.require(requires)[0].run_script(script_name, ns)
File "/usr/lib/python2.7/dist-packages/pkg_resources/__init__.py", line 1504, in run_script
exec(code, namespace, namespace)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/EGG-INFO/scripts/g2p.py", line 268, in <module>
tool.run(main, options, args)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/tool.py", line 63, in run
status = runMain(main, options, args)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/tool.py", line 99, in runMain
status = main(options, args)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/EGG-INFO/scripts/g2p.py", line 214, in main
mainApply(translator, options)
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/EGG-INFO/scripts/g2p.py", line 164, in mainApply
for word, left in words:
File "/home/kaldi/kaldi/tools/sequitur-g2p/lib/python2.7/site-packages/sequitur-1.0a1-py2.7-linux-x86_64.egg/EGG-INFO/scripts/g2p.py", line 100, in readApply
for line in gOpenIn(fname, defaultEncoding):
File "/usr/lib/python2.7/codecs.py", line 630, in next
line = self.readline()
File "/usr/lib/python2.7/codecs.py", line 545, in readline
data = self.read(readsize, firstline=True)
File "/usr/lib/python2.7/codecs.py", line 488, in read
newdata = self.stream.read(size)
File "/usr/lib/python2.7/encodings/ascii.py", line 26, in decode
return codecs.ascii_decode(input, self.errors)[0]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd0 in position 12: ordinal not in range(128)
The file is UTF-8 encoded and contains Russian Cyrillic characters. I tried using the --encoding parameter, but it did not change the result.