Closed abnerLing closed 2 years ago
I can confirm this issue. It happens on the current master (but did not occur in #3087?)
It seems that the text cleaner tries to load non_linguistic_symbols
from a file that doesn't exist. In config.yaml
:
non_linguistic_symbols: data/nlsyms.txt
Then the code crashes here: https://github.com/espnet/espnet/blob/fe551ff9370dbb95507d74701c139743b180fa59/espnet2/text/char_tokenizer.py#L20-L27
To reproduce, with a freshly set-up espnet:
from espnet_model_zoo.downloader import ModelDownloader
d = ModelDownloader(cachedir="./modelcache")
wsjmodel = d.download_and_unpack("kamo-naoyuki/wsj")
# load the example file included in the ESPnet repository
import soundfile
speech, rate = soundfile.read("./test_utils/ctc_align_test.wav")
# CTC segmentation
from espnet2.bin.asr_align import CTCSegmentation
aligner = CTCSegmentation( **wsjmodel , fs=rate )
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-1-9437fb12186a> in <module>
7 # CTC segmentation
8 from espnet2.bin.asr_align import CTCSegmentation
----> 9 aligner = CTCSegmentation( **wsjmodel , fs=rate )
/xxx/espnet/espnet2/bin/asr_align.py in __init__(self, asr_train_config, asr_model_file, fs, ngpu, batch_size, dtype, kaldi_style_text, text_converter, time_stamps, **ctc_segmentation_args)
237 )
238 asr_model.to(dtype=getattr(torch, dtype)).eval()
--> 239 self.preprocess_fn = ASRTask.build_preprocess_fn(asr_train_args, False)
240
241 # Warn for nets with high memory consumption on long audio files
/xxx/espnet/espnet2/tasks/asr.py in build_preprocess_fn(cls, args, train)
291 assert check_argument_types()
292 if args.use_preprocessor:
--> 293 retval = CommonPreprocessor(
294 train=train,
295 token_type=args.token_type,
/xxx/espnet/espnet2/train/preprocessor.py in __init__(self, train, token_type, token_list, bpemodel, text_cleaner, g2p_type, unk_symbol, space_symbol, non_linguistic_symbols, delimiter, rir_scp, rir_apply_prob, noise_scp, noise_apply_prob, noise_db_range, speech_volume_normalize, speech_name, text_name)
159 self.text_cleaner = TextCleaner(text_cleaner)
160
--> 161 self.tokenizer = build_tokenizer(
162 token_type=token_type,
163 bpemodel=bpemodel,
/xxx/espnet/espnet2/text/build_tokenizer.py in build_tokenizer(token_type, bpemodel, non_linguistic_symbols, remove_non_linguistic_symbols, space_symbol, delimiter, g2p_type)
44
45 elif token_type == "char":
---> 46 return CharTokenizer(
47 non_linguistic_symbols=non_linguistic_symbols,
48 space_symbol=space_symbol,
/xxx/espnet/espnet2/text/char_tokenizer.py in __init__(self, non_linguistic_symbols, space_symbol, remove_non_linguistic_symbols)
22 elif isinstance(non_linguistic_symbols, (Path, str)):
23 non_linguistic_symbols = Path(non_linguistic_symbols)
---> 24 with non_linguistic_symbols.open("r", encoding="utf-8") as f:
25 self.non_linguistic_symbols = set(line.rstrip() for line in f)
26 else:
/usr/lib/python3.9/pathlib.py in open(self, mode, buffering, encoding, errors, newline)
1240 the built-in open() function does.
1241 """
-> 1242 return io.open(self, mode, buffering, encoding, errors, newline,
1243 opener=self._opener)
1244
/usr/lib/python3.9/pathlib.py in _opener(self, name, flags, mode)
1108 def _opener(self, name, flags, mode=0o666):
1109 # A stub for the opener argument to built-in open()
-> 1110 return self._accessor.open(self, flags, mode)
1111
1112 def _raw_open(self, flags, mode=0o777):
FileNotFoundError: [Errno 2] No such file or directory: 'data/nlsyms.txt'
Thanks @lumaku , I also tested with a pretrained librispeech model, and it searches for "exp/asr_stats_raw_bpe5000_sp/train/feats_stats.npz". You can work around that issue by running the asr_align.py script in the directory before exp and it works fine but I'm not sure if that's intentional.
Traceback (most recent call last):
File "espnet2/bin/asr_align.py", line 827, in <module>
main()
File "espnet2/bin/asr_align.py", line 823, in main
ctc_align(**kwargs)
File "espnet2/bin/asr_align.py", line 632, in ctc_align
aligner = CTCSegmentation(**model, **kwargs)
File "espnet2/bin/asr_align.py", line 235, in __init__
asr_model, asr_train_args = ASRTask.build_model_from_file(
File "/home/abner/work/espnet/espnet2/tasks/abs_task.py", line 1776, in build_model_from_file
model = cls.build_model(args)
File "/home/abner/work/espnet/espnet2/tasks/asr.py", line 380, in build_model
normalize = normalize_class(**args.normalize_conf)
File "/home/abner/work/espnet/espnet2/layers/global_mvn.py", line 41, in __init__
stats = np.load(stats_file)
File "/home/abner/anaconda3/envs/es/lib/python3.8/site-packages/numpy/lib/npyio.py", line 417, in load
fid = stack.enter_context(open(os_fspath(file), "rb"))
FileNotFoundError: [Errno 2] No such file or directory: 'exp/asr_stats_raw_bpe5000_sp/train/feats_stats.npz'
The issue with 'data/nlsyms.txt' is specific to the CTC segmentation module, because it additionally uses a tokenizer, which the Speech2Text module does not use.
About the second issue, I'm not sure. It should be possible to run the script from anywhere, assuming the Python dependencies are on the PYTHONPATH. Does this error also happen when you import the model with Speech2Text using
speech2text = Speech2Text(**model)
, or, with espnet2/bin/asr_inference.py
?
I've tried testing out the CTC segmentation example with ESPnet2 and I get the same error of "No such file or directory: 'data/nlsyms.txt'." This occurs when using the below script and also when running the exact same python code in the example.
espnet2/bin/asr_align.py --asr_train_config config.yaml --asr_model_file model.pth --audio audio.wav --text text.txt --output segments
Traceback (most recent call last): File "espnet2/bin/asr_align.py", line 827, in <module> main() File "espnet2/bin/asr_align.py", line 823, in main ctc_align(**kwargs) File "espnet2/bin/asr_align.py", line 632, in ctc_align aligner = CTCSegmentation(**model, **kwargs) File "espnet2/bin/asr_align.py", line 239, in __init__ self.preprocess_fn = ASRTask.build_preprocess_fn(asr_train_args, False) File "/home/abner/work/espnet/espnet2/tasks/asr.py", line 293, in build_preprocess_fn retval = CommonPreprocessor( File "/home/abner/work/espnet/espnet2/train/preprocessor.py", line 161, in __init__ self.tokenizer = build_tokenizer( File "/home/abner/work/espnet/espnet2/text/build_tokenizer.py", line 46, in build_tokenizer return CharTokenizer( File "/home/abner/work/espnet/espnet2/text/char_tokenizer.py", line 24, in __init__ with non_linguistic_symbols.open("r", encoding="utf-8") as f: File "/home/abner/anaconda3/envs/es/lib/python3.8/pathlib.py", line 1222, in open return io.open(self, mode, buffering, encoding, errors, newline, File "/home/abner/anaconda3/envs/es/lib/python3.8/pathlib.py", line 1078, in _opener return self._accessor.open(self, flags, mode) FileNotFoundError: [Errno 2] No such file or directory: 'data/nlsyms.txt'
Basic environments: