3sakshij commented 3 months ago

While running the asr_prep_json.py script in fairseq/examples/datasets, I get a "failed to decode the audio" exception for a few of the .wav files. I don't understand why this error occurs. I searched the existing issues here and in torchaudio but could not find an answer. Kindly let me know how to fix this.


from __future__ import absolute_import, division, print_function, unicode_literals

import argparse
import concurrent.futures
import json
import multiprocessing
import os
from collections import namedtuple
from itertools import chain

import sentencepiece as spm
from fairseq.data import Dictionary


# Dividing a duration in seconds by this factor yields milliseconds.
MILLISECONDS_TO_SECONDS = 0.001


def process_sample(aud_path, lable, utt_id, sp, tgt_dict):
    """Build the manifest entry for a single utterance.

    Args:
        aud_path: Path of the audio file to describe.
        lable: Transcript text of the utterance (spelling kept so that
            existing callers are unaffected).
        utt_id: Utterance id; used as the key of the returned dict.
        sp: Tokenizer exposing ``EncodeAsPieces(text)``.
        tgt_dict: Dictionary exposing ``encode_line(line, append_eos=False)``.

    Returns:
        ``{utt_id: {"input": {...}, "output": {...}}}`` where ``input`` holds
        ``length_ms`` and ``path`` and ``output`` holds ``text``, ``token``
        and ``tokenid``.
    """
    # Imported lazily so the module can be imported without torchaudio.
    import torchaudio

    # NOTE(review): the pasted code called an undefined name `t(aud_path)`.
    # The attributes read below (num_frames / num_channels / sample_rate) are
    # those of the metadata returned by torchaudio.info, so that call is
    # assumed here -- confirm against the intended torchaudio version.
    info = torchaudio.info(aud_path)

    # num_frames already counts frames per channel, so the duration is
    # num_frames / sample_rate; dividing by the channel count as well would
    # under-report the length of multi-channel audio.
    duration_s = info.num_frames / info.sample_rate
    audio_entry = {
        "length_ms": int(duration_s / MILLISECONDS_TO_SECONDS),
        "path": aud_path,
    }

    token = " ".join(sp.EncodeAsPieces(lable))
    ids = tgt_dict.encode_line(token, append_eos=False)
    text_entry = {
        "text": lable,
        "token": token,
        "tokenid": ", ".join(map(str, [idx.tolist() for idx in ids])),
    }
    return {utt_id: {"input": audio_entry, "output": text_entry}}

def main():
    """Create the JSON manifest for a directory tree of labelled audio files.

    Parses the command line, loads the SentencePiece model and the fairseq
    dictionary, pairs every audio file whose stem appears in the labels file
    with its transcript, processes the pairs on a thread pool and writes
    ``{"utts": {...}}`` to ``--output``.

    Raises:
        Exception: If the labels file contains no labels.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--audio-dirs",
        nargs="+",
        default=["-"],
        required=True,
        help="input directories with audio files",
    )
    parser.add_argument(
        "--labels",
        required=True,
        help="aggregated input labels with format per line",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument(
        "--spm-model",
        required=True,
        help="sentencepiece model to use for encoding",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument(
        "--dictionary",
        required=True,
        help="file to load fairseq dictionary from",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument("--audio-format", choices=["flac", "wav"], default="wav")
    parser.add_argument(
        "--output",
        required=True,
        type=argparse.FileType("w"),
        help="path to save json output",
    )
    args = parser.parse_args()

    sp = spm.SentencePieceProcessor()
    # The processor cannot encode anything until a model has been loaded;
    # only the path of the file opened by argparse is needed.
    sp.Load(args.spm_model.name)

    tgt_dict = Dictionary.load(args.dictionary)

    # Each label line is "<utt_id> <transcript>".
    labels = {}
    for line in args.labels:
        if not line.strip():
            # Tolerate blank lines instead of failing on the unpacking below.
            continue
        (utt_id, label) = line.split(" ", 1)
        labels[utt_id] = label
    if not labels:
        # args has no `labels_path` attribute; report the labels file name.
        raise Exception("No labels found in ", args.labels.name)

    Sample = namedtuple("Sample", "aud_path utt_id")
    samples = []
    wanted_ext = "." + args.audio_format
    for path, _, files in chain.from_iterable(
        os.walk(audio_dir) for audio_dir in args.audio_dirs
    ):
        for f in files:
            # Compare the real extension so that a name merely ending in the
            # letters "wav"/"flac" is not mistaken for an audio file.
            utt_id, ext = os.path.splitext(f)
            if ext != wanted_ext:
                continue
            if utt_id not in labels:
                # Audio without a transcript cannot be used.
                continue
            samples.append(Sample(os.path.join(path, f), utt_id))

    utts = {}
    num_cpu = multiprocessing.cpu_count()
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_cpu) as executor:
        future_to_sample = {
            executor.submit(
                process_sample, s.aud_path, labels[s.utt_id], s.utt_id, sp, tgt_dict
            ): s
            for s in samples
        }
        for future in concurrent.futures.as_completed(future_to_sample):
            sample = future_to_sample[future]
            try:
                data = future.result()
            except Exception as exc:
                # Best effort: report the failing sample and keep going.
                print("generated an exception: ", exc, sample)
            else:
                utts.update(data)
    json.dump({"utts": utts}, args.output, indent=4)

# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()

I tried printing the paths of the audio files that cause this decoding failure, but I still don't understand why the error occurs.