This is mainly caused by an error when splitting long audio files. The following adjusted script generates audio chunks of equal length (8 s). I specifically removed the youtube-dl download step and process a local audio file instead, because I do not have access to YouTube.

#!/bin/sh
#
# Download the audio track of a video with youtube-dl, convert it to a
# mono 16 kHz WAV file, and split that file into consecutive chunks of
# equal length.
#
# Usage: <script> <url> <chunk size in seconds> <dataset path>
#   url           address handed to youtube-dl
#   chunk_size    length of every output chunk, in whole seconds
#   dataset_path  directory that receives 0.wav, 1.wav, ...
#
# Requires: youtube-dl, ffmpeg, ffprobe, bc.

# Abort on the first failing command or unset variable so that a broken
# download or conversion never reaches the splitting stage.
set -eu

# Print a message on stderr and stop with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ "$#" -ne 3 ]; then
  die "Usage: $0 <url> <chunk size in seconds> <dataset path>"
fi

url=$1
chunk_size=$2
dataset_path=$3

# chunk_size is used in shell integer arithmetic and as a divisor below,
# so anything other than a positive whole number has to be rejected here.
case $chunk_size in
  '' | *[!0-9]*) die "chunk size must be a positive integer: $chunk_size" ;;
esac
[ "$chunk_size" -gt 0 ] || die "chunk size must be greater than zero"

downloaded="joint.wav"
converted="joint2.wav"

# The converted file is only an intermediate; remove it on every exit path.
trap 'rm -f -- "$converted"' EXIT

rm -f -- "$downloaded"

# From the format listing, keep the lines mentioning "audio", reduce each
# to its leading numeric format code, and use the last one listed.
format=$(youtube-dl -F "$url" \
  | grep audio \
  | sed -E 's|([0-9]+).*|\1|g' \
  | tail -n 1)
[ -n "$format" ] || die "no audio format found for $url"

youtube-dl "$url" -f "$format" -o "$downloaded"

# A leftover file from an earlier run would make ffmpeg stop and ask
# whether to overwrite it.
rm -f -- "$converted"
ffmpeg -i "$downloaded" -ac 1 -ab 16k -ar 16000 "$converted"

mkdir -p -- "$dataset_path"

length=$(ffprobe -i "$converted" -show_entries format=duration \
  -v quiet -of "csv=p=0")
[ -n "$length" ] || die "could not determine the duration of $converted"

# bc divides with scale 0 by default, so the quotient is truncated: only
# complete chunks are counted and a shorter trailing remainder is dropped.
# This is what keeps every chunk the same length.
end=$(printf '%s\n' "$length / $chunk_size - 1" | bc)

echo "splitting..."
# A counting loop instead of `seq 0 $end`: when the audio is shorter than
# one chunk, end is -1 and the loop simply does not run.
i=0
while [ "$i" -le "$end" ]; do
  ffmpeg -hide_banner -loglevel error \
    -ss "$((i * chunk_size))" -t "$chunk_size" \
    -i "$converted" "$dataset_path/$i.wav"
  i=$((i + 1))
done
echo "done"

deepsound-project / samplernn-pytorch

Sizes of tensors mismatch in dimension 0 #19

#!/bin/sh

# NOTE(review): this excerpt reads $url and $downloaded but never assigns
# them; presumably the lines that set them were cut from the quote.
# Confirm before running it on its own.

# Remove any previous download so youtube-dl writes a fresh file.
rm -f -- "$downloaded"

# From the format listing, keep the lines mentioning "audio", reduce each
# to its leading numeric format code, and use the last one listed.
format=$(youtube-dl -F "$url" | grep audio | sed -E 's|([0-9]+).*|\1|g' | tail -n 1)

# Fetch the selected format into $downloaded.
youtube-dl "$url" -f "$format" -o "$downloaded"