Open kobakos opened 4 months ago
@kobakos beatnetのonnxをいただくことは可能でしょうか?
madmomで下記のエラーになる。
Traceback (most recent call last):
File "/Users/kyakuno/Desktop/ailia/ailia-models-ax/audio_processing/beatnet/beatnet.py", line 4, in <module>
from madmom.features import DBNDownBeatTrackingProcessor
File "/usr/local/lib/python3.11/site-packages/madmom/__init__.py", line 24, in <module>
from . import audio, evaluation, features, io, ml, models, processors, utils
File "/usr/local/lib/python3.11/site-packages/madmom/audio/__init__.py", line 27, in <module>
from . import comb_filters, filters, signal, spectrogram, stft
File "madmom/audio/comb_filters.pyx", line 15, in init madmom.audio.comb_filters
File "/usr/local/lib/python3.11/site-packages/madmom/processors.py", line 23, in <module>
from collections import MutableSequence
ImportError: cannot import name 'MutableSequence' from 'collections' (/usr/local/Homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/collections/__init__.py)
@kobakos madmomをlibrosaのstftなどに置き換え可能でしょうか?
madmom
from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
from madmom.audio.stft import ShortTimeFourierTransformProcessor
from madmom.audio.spectrogram import (
FilteredSpectrogramProcessor, LogarithmicSpectrogramProcessor,
SpectrogramDifferenceProcessor)
from madmom.processors import ParallelProcessor, SequentialProcessor
# feature extractor that extracts magnitude spectrogram and its differences
class LOG_SPECT(FeatureModule):
    """Feature extractor built from a chain of madmom processors.

    The pipeline is: signal -> framing -> STFT -> filtered spectrogram ->
    logarithmic spectrogram -> spectrogram difference, with the per-frame-size
    outputs stacked horizontally by ``np.hstack``.

    Args:
        num_channels: Forwarded to ``SignalProcessor``.
        sample_rate: Forwarded to ``SignalProcessor``; also stored on the
            instance.
        win_length: Frame size handed to ``FramedSignalProcessor``.
        hop_size: Hop size handed to ``FramedSignalProcessor``; stored on the
            instance as ``hop_length``.
        n_bands: Sequence of ``num_bands`` values for
            ``FilteredSpectrogramProcessor``, paired one-to-one with the frame
            sizes. Only one frame size (``win_length``) is configured, so only
            the first entry is used.
        mode: ``'online'`` and ``'offline'`` frame the signal without a frame
            limit; any other value frames with ``num_frames=4``.
    """

    def __init__(self, num_channels=1, sample_rate=22050, win_length=2048,
                 hop_size=512, n_bands=(12,), mode='online'):
        # The default for n_bands is an immutable tuple so that it cannot be
        # shared and mutated across instances; lists passed by callers still
        # work because the value is only iterated.
        sig = SignalProcessor(num_channels=num_channels, win_length=win_length, sample_rate=sample_rate)
        self.sample_rate = sample_rate
        self.hop_length = hop_size
        self.num_channels = num_channels
        multi = ParallelProcessor([])
        frame_sizes = [win_length]
        # A distinct loop name avoids rebinding the sequence being iterated.
        for frame_size, bands in zip(frame_sizes, n_bands):
            if mode in ('online', 'offline'):
                frames = FramedSignalProcessor(frame_size=frame_size, hop_size=hop_size)
            else:  # for real-time and streaming modes
                frames = FramedSignalProcessor(frame_size=frame_size, hop_size=hop_size, num_frames=4)
            stft = ShortTimeFourierTransformProcessor()  # caching FFT window
            filt = FilteredSpectrogramProcessor(
                num_bands=bands, fmin=30, fmax=17000, norm_filters=True)
            spec = LogarithmicSpectrogramProcessor(mul=1, add=1)
            diff = SpectrogramDifferenceProcessor(
                diff_ratio=0.5, positive_diffs=True, stack_diffs=np.hstack)
            # process each frame size with spec and diff sequentially
            multi.append(SequentialProcessor((frames, stft, filt, spec, diff)))
        # stack the features and process everything sequentially
        self.pipe = SequentialProcessor((sig, multi, np.hstack))

    def process_audio(self, audio):
        """Run ``audio`` through the processor pipeline.

        Args:
            audio: Input passed unchanged to the madmom pipeline.

        Returns:
            The pipeline output, transposed.
        """
        feats = self.pipe(audio)
        return feats.T
ChatGPTでlibrosaに書き換えてもらったもの(動作未確認)
import numpy as np
import librosa
import librosa.display
class LOG_SPECT:
    """Log-mel spectrogram plus delta features computed with librosa.

    NOTE(review): this is a different feature from the madmom-based
    extractor it is meant to replace (mel bands and dB scaling here) --
    confirm model compatibility before relying on it.

    Args:
        sample_rate: Sample rate used when loading audio from a path.
        win_length: Passed to ``librosa.stft`` as ``n_fft``.
        hop_length: Hop length passed to ``librosa.stft``.
        n_mels: Number of mel bands.
    """

    def __init__(self, sample_rate=22050, win_length=2048, hop_length=512, n_mels=128):
        self.sample_rate = sample_rate
        self.win_length = win_length
        self.hop_length = hop_length
        self.n_mels = n_mels

    def process_audio(self, audio_path):
        """Compute log-mel and delta features for one audio input.

        Args:
            audio_path: A path accepted by ``librosa.load``, or an
                already-loaded ``numpy.ndarray`` of samples. An array is
                used as-is and assumed to already be at ``self.sample_rate``
                -- TODO confirm with callers.

        Returns:
            The log-mel spectrogram stacked on top of its delta, transposed.
        """
        if isinstance(audio_path, np.ndarray):
            # Already-decoded samples: skip file loading.
            y, sr = audio_path, self.sample_rate
        else:
            y, sr = librosa.load(audio_path, sr=self.sample_rate)

        # Short-time Fourier transform; win_length doubles as the FFT size.
        stft = librosa.stft(y, n_fft=self.win_length, hop_length=self.hop_length)

        # power_to_db below expects power, so square the magnitude before
        # the mel projection instead of passing the raw magnitude.
        power_spec = np.abs(stft) ** 2

        # Project onto the mel scale, then convert power to decibels.
        mel_spec = librosa.feature.melspectrogram(S=power_spec, sr=sr, n_mels=self.n_mels)
        log_mel_spec = librosa.power_to_db(mel_spec)

        # First-order difference (delta) of the log-mel spectrogram.
        delta = librosa.feature.delta(log_mel_spec)

        # Stack the log-mel bands and their deltas, then transpose.
        return np.vstack([log_mel_spec, delta]).T
# 使用例
# feature_extractor = LOG_SPECT()
# features = feature_extractor.process_audio('path/to/audio/file.wav')
エラーのもとになっているfeatures.pyは、LOG_SPECTだけでなくDBNDownBeatTrackingProcessorやparticle_filtering_cascadeを使うときにも必要です。そのため、LOG_SPECTを修正するだけでは、DBNと粒子フィルタのどちらの推論モードでも依然としてエラーが出てしまいます。必要なコードだけをmadmomから取り出して同じディレクトリに置くことも考えましたが、Cythonが使われているため、これも簡単ではなさそうです。GitHubにある実装ではインポートの問題も解消されていますが、pipで配布されているバージョンは古いままなので、requirements.txtによる対応は難しそうです。
https://github.com/axinc-ai/ailia-models/issues/1450