Open maic2209 opened 1 month ago
(env) (base) C:\Users\prost\Wav2Lip>python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face joseph.mp4 --audio josephvoice.mp3 Using cpu for inference. Reading video frames... Number of frames available for inference: 366 Extracting raw audio... ffmpeg version 2023-06-21-git-1bcb8a7338-essentials_build-www.gyan.dev Copyright (c) 2000-2023 the FFmpeg developers built with gcc 12.2.0 (Rev10, Built by MSYS2 project) configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-ffnvcodec --enable-nvdec --enable-nvenc --enable-d3d11va --enable-dxva2 --enable-libvpl --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --enable-libtheora --enable-libvo-amrwbenc --enable-libgsm --enable-libopencore-amrnb --enable-libopus --enable-libspeex --enable-libvorbis --enable-librubberband libavutil 58. 13.101 / 58. 13.101 libavcodec 60. 21.100 / 60. 21.100 libavformat 60. 9.100 / 60. 9.100 libavdevice 60. 2.100 / 60. 2.100 libavfilter 9. 8.102 / 9. 8.102 libswscale 7. 3.100 / 7. 3.100 libswresample 4. 11.100 / 4. 11.100 libpostproc 57. 2.100 / 57. 
2.100 [mp3 @ 000001a0a3355d80] Estimating duration from bitrate, this may be inaccurate Input #0, mp3, from 'josephvoice.mp3': Duration: 00:00:11.89, start: 0.000000, bitrate: 127 kb/s Stream #0:0: Audio: mp3, 44100 Hz, mono, fltp, 128 kb/s Stream mapping: Stream #0:0 -> #0:0 (mp3 (mp3float) -> pcm_s16le (native)) Press [q] to stop, [?] for help Output #0, wav, to 'temp/temp.wav': Metadata: ISFT : Lavf60.9.100 Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, mono, s16, 705 kb/s Metadata: encoder : Lavc60.21.100 pcm_s16le [out#0/wav @ 000001a0a33523c0] video:0kB audio:1024kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.007440% size= 1024kB time=00:00:11.85 bitrate= 707.2kbits/s speed=73.5x Traceback (most recent call last): File "C:\Users\prost\Wav2Lip\inference.py", line 280, in main() File "C:\Users\prost\Wav2Lip\inference.py", line 225, in main mel = audio.melspectrogram(wav) File "C:\Users\prost\Wav2Lip\audio.py", line 47, in melspectrogram S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db File "C:\Users\prost\Wav2Lip\audio.py", line 95, in _linear_to_mel _mel_basis = _build_mel_basis() File "C:\Users\prost\Wav2Lip\audio.py", line 100, in _build_mel_basis return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, TypeError: mel() takes 0 positional arguments but 2 positional arguments (and 3 keyword-only arguments) were given
(env) (base) C:\Users\prost\Wav2Lip>python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face joseph.mp4 --audio josephvoice.mp3 Using cpu for inference. Reading video frames... Number of frames available for inference: 366 Extracting raw audio... ffmpeg version 2023-06-21-git-1bcb8a7338-essentials_build-www.gyan.dev Copyright (c) 2000-2023 the FFmpeg developers built with gcc 12.2.0 (Rev10, Built by MSYS2 project) configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-ffnvcodec --enable-nvdec --enable-nvenc --enable-d3d11va --enable-dxva2 --enable-libvpl --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --enable-libtheora --enable-libvo-amrwbenc --enable-libgsm --enable-libopencore-amrnb --enable-libopus --enable-libspeex --enable-libvorbis --enable-librubberband libavutil 58. 13.101 / 58. 13.101 libavcodec 60. 21.100 / 60. 21.100 libavformat 60. 9.100 / 60. 9.100 libavdevice 60. 2.100 / 60. 2.100 libavfilter 9. 8.102 / 9. 8.102 libswscale 7. 3.100 / 7. 3.100 libswresample 4. 11.100 / 4. 11.100 libpostproc 57. 2.100 / 57. 
2.100 [mp3 @ 000002a6d5b45d80] Estimating duration from bitrate, this may be inaccurate Input #0, mp3, from 'josephvoice.mp3': Duration: 00:00:11.89, start: 0.000000, bitrate: 127 kb/s Stream #0:0: Audio: mp3, 44100 Hz, mono, fltp, 128 kb/s Stream mapping: Stream #0:0 -> #0:0 (mp3 (mp3float) -> pcm_s16le (native)) Press [q] to stop, [?] for help Output #0, wav, to 'temp/temp.wav': Metadata: ISFT : Lavf60.9.100 Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, mono, s16, 705 kb/s Metadata: encoder : Lavc60.21.100 pcm_s16le [out#0/wav @ 000002a6d5b423c0] video:0kB audio:1024kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.007440% size= 1024kB time=00:00:11.85 bitrate= 707.2kbits/s speed=84.9x Traceback (most recent call last): File "C:\Users\prost\Wav2Lip\inference.py", line 280, in main() File "C:\Users\prost\Wav2Lip\inference.py", line 225, in main mel = audio.melspectrogram(wav) File "C:\Users\prost\Wav2Lip\audio.py", line 47, in melspectrogram S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db File "C:\Users\prost\Wav2Lip\audio.py", line 95, in _linear_to_mel _mel_basis = _build_mel_basis() File "C:\Users\prost\Wav2Lip\audio.py", line 100, in _build_mel_basis return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, TypeError: mel() takes 0 positional arguments but 2 positional arguments (and 3 keyword-only arguments) were given
我有解决方案,可以联系我 +q3785510550
您的電話號碼不正確,請透過 WhatsApp 給我發短信 +2347037243756
On Mon, 3 Jun 2024, 7:51 am jibingyangsf, @.***> wrote:
(env) (base) C:\Users\prost\Wav2Lip>python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face joseph.mp4 --audio josephvoice.mp3 Using cpu for inference. Reading video frames... Number of frames available for inference: 366 Extracting raw audio... ffmpeg version 2023-06-21-git-1bcb8a7338-essentials_build-www.gyan.dev Copyright (c) 2000-2023 the FFmpeg developers built with gcc 12.2.0 (Rev10, Built by MSYS2 project) configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-ffnvcodec --enable-nvdec --enable-nvenc --enable-d3d11va --enable-dxva2 --enable-libvpl --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --enable-libtheora --enable-libvo-amrwbenc --enable-libgsm --enable-libopencore-amrnb --enable-libopus --enable-libspeex --enable-libvorbis --enable-librubberband libavutil 58. 13.101 / 58. 13.101 libavcodec 60. 21.100 / 60. 21.100 libavformat 60. 9.100 / 60. 9.100 libavdevice 60. 2.100 / 60. 2.100 libavfilter 9. 8.102 / 9. 8.102 libswscale 7. 3.100 / 7. 3.100 libswresample 4. 11.100 / 4. 11.100 libpostproc 57. 2.100 / 57. 
2.100 [mp3 @ 000001a0a3355d80] Estimating duration from bitrate, this may be inaccurate Input #0, mp3, from 'josephvoice.mp3': Duration: 00:00:11.89, start: 0.000000, bitrate: 127 kb/s Stream #0:0: Audio: mp3, 44100 Hz, mono, fltp, 128 kb/s Stream mapping: Stream #0:0 -> #0:0 (mp3 (mp3float) -> pcm_s16le (native)) Press [q] to stop, [?] for help Output #0, wav, to 'temp/temp.wav': Metadata: ISFT : Lavf60.9.100 Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, mono, s16, 705 kb/s Metadata: encoder : Lavc60.21.100 pcm_s16le [out#0/wav @ 000001a0a33523c0] video:0kB audio:1024kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.007440% size= 1024kB time=00:00:11.85 bitrate= 707.2kbits/s speed=73.5x Traceback (most recent call last): File "C:\Users\prost\Wav2Lip\inference.py", line 280, in main() File "C:\Users\prost\Wav2Lip\inference.py", line 225, in main mel = audio.melspectrogram(wav) File "C:\Users\prost\Wav2Lip\audio.py", line 47, in melspectrogram S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db File "C:\Users\prost\Wav2Lip\audio.py", line 95, in _linear_to_mel _mel_basis = _build_mel_basis() File "C:\Users\prost\Wav2Lip\audio.py", line 100, in _build_mel_basis return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, TypeError: mel() takes 0 positional arguments but 2 positional arguments (and 3 keyword-only arguments) were given
(env) (base) C:\Users\prost\Wav2Lip>python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face joseph.mp4 --audio josephvoice.mp3 Using cpu for inference. Reading video frames... Number of frames available for inference: 366 Extracting raw audio... ffmpeg version 2023-06-21-git-1bcb8a7338-essentials_build-www.gyan.dev Copyright (c) 2000-2023 the FFmpeg developers built with gcc 12.2.0 (Rev10, Built by MSYS2 project) configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-ffnvcodec --enable-nvdec --enable-nvenc --enable-d3d11va --enable-dxva2 --enable-libvpl --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --enable-libtheora --enable-libvo-amrwbenc --enable-libgsm --enable-libopencore-amrnb --enable-libopus --enable-libspeex --enable-libvorbis --enable-librubberband libavutil 58. 13.101 / 58. 13.101 libavcodec 60. 21.100 / 60. 21.100 libavformat 60. 9.100 / 60. 9.100 libavdevice 60. 2.100 / 60. 2.100 libavfilter 9. 8.102 / 9. 8.102 libswscale 7. 3.100 / 7. 3.100 libswresample 4. 11.100 / 4. 11.100 libpostproc 57. 2.100 / 57. 
2.100 [mp3 @ 000002a6d5b45d80] Estimating duration from bitrate, this may be inaccurate Input #0, mp3, from 'josephvoice.mp3': Duration: 00:00:11.89, start: 0.000000, bitrate: 127 kb/s Stream #0:0: Audio: mp3, 44100 Hz, mono, fltp, 128 kb/s Stream mapping: Stream #0:0 -> #0:0 (mp3 (mp3float) -> pcm_s16le (native)) Press [q] to stop, [?] for help Output #0, wav, to 'temp/temp.wav': Metadata: ISFT : Lavf60.9.100 Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, mono, s16, 705 kb/s Metadata: encoder : Lavc60.21.100 pcm_s16le [out#0/wav @ 000002a6d5b423c0] video:0kB audio:1024kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.007440% size= 1024kB time=00:00:11.85 bitrate= 707.2kbits/s speed=84.9x Traceback (most recent call last): File "C:\Users\prost\Wav2Lip\inference.py", line 280, in main() File "C:\Users\prost\Wav2Lip\inference.py", line 225, in main mel = audio.melspectrogram(wav) File "C:\Users\prost\Wav2Lip\audio.py", line 47, in melspectrogram S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db File "C:\Users\prost\Wav2Lip\audio.py", line 95, in _linear_to_mel _mel_basis = _build_mel_basis() File "C:\Users\prost\Wav2Lip\audio.py", line 100, in _build_mel_basis return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, TypeError: mel() takes 0 positional arguments but 2 positional arguments (and 3 keyword-only arguments) were given
我有解决方案,可以联系我 +q3785510550
— Reply to this email directly, view it on GitHub https://github.com/Rudrabha/Wav2Lip/issues/683#issuecomment-2144407239, or unsubscribe https://github.com/notifications/unsubscribe-auth/A4G22BYEARROREFZZPHRLT3ZFQG6VAVCNFSM6AAAAABIL36Q5WVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDCNBUGQYDOMRTHE . You are receiving this because you authored the thread.Message ID: @.***>
(env) (base) C:\Users\prost\Wav2Lip>python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face joseph.mp4 --audio josephvoice.mp3 Using cpu for inference. Reading video frames... Number of frames available for inference: 366 Extracting raw audio... ffmpeg version 2023-06-21-git-1bcb8a7338-essentials_build-www.gyan.dev Copyright (c) 2000-2023 the FFmpeg developers built with gcc 12.2.0 (Rev10, Built by MSYS2 project) configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-ffnvcodec --enable-nvdec --enable-nvenc --enable-d3d11va --enable-dxva2 --enable-libvpl --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --enable-libtheora --enable-libvo-amrwbenc --enable-libgsm --enable-libopencore-amrnb --enable-libopus --enable-libspeex --enable-libvorbis --enable-librubberband libavutil 58. 13.101 / 58. 13.101 libavcodec 60. 21.100 / 60. 21.100 libavformat 60. 9.100 / 60. 9.100 libavdevice 60. 2.100 / 60. 2.100 libavfilter 9. 8.102 / 9. 8.102 libswscale 7. 3.100 / 7. 3.100 libswresample 4. 11.100 / 4. 11.100 libpostproc 57. 2.100 / 57. 
2.100 [mp3 @ 000001a0a3355d80] Estimating duration from bitrate, this may be inaccurate Input #0, mp3, from 'josephvoice.mp3': Duration: 00:00:11.89, start: 0.000000, bitrate: 127 kb/s Stream #0:0: Audio: mp3, 44100 Hz, mono, fltp, 128 kb/s Stream mapping: Stream #0:0 -> #0:0 (mp3 (mp3float) -> pcm_s16le (native)) Press [q] to stop, [?] for help Output #0, wav, to 'temp/temp.wav': Metadata: ISFT : Lavf60.9.100 Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, mono, s16, 705 kb/s Metadata: encoder : Lavc60.21.100 pcm_s16le [out#0/wav @ 000001a0a33523c0] video:0kB audio:1024kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.007440% size= 1024kB time=00:00:11.85 bitrate= 707.2kbits/s speed=73.5x Traceback (most recent call last): File "C:\Users\prost\Wav2Lip\inference.py", line 280, in
main()
File "C:\Users\prost\Wav2Lip\inference.py", line 225, in main
mel = audio.melspectrogram(wav)
File "C:\Users\prost\Wav2Lip\audio.py", line 47, in melspectrogram
S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db
File "C:\Users\prost\Wav2Lip\audio.py", line 95, in _linear_to_mel
_mel_basis = _build_mel_basis()
File "C:\Users\prost\Wav2Lip\audio.py", line 100, in _build_mel_basis
return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels,
TypeError: mel() takes 0 positional arguments but 2 positional arguments (and 3 keyword-only arguments) were given
(env) (base) C:\Users\prost\Wav2Lip>python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face joseph.mp4 --audio josephvoice.mp3 Using cpu for inference. Reading video frames... Number of frames available for inference: 366 Extracting raw audio... ffmpeg version 2023-06-21-git-1bcb8a7338-essentials_build-www.gyan.dev Copyright (c) 2000-2023 the FFmpeg developers built with gcc 12.2.0 (Rev10, Built by MSYS2 project) configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-ffnvcodec --enable-nvdec --enable-nvenc --enable-d3d11va --enable-dxva2 --enable-libvpl --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --enable-libtheora --enable-libvo-amrwbenc --enable-libgsm --enable-libopencore-amrnb --enable-libopus --enable-libspeex --enable-libvorbis --enable-librubberband libavutil 58. 13.101 / 58. 13.101 libavcodec 60. 21.100 / 60. 21.100 libavformat 60. 9.100 / 60. 9.100 libavdevice 60. 2.100 / 60. 2.100 libavfilter 9. 8.102 / 9. 8.102 libswscale 7. 3.100 / 7. 3.100 libswresample 4. 11.100 / 4. 11.100 libpostproc 57. 2.100 / 57. 
2.100 [mp3 @ 000002a6d5b45d80] Estimating duration from bitrate, this may be inaccurate Input #0, mp3, from 'josephvoice.mp3': Duration: 00:00:11.89, start: 0.000000, bitrate: 127 kb/s Stream #0:0: Audio: mp3, 44100 Hz, mono, fltp, 128 kb/s Stream mapping: Stream #0:0 -> #0:0 (mp3 (mp3float) -> pcm_s16le (native)) Press [q] to stop, [?] for help Output #0, wav, to 'temp/temp.wav': Metadata: ISFT : Lavf60.9.100 Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, mono, s16, 705 kb/s Metadata: encoder : Lavc60.21.100 pcm_s16le [out#0/wav @ 000002a6d5b423c0] video:0kB audio:1024kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.007440% size= 1024kB time=00:00:11.85 bitrate= 707.2kbits/s speed=84.9x Traceback (most recent call last): File "C:\Users\prost\Wav2Lip\inference.py", line 280, in
main()
File "C:\Users\prost\Wav2Lip\inference.py", line 225, in main
mel = audio.melspectrogram(wav)
File "C:\Users\prost\Wav2Lip\audio.py", line 47, in melspectrogram
S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db
File "C:\Users\prost\Wav2Lip\audio.py", line 95, in _linear_to_mel
_mel_basis = _build_mel_basis()
File "C:\Users\prost\Wav2Lip\audio.py", line 100, in _build_mel_basis
return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels,
TypeError: mel() takes 0 positional arguments but 2 positional arguments (and 3 keyword-only arguments) were given