running on... cuda:0
adding lip conditioning ./assets/iter-0200000.pt
Loading checkpoints from [checkpoints/diffusion/c1_face/model000155000.pt]...
running on... cuda:0
using keyframes: torch.Size([1, 20, 256])
loading checkpoint from checkpoints/vq/c1_pose/net_iter300000.pth
loading TRANSFORMER checkpoint from checkpoints/guide/c1_pose/checkpoints/iter-0100000.pt
Loading checkpoints from [checkpoints/diffusion/c1_pose/model000340000.pt]...
H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3527.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\utils\weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
WARNING:visualize.ca_body.nn.color_cal:Requested color-calibration identity camera not present, defaulting to 400883.
loading... ./checkpoints/ca_body/data/PXB184/body_dec.ckpt
Running on local URL: http://127.0.0.1:1000
To create a public link, set share=True in launch().
0%| | 0/100 [00:00<?, ?it/s]
Traceback (most recent call last):
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\queueing.py", line 532, in process_events
response = await route_utils.call_process_api(
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\route_utils.py", line 276, in call_process_api
output = await app.get_blocks().process_api(
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\blocks.py", line 1928, in process_api
result = await self.call_function(
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\blocks.py", line 1514, in call_function
prediction = await anyio.to_thread.run_sync(
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\anyio\to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\anyio_backends_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\anyio_backends_asyncio.py", line 859, in run
result = context.run(func, args)
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\utils.py", line 832, in wrapper
response = f(args, kwargs)
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\demo\demo.py", line 220, in audio_to_avatar
face_results, pose_results, audio = generate_results(audio, num_repetitions, top_p)
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\demo\demo.py", line 188, in generate_results
gradio_model.generate_sequences(
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\demo\demo.py", line 148, in generate_sequences
sample = self._run_single_diffusion(
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\demo\demo.py", line 100, in _run_single_diffusion
sample = sample_fn(
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\gaussian_diffusion.py", line 845, in ddim_sample_loop
for sample in self.ddim_sample_loop_progressive(
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\gaussian_diffusion.py", line 925, in ddim_sample_loop_progressive
out = sample_fn(
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\gaussian_diffusion.py", line 683, in ddim_sample
out_orig = self.p_mean_variance(
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\respace.py", line 105, in p_mean_variance
return super().p_mean_variance(self._wrap_model(model), *args, kwargs)
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\gaussian_diffusion.py", line 287, in p_mean_variance
model_output = model(x, self._scale_timesteps(t), model_kwargs)
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\respace.py", line 145, in call
return self.model(x, new_ts, *kwargs)
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, kwargs)
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\model\cfg_sampler.py", line 32, in forward
out = self.model(x, timesteps, y, cond_drop_prob=0.0)
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, *kwargs)
File "H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(args, kwargs)
File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\model\diffusion.py", line 379, in forward
cond_tokens = torch.where(
RuntimeError: The size of tensor a (2398) must match the size of tensor b (1998) at non-singleton dimension 1
running on... cuda:0 adding lip conditioning ./assets/iter-0200000.pt Loading checkpoints from [checkpoints/diffusion/c1_face/model000155000.pt]... running on... cuda:0 using keyframes: torch.Size([1, 20, 256]) loading checkpoint from checkpoints/vq/c1_pose/net_iter300000.pth loading TRANSFORMER checkpoint from checkpoints/guide/c1_pose/checkpoints/iter-0100000.pt Loading checkpoints from [checkpoints/diffusion/c1_pose/model000340000.pt]... H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3527.) return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined] H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\utils\weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") WARNING:visualize.ca_body.nn.color_cal:Requested color-calibration identity camera not present, defaulting to 400883. loading... ./checkpoints/ca_body/data/PXB184/body_dec.ckpt Running on local URL: http://127.0.0.1:1000
To create a public link, set share=True in launch()
. 0%| | 0/100 [00:00<?, ?it/s] Traceback (most recent call last): File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\queueing.py", line 532, in process_events response = await route_utils.call_process_api( File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\route_utils.py", line 276, in call_process_api output = await app.get_blocks().process_api( File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\blocks.py", line 1928, in process_api result = await self.call_function( File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\blocks.py", line 1514, in call_function prediction = await anyio.to_thread.run_sync( File "H:\Anaconda3\envs\a2p_env\lib\site-packages\anyio\to_thread.py", line 56, in run_sync return await get_async_backend().run_sync_in_worker_thread( File "H:\Anaconda3\envs\a2p_env\lib\site-packages\anyio_backends_asyncio.py", line 2177, in run_sync_in_worker_thread return await future File "H:\Anaconda3\envs\a2p_env\lib\site-packages\anyio_backends_asyncio.py", line 859, in run result = context.run(func, args) File "H:\Anaconda3\envs\a2p_env\lib\site-packages\gradio\utils.py", line 832, in wrapper response = f(args, kwargs) File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\demo\demo.py", line 220, in audio_to_avatar face_results, pose_results, audio = generate_results(audio, num_repetitions, top_p) File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\demo\demo.py", line 188, in generate_results gradio_model.generate_sequences( File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\demo\demo.py", line 148, in generate_sequences sample = self._run_single_diffusion( File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\demo\demo.py", line 100, in _run_single_diffusion sample = sample_fn( File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\gaussian_diffusion.py", line 845, in ddim_sample_loop for sample in self.ddim_sample_loop_progressive( File 
"D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\gaussian_diffusion.py", line 925, in ddim_sample_loop_progressive out = sample_fn( File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\gaussian_diffusion.py", line 683, in ddim_sample out_orig = self.p_mean_variance( File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\respace.py", line 105, in p_mean_variance return super().p_mean_variance(self._wrap_model(model), *args, kwargs) File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\gaussian_diffusion.py", line 287, in p_mean_variance model_output = model(x, self._scale_timesteps(t), model_kwargs) File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\diffusion\respace.py", line 145, in call return self.model(x, new_ts, *kwargs) File "H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl return self._call_impl(args, kwargs) File "H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl return forward_call(*args, kwargs) File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\model\cfg_sampler.py", line 32, in forward out = self.model(x, timesteps, y, cond_drop_prob=0.0) File "H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl return self._call_impl(*args, *kwargs) File "H:\Anaconda3\envs\a2p_env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl return forward_call(args, kwargs) File "D:\2023project\project\0win_os\05speech_Pro\audio2photoreal-main\model\diffusion.py", line 379, in forward cond_tokens = torch.where( RuntimeError: The size of tensor a (2398) must match the size of tensor b (1998) at non-singleton dimension 1

Is this caused by my test audio? I have tested several audio files, and some of them produce this error.
Also, why does this example work without the error: https://github.com/facebookresearch/audio2photoreal/assets/48466610/893db5a5-ca48-460a-8e5e-32929272875a ?