ErrorFromChoice(msg, choice, debug_str()) # noqa: TRY200

mxdlzg commented 4 months ago

python speed.py
Warm up start!
W0709 10:11:47.747786 27996 torch\_inductor\utils.py:945] [0/0] not enough SMs to use max_autotune_gemm mode
Traceback (most recent call last):
  File "E:\ExtraData\Projects\PyProject\LivePortrait\speed.py", line 192, in <module>
    main()
  File "E:\ExtraData\Projects\PyProject\LivePortrait\speed.py", line 182, in main
    warm_up_models(compiled_models, stitching_retargeting_module, inputs)
  File "E:\ExtraData\Projects\PyProject\LivePortrait\speed.py", line 87, in warm_up_models
    compiled_models['Appearance Feature Extractor'](inputs['source_image'])
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\eval_frame.py", line 451, in _fn
    return fn(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\convert_frame.py", line 921, in catch_errors
    return callback(frame, cache_entry, hooks, frame_state, skip=1)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\convert_frame.py", line 786, in _convert_frame
    result = inner_convert(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\convert_frame.py", line 400, in _convert_frame_assert
    return _compile(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\convert_frame.py", line 676, in _compile
    guarded_code = compile_inner(code, one_graph, hooks, transform)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\utils.py", line 262, in time_wrapper
    r = func(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\convert_frame.py", line 535, in compile_inner
    out_code = transform_code_object(code, transform)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\bytecode_transformation.py", line 1036, in transform_code_object
    transformations(instructions, code_options)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\convert_frame.py", line 165, in _fn
    return fn(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\convert_frame.py", line 500, in transform
    tracer.run()
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 2149, in run
    super().run()
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 810, in run
    and self.step()
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 773, in step
    getattr(self, inst.opname)(inst)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\symbolic_convert.py", line 2268, in RETURN_VALUE
    self.output.compile_subgraph(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\output_graph.py", line 971, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\output_graph.py", line 1168, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\utils.py", line 262, in time_wrapper
    r = func(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\output_graph.py", line 1241, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e).with_traceback(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\output_graph.py", line 1222, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.example_inputs())
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\repro\after_dynamo.py", line 117, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\__init__.py", line 1729, in __call__
    return compile_fx(model_, inputs_, config_patches=self.config)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\compile_fx.py", line 1102, in compile_fx
    return compile_fx(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\compile_fx.py", line 1330, in compile_fx
    return aot_autograd(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\backends\common.py", line 58, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_functorch\aot_autograd.py", line 903, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\utils.py", line 262, in time_wrapper
    r = func(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_functorch\aot_autograd.py", line 628, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_args, aot_config, fw_metadata=fw_metadata)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_functorch\_aot_autograd\runtime_wrappers.py", line 443, in aot_wrapper_dedupe
    return compiler_fn(flat_fn, leaf_flat_args, aot_config, fw_metadata=fw_metadata)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_functorch\_aot_autograd\runtime_wrappers.py", line 648, in aot_wrapper_synthetic_base
    return compiler_fn(flat_fn, flat_args, aot_config, fw_metadata=fw_metadata)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_functorch\_aot_autograd\jit_compile_runtime_wrappers.py", line 119, in aot_dispatch_base
    compiled_fw = compiler(fw_module, updated_flat_args)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\utils.py", line 262, in time_wrapper
    r = func(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\compile_fx.py", line 1257, in fw_compiler_base
    return inner_compile(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\repro\after_aot.py", line 83, in debug_wrapper
    inner_compiled_fn = compiler_fn(gm, example_inputs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\debug.py", line 304, in inner
    return fn(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\utils.py", line 262, in time_wrapper
    r = func(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\compile_fx.py", line 438, in compile_fx_inner
    compiled_graph = fx_codegen_and_compile(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\compile_fx.py", line 698, in fx_codegen_and_compile
    graph.run(*example_inputs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_dynamo\utils.py", line 262, in time_wrapper
    r = func(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\graph.py", line 612, in run
    return super().run(*args)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\fx\interpreter.py", line 145, in run
    self.env[node] = self.run_node(node)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\graph.py", line 946, in run_node
    result = self.call_function(n.target, args, kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\graph.py", line 819, in call_function
    raise LoweringException(e, target, args, kwargs).with_traceback(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\graph.py", line 816, in call_function
    out = lowerings[target](*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\lowering.py", line 296, in wrapped
    out = decomp_fn(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\kernel\conv.py", line 364, in convolution
    return convert_1x1_conv_to_mm(x, weight, bias)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\kernel\conv.py", line 291, in convert_1x1_conv_to_mm
    result = L[aten.addmm](bias, x, weight)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\lowering.py", line 296, in wrapped
    out = decomp_fn(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\kernel\mm.py", line 249, in tuned_addmm
    return autotune_select_algorithm(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\select_algorithm.py", line 1146, in autotune_select_algorithm
    return _ALGORITHM_SELECTOR_CACHE(*args, **kwargs)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\select_algorithm.py", line 896, in __call__
    timings = self.lookup(
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\codecache.py", line 305, in lookup
    raise e
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\codecache.py", line 296, in lookup
    timings = benchmark(choices)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\select_algorithm.py", line 887, in autotune
    return make_benchmark_fn()(choices)
  File "D:\OwnProgram\Anaconda3\envs\LivePortrait\lib\site-packages\torch\_inductor\select_algorithm.py", line 1012, in benchmark_in_current_process
    raise ErrorFromChoice(msg, choice, debug_str())  # noqa: TRY200
torch._dynamo.exc.BackendCompilerFailed: backend='inductor' raised:
LoweringException: ErrorFromChoice: requires Triton
From choice ExternKernelCaller(extern_kernels.bias_addmm)
inputs = [
    torch.empty_strided((512,), (1,), dtype=torch.float16, device='cuda'),
    torch.empty_strided((1, 256, 64, 64), (1048576, 4096, 64, 1), dtype=torch.float16, device='cuda'),
    torch.empty_strided((512, 256, 1, 1), (256, 1, 1, 1), dtype=torch.float16, device='cuda'),
]
out = torch.empty_strided((4096, 512), (512, 1), dtype=torch.float16, device='cuda')

  target: aten.convolution.default
  args[0]: TensorBox(StorageBox(
    ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float16, size=[1, 256, 64, 64], stride=[1048576, 4096, 64, 1]), data=Pointwise(
      'cuda',
      torch.float16,
      def inner_fn(index):
          _, i1, i2, i3 = index
          tmp0 = ops.load(buf10, 2 * i3 + 256 * i2 + 16384 * i1)
          tmp1 = ops.load(buf10, 1 + 2 * i3 + 256 * i2 + 16384 * i1)
          tmp2 = tmp1 + tmp0
          tmp3 = ops.load(buf10, 128 + 2 * i3 + 256 * i2 + 16384 * i1)
          tmp4 = tmp3 + tmp2
          tmp5 = ops.load(buf10, 129 + 2 * i3 + 256 * i2 + 16384 * i1)
          tmp6 = tmp5 + tmp4
          tmp7 = ops.constant(0.25, torch.float16)
          tmp8 = tmp6 * tmp7
          return tmp8
      ,
      ranges=[1, 256, 64, 64],
      origin_node=avg_pool2d_1,
      origins={convert_element_type_5, avg_pool2d_1, relu_2, mul_2,...
    ))
  ))
  args[1]: TensorBox(StorageBox(
    InputBuffer(name='arg12_1', layout=FixedLayout('cuda', torch.float16, size=[512, 256, 1, 1], stride=[256, 1, 1, 1]))
  ))
  args[2]: TensorBox(StorageBox(
    InputBuffer(name='arg13_1', layout=FixedLayout('cuda', torch.float16, size=[512], stride=[1]))
  ))
  args[3]: [1, 1]
  args[4]: [0, 0]
  args[5]: [1, 1]
  args[6]: False
  args[7]: [0, 0]
  args[8]: 1

Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information

You can suppress this exception and fall back to eager by setting:
    import torch._dynamo
    torch._dynamo.config.suppress_errors = True

zzzweakman commented 4 months ago

Have you installed triton? If installed, what is its version? @mxdlzg

mxdlzg commented 4 months ago

I just installed the packages from requirements.txt.

 pip list
Package                   Version
------------------------- ------------
aiofiles                  23.2.1
albucore                  0.0.12
albumentations            1.4.10
altair                    5.3.0
annotated-types           0.7.0
anyio                     4.4.0
attrs                     23.2.0
certifi                   2024.7.4
charset-normalizer        3.3.2
click                     8.1.7
colorama                  0.4.6
coloredlogs               15.0.1
contourpy                 1.2.1
cycler                    0.12.1
dnspython                 2.6.1
docstring_parser          0.16
email_validator           2.2.0
eval_type_backport        0.2.0
exceptiongroup            1.2.1
fastapi                   0.111.0
fastapi-cli               0.0.4
ffmpeg                    1.4
ffmpy                     0.3.2
filelock                  3.15.4
flatbuffers               24.3.25
fonttools                 4.53.1
fsspec                    2024.6.1
gradio                    4.37.1
gradio_client             1.0.2
h11                       0.14.0
httpcore                  1.0.5
httptools                 0.6.1
httpx                     0.27.0
huggingface-hub           0.23.4
humanfriendly             10.0
idna                      3.7
imageio                   2.34.2
imageio-ffmpeg            0.5.1
importlib_resources       6.4.0
intel-openmp              2021.4.0
Jinja2                    3.1.4
joblib                    1.4.2
jsonschema                4.23.0
jsonschema-specifications 2023.12.1
kiwisolver                1.4.5
lazy_loader               0.4
lmdb                      1.4.1
markdown-it-py            3.0.0
MarkupSafe                2.1.5
matplotlib                3.9.0
mdurl                     0.1.2
mkl                       2021.4.0
mpmath                    1.3.0
networkx                  3.2.1
numpy                     1.26.4
onnx                      1.16.1
onnxruntime-gpu           1.18.0
opencv-python             4.10.0.84
opencv-python-headless    4.10.0.84
orjson                    3.10.6
packaging                 24.1
pandas                    2.2.2
pillow                    10.4.0
pip                       24.0
protobuf                  5.27.2
pydantic                  2.8.2
pydantic_core             2.20.1
pydub                     0.25.1
Pygments                  2.18.0
pyparsing                 3.1.2
pyreadline3               3.4.1
python-dateutil           2.9.0.post0
python-dotenv             1.0.1
python-multipart          0.0.9
pytz                      2024.1
PyYAML                    6.0.1
referencing               0.35.1
requests                  2.32.3
rich                      13.7.1
rpds-py                   0.19.0
ruff                      0.5.1
scikit-image              0.24.0
scikit-learn              1.5.1
scipy                     1.13.1
semantic-version          2.10.0
setuptools                69.5.1
shellingham               1.5.4
shtab                     1.7.1
six                       1.16.0
sniffio                   1.3.1
starlette                 0.37.2
sympy                     1.13.0
tbb                       2021.13.0
threadpoolctl             3.5.0
tifffile                  2024.7.2
tomli                     2.0.1
tomlkit                   0.12.0
toolz                     0.12.1
torch                     2.3.0+cu118
torchaudio                2.3.0+cu118
torchvision               0.18.0+cu118
tqdm                      4.66.4
typer                     0.12.3
typing_extensions         4.12.2
tyro                      0.8.5
tzdata                    2024.1
ujson                     5.10.0
urllib3                   2.2.2
uvicorn                   0.30.1
watchfiles                0.22.0
websockets                11.0.3
wheel                     0.43.0
zipp                      3.19.2

zzzweakman commented 4 months ago

It seems that Triton was not installed automatically with torch. Try running `pip install triton==2.3.0`. @mxdlzg

FurkanGozukara commented 4 months ago

There is no Triton package for Windows.

There are some pre-compiled wheels.

It doesn't make a difference either.

Still, my auto-installer installs the pre-compiled Triton 2.1.0 package for Python 3.10 on Windows.

https://youtu.be/FPtpNrmuwXk

KwaiVGI / LivePortrait

ErrorFromChoice(msg, choice, debug_str()) # noqa: TRY200 #63

Animate Static Photos into Talking Videos with LivePortrait AI Compose Perfect Expressions Fast