Open HolyWu opened 1 day ago
```python
import os

import torch
import torch_tensorrt

os.environ["CI_BUILD"] = "1"


class MyModule(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x[:, :, 0] = 3.1415
        return x


with torch.inference_mode():
    model = MyModule().eval().cuda()
    inputs = [torch.zeros(1, 2, 3, device="cuda")]
    trt_model = torch_tensorrt.compile(model, "dynamo", inputs, debug=True, min_block_size=1)
    print(trt_model(*inputs))
```
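For reference, the in-place slice assignment behaves as expected in eager PyTorch. A minimal check of the intended result (no TensorRT involved, assuming a CUDA device is available):

```python
import torch

x = torch.zeros(1, 2, 3, device="cuda")
x[:, :, 0] = 3.1415  # same in-place assignment as MyModule.forward
print(x)
# tensor([[[3.1415, 0.0000, 0.0000],
#          [3.1415, 0.0000, 0.0000]]], device='cuda:0')
```

Compiling the same module with torch_tensorrt instead fails during engine building, with the debug log below.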
```
DEBUG:torch_tensorrt.dynamo.lowering.passes.remove_detach:Removed 0 detach nodes:
graph():
    %c_lifted_tensor_0 : [num_users=1] = placeholder[target=c_lifted_tensor_0]
    %x : [num_users=2] = placeholder[target=x]
    %lift_fresh_copy : [num_users=1] = call_function[target=torch.ops.aten.lift_fresh_copy.default](args = (%c_lifted_tensor_0,), kwargs = {})
    %slice_1 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_2 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_1, 1, 0, 9223372036854775807), kwargs = {})
    %select : [num_users=1] = call_function[target=torch.ops.aten.select.int](args = (%slice_2, 2, 0), kwargs = {})
    %fill_ : [num_users=0] = call_function[target=torch.ops.aten.fill_.Tensor](args = (%select, %lift_fresh_copy), kwargs = {})
    return (x,)
WARNING:py.warnings:/home/holywu/.local/lib/python3.12/site-packages/torch/export/_unlift.py:75: UserWarning: Attempted to insert a get_attr Node with no underlying reference in the owning GraphModule! Call GraphModule.add_submodule to add the necessary submodule, GraphModule.add_parameter to add the necessary Parameter, or nn.Module.register_buffer to add the necessary buffer
  getattr_node = gm.graph.get_attr(lifted_node)
WARNING:py.warnings:/home/holywu/.local/lib/python3.12/site-packages/torch/fx/graph.py:1800: UserWarning: Node lifted_tensor_0 target lifted_tensor_0 lifted_tensor_0 of does not reference an nn.Module, nn.Parameter, or buffer, which is what 'get_attr' Nodes typically target
  warnings.warn(
WARNING:py.warnings:/home/holywu/.local/lib/python3.12/site-packages/torch/export/_unlift.py:75: UserWarning: Attempted to insert a get_attr Node with no underlying reference in the owning GraphModule! Call GraphModule.add_submodule to add the necessary submodule, GraphModule.add_parameter to add the necessary Parameter, or nn.Module.register_buffer to add the necessary buffer
  getattr_node = gm.graph.get_attr(lifted_node)
WARNING:py.warnings:/home/holywu/.local/lib/python3.12/site-packages/torch/fx/graph.py:1800: UserWarning: Node lifted_tensor_0 target lifted_tensor_0 lifted_tensor_0 of does not reference an nn.Module, nn.Parameter, or buffer, which is what 'get_attr' Nodes typically target
  warnings.warn(
DEBUG:torch_tensorrt.dynamo._compiler:Input graph: graph():
    %lifted_tensor_0 : [num_users=1] = get_attr[target=lifted_tensor_0]
    %x : [num_users=3] = placeholder[target=x]
    %slice_1 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_2 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_1, 1, 0, 9223372036854775807), kwargs = {})
    %select : [num_users=1] = call_function[target=torch.ops.aten.select.int](args = (%slice_2, 2, 0), kwargs = {})
    %copy : [num_users=1] = call_function[target=torch.ops.aten.copy.default](args = (%select, %lifted_tensor_0), kwargs = {})
    %slice_3 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_4 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_3, 1, 0, 9223372036854775807), kwargs = {})
    %unsqueeze : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%copy, 2), kwargs = {})
    %full : [num_users=1] = call_function[target=torch.ops.aten.full.default](args = ([1, 2], 1), kwargs = {dtype: torch.int64, layout: torch.strided, device: cpu, pin_memory: False})
    %mul : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%full, 0), kwargs = {})
    %unsqueeze_1 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%mul, 2), kwargs = {})
    %cat : [num_users=1] = call_function[target=torch.ops.aten.cat.default](args = ([%unsqueeze_1], 2), kwargs = {})
    %_to_copy : [num_users=1] = call_function[target=torch.ops.aten._to_copy.default](args = (%cat,), kwargs = {dtype: torch.int64, layout: torch.strided, device: cuda:0})
    %scatter : [num_users=1] = call_function[target=torch.ops.aten.scatter.src](args = (%slice_4, 2, %_to_copy, %unsqueeze), kwargs = {})
    %copy__default : [num_users=1] = call_function[target=torch.ops.aten.copy_.default](args = (%x, %scatter), kwargs = {})
    return (copy__default,)
DEBUG:torch_tensorrt.dynamo.lowering.passes.constant_folding:Graph after constant folding: graph():
    %lifted_tensor_0 : [num_users=1] = get_attr[target=lifted_tensor_0]
    %x : [num_users=3] = placeholder[target=x]
    %slice_1 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_2 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_1, 1, 0, 9223372036854775807), kwargs = {})
    %select : [num_users=1] = call_function[target=torch.ops.aten.select.int](args = (%slice_2, 2, 0), kwargs = {})
    %copy : [num_users=1] = call_function[target=torch.ops.aten.copy.default](args = (%select, %lifted_tensor_0), kwargs = {})
    %slice_3 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_4 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_3, 1, 0, 9223372036854775807), kwargs = {})
    %unsqueeze : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%copy, 2), kwargs = {})
    %_frozen_param0 : [num_users=1] = get_attr[target=_frozen_param0]
    %scatter : [num_users=1] = call_function[target=torch.ops.aten.scatter.src](args = (%slice_4, 2, %_frozen_param0, %unsqueeze), kwargs = {})
    %copy__default : [num_users=1] = call_function[target=torch.ops.aten.copy_.default](args = (%x, %scatter), kwargs = {})
    return (copy__default,)
DEBUG:torch_tensorrt.dynamo.lowering.passes.remove_assert_scalar:Removed 0 assert_scalar nodes: graph():
    %lifted_tensor_0 : [num_users=1] = get_attr[target=lifted_tensor_0]
    %x : [num_users=3] = placeholder[target=x]
    %slice_1 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_2 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_1, 1, 0, 9223372036854775807), kwargs = {})
    %select : [num_users=1] = call_function[target=torch.ops.aten.select.int](args = (%slice_2, 2, 0), kwargs = {})
    %copy : [num_users=1] = call_function[target=torch.ops.aten.copy.default](args = (%select, %lifted_tensor_0), kwargs = {})
    %slice_3 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_4 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_3, 1, 0, 9223372036854775807), kwargs = {})
    %unsqueeze : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%copy, 2), kwargs = {})
    %_frozen_param0 : [num_users=1] = get_attr[target=_frozen_param0]
    %scatter : [num_users=1] = call_function[target=torch.ops.aten.scatter.src](args = (%slice_4, 2, %_frozen_param0, %unsqueeze), kwargs = {})
    %copy__default : [num_users=1] = call_function[target=torch.ops.aten.copy_.default](args = (%x, %scatter), kwargs = {})
    return (copy__default,)
DEBUG:torch_tensorrt.dynamo.lowering.passes.accumulate_fp32_matmul:Skipping FP32 accumulation for matmul layers as use_fp32_acc is not enabled in the compilation settings
DEBUG:torch_tensorrt.dynamo._compiler:Lowered Input graph: graph():
    %lifted_tensor_0 : [num_users=1] = get_attr[target=lifted_tensor_0]
    %x : [num_users=3] = placeholder[target=x]
    %slice_1 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_2 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_1, 1, 0, 9223372036854775807), kwargs = {})
    %select : [num_users=1] = call_function[target=torch.ops.aten.select.int](args = (%slice_2, 2, 0), kwargs = {})
    %copy : [num_users=1] = call_function[target=torch.ops.aten.copy.default](args = (%select, %lifted_tensor_0), kwargs = {})
    %slice_3 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_4 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_3, 1, 0, 9223372036854775807), kwargs = {})
    %unsqueeze : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%copy, 2), kwargs = {})
    %_frozen_param0 : [num_users=1] = get_attr[target=_frozen_param0]
    %scatter : [num_users=1] = call_function[target=torch.ops.aten.scatter.src](args = (%slice_4, 2, %_frozen_param0, %unsqueeze), kwargs = {})
    %copy__default : [num_users=1] = call_function[target=torch.ops.aten.copy_.default](args = (%x, %scatter), kwargs = {})
    return (copy__default,)
DEBUG:torch_tensorrt.dynamo.partitioning._global_partitioner:
Supported Nodes:
- torch.ops.aten.slice.Tensor + Operator Count: 4
- torch.ops.aten.select.int + Operator Count: 1
- torch.ops.aten.copy.default + Operator Count: 1
- torch.ops.aten.unsqueeze.default + Operator Count: 1
- torch.ops.aten.scatter.src + Operator Count: 1
DEBUG:torch_tensorrt.dynamo.partitioning._global_partitioner:
All Nodes Supported
DEBUG:torch_tensorrt.dynamo._compiler:Detected support for 8 operators out of 9 in subgraph.
WARNING:torch_tensorrt.dynamo._compiler:Node copy__default of op type call_function does not have metadata. This could sometimes lead to undefined behavior.
WARNING:torch_tensorrt.dynamo._compiler:Some nodes do not have metadata (shape and dtype information). This could lead to problems sometimes if the graph has PyTorch and TensorRT segments.
INFO:torch_tensorrt.dynamo._compiler:Partitioning the graph via the fast partitioner
WARNING:py.warnings:/home/holywu/.local/lib/python3.12/site-packages/torch/fx/graph.py:1800: UserWarning: Node lifted_tensor_0 target lifted_tensor_0 lifted_tensor_0 of does not reference an nn.Module, nn.Parameter, or buffer, which is what 'get_attr' Nodes typically target
  warnings.warn(
DEBUG:torch_tensorrt.dynamo.partitioning._adjacency_partitioner:
Number of TensorRT-Accelerated Engines Generated: 1
DEBUG:torch_tensorrt.dynamo.partitioning._adjacency_partitioner:
Supported Nodes:
- torch.ops.aten.slice.Tensor + Operator Count: 4
- torch.ops.aten.select.int + Operator Count: 1
- torch.ops.aten.copy.default + Operator Count: 1
- torch.ops.aten.unsqueeze.default + Operator Count: 1
- torch.ops.aten.scatter.src + Operator Count: 1
DEBUG:torch_tensorrt.dynamo.partitioning._adjacency_partitioner:
All Nodes Supported
DEBUG:torch_tensorrt.dynamo._compiler:Updated metadata for node: _run_on_acc_0 with its corresponding submodule outputs
DEBUG:torch_tensorrt.dynamo._compiler:Converting submodule: _run_on_acc_0 Input shapes: [(1, 2, 3)]
graph():
    %x : [num_users=2] = placeholder[target=x]
    %slice_1 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_2 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_1, 1, 0, 9223372036854775807), kwargs = {})
    %select : [num_users=1] = call_function[target=torch.ops.aten.select.int](args = (%slice_2, 2, 0), kwargs = {})
    %lifted_tensor_0 : [num_users=1] = get_attr[target=lifted_tensor_0]
    %copy : [num_users=1] = call_function[target=torch.ops.aten.copy.default](args = (%select, %lifted_tensor_0), kwargs = {})
    %slice_3 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%x, 0, 0, 9223372036854775807), kwargs = {})
    %slice_4 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_3, 1, 0, 9223372036854775807), kwargs = {})
    %unsqueeze : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%copy, 2), kwargs = {})
    %_frozen_param0 : [num_users=1] = get_attr[target=_frozen_param0]
    %scatter : [num_users=1] = call_function[target=torch.ops.aten.scatter.src](args = (%slice_4, 2, %_frozen_param0, %unsqueeze), kwargs = {})
    return scatter
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node x (kind: x, args: ())
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Adding input to in-progress INetwork: x [shape=[1, 2, 3], dtype=DataType.FLOAT]
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node x [x] (Inputs: () | Outputs: (x: (1, 2, 3)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node /slice_1 (kind: aten.slice.Tensor, args: ('x <Node>', '0 <int>', '0 <int>', '9223372036854775807 <int>'))
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node /slice_1 [aten.slice.Tensor] (Inputs: (x: (1, 2, 3)@torch.float32, 0, 0, 9223372036854775807) | Outputs: (slice_1: (1, 2, 3)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node /slice_2 (kind: aten.slice.Tensor, args: ('slice_1 <Node>', '1 <int>', '0 <int>', '9223372036854775807 <int>'))
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node /slice_2 [aten.slice.Tensor] (Inputs: (slice_1: (1, 2, 3)@torch.float32, 1, 0, 9223372036854775807) | Outputs: (slice_2: (1, 2, 3)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node /select (kind: aten.select.int, args: ('slice_2 <Node>', '2 <int>', '0 <int>'))
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node /select [aten.select.int] (Inputs: (slice_2: (1, 2, 3)@torch.float32, 2, 0) | Outputs: (select: (1, 2)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node lifted_tensor_0 (kind: lifted_tensor_0, args: ())
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node lifted_tensor_0 [lifted_tensor_0] (Inputs: () | Outputs: (lifted_tensor_0: ()@float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node /copy (kind: aten.copy.default, args: ('select <Node>', 'lifted_tensor_0 <Node>'))
DEBUG:torch_tensorrt.dynamo.conversion.converter_utils:Freezing tensor /copy_constant_1 to TRT IConstantLayer
DEBUG:torch_tensorrt.dynamo.conversion.converter_utils:Scalar numpy detected at /copy_constant_1, adding rank (1,)
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node /copy [aten.copy.default] (Inputs: (select: (1, 2)@torch.float32, lifted_tensor_0: ()@float32) | Outputs: (copy: (1, 2)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node /slice_3 (kind: aten.slice.Tensor, args: ('x <Node>', '0 <int>', '0 <int>', '9223372036854775807 <int>'))
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node /slice_3 [aten.slice.Tensor] (Inputs: (x: (1, 2, 3)@torch.float32, 0, 0, 9223372036854775807) | Outputs: (slice_3: (1, 2, 3)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node /slice_4 (kind: aten.slice.Tensor, args: ('slice_3 <Node>', '1 <int>', '0 <int>', '9223372036854775807 <int>'))
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node /slice_4 [aten.slice.Tensor] (Inputs: (slice_3: (1, 2, 3)@torch.float32, 1, 0, 9223372036854775807) | Outputs: (slice_4: (1, 2, 3)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node /unsqueeze (kind: aten.unsqueeze.default, args: ('copy <Node>', '2 <int>'))
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node /unsqueeze [aten.unsqueeze.default] (Inputs: (copy: (1, 2)@torch.float32, 2) | Outputs: (unsqueeze: (1, 2, 1)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node _frozen_param0 (kind: _frozen_param0, args: ())
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node _frozen_param0 [_frozen_param0] (Inputs: () | Outputs: (_frozen_param0: (1, 2, 1)@int64))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node /scatter (kind: aten.scatter.src, args: ('slice_4 <Node>', '2 <int>', '_frozen_param0 <Node>', 'unsqueeze <Node>'))
DEBUG:torch_tensorrt.dynamo.conversion.converter_utils:Freezing tensor /scatter_constant_2 to TRT IConstantLayer
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node /scatter [aten.scatter.src] (Inputs: (slice_4: (1, 2, 3)@torch.float32, 2, _frozen_param0: (1, 2, 1)@int64, unsqueeze: (1, 2, 1)@torch.float32) | Outputs: (scatter: (1, 2, 3)@torch.float32))
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converting node output (kind: output, args: ('scatter <Node>',))
ERROR:torch_tensorrt [TensorRT Conversion Context]:ITensor::getDimensions: Error Code 4: API Usage Error ([SCATTER]-[aten_ops.scatter.src]-[/scatter_scatter_layer]: ScatterLayer in elements mode all inputs tensors rank must be same. Input 0 rank is 3, input 1 rank is 3, and input 2 rank is 2.)
DEBUG:torch_tensorrt.dynamo.conversion._TRTInterpreter:Marking output output0 [shape=(81), dtype=DataType.FLOAT]
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Converted node output [output] (Inputs: (scatter: (1, 2, 3)@torch.float32) | Outputs: (output: ))
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:TRT INetwork construction elapsed time: 0:00:00.033800
INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Not found cached TRT engines. Start building engine.
ERROR:torch_tensorrt [TensorRT Conversion Context]:ITensor::getDimensions: Error Code 4: API Usage Error (Output shape can not be computed for node [SCATTER]-[aten_ops.scatter.src]-[/scatter_scatter_layer].)
ERROR:torch_tensorrt [TensorRT Conversion Context]:IBuilder::buildSerializedNetwork: Error Code 4: API Usage Error ([SCATTER]-[aten_ops.scatter.src]-[/scatter_scatter_layer]: ScatterLayer in elements mode all inputs tensors rank must be same. Input 0 rank is 3, input 1 rank is 3, and input 2 rank is 2.)
Traceback (most recent call last):
  File "/home/holywu/scatter.py", line 22, in <module>
    trt_model = torch_tensorrt.compile(model, "dynamo", inputs, debug=True, min_block_size=1)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/holywu/.local/lib/python3.12/site-packages/torch_tensorrt/_compile.py", line 286, in compile
    trt_graph_module = dynamo_compile(
                       ^^^^^^^^^^^^^^^
  File "/home/holywu/.local/lib/python3.12/site-packages/torch_tensorrt/dynamo/_compiler.py", line 608, in compile
    trt_gm = compile_module(
             ^^^^^^^^^^^^^^^
  File "/home/holywu/.local/lib/python3.12/site-packages/torch_tensorrt/dynamo/_compiler.py", line 810, in compile_module
    trt_module = convert_module(
                 ^^^^^^^^^^^^^^^
  File "/home/holywu/.local/lib/python3.12/site-packages/torch_tensorrt/dynamo/conversion/_conversion.py", line 90, in convert_module
    interpreter_result = interpret_module_to_result(
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/holywu/.local/lib/python3.12/site-packages/torch_tensorrt/dynamo/conversion/_conversion.py", line 69, in interpret_module_to_result
    interpreter_result = interpreter.run()
                         ^^^^^^^^^^^^^^^^^
  File "/home/holywu/.local/lib/python3.12/site-packages/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py", line 645, in run
    assert serialized_engine
           ^^^^^^^^^^^^^^^^^
AssertionError
```
## To Reproduce

See the script and debug log above.

## Environment

- How you installed PyTorch (conda, pip, libtorch, source): pip
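Not part of the original report, but one possible workaround sketch while the converter bug stands is to express the fill out of place, so that export does not lower it through aten.fill_/aten.scatter.src. `MyModuleOutOfPlace` below is a hypothetical rewrite; whether it compiles cleanly with Torch-TensorRT has not been verified here, and it does not mutate the input in place the way the original module does.

```python
import torch


class MyModuleOutOfPlace(torch.nn.Module):
    """Hypothetical rewrite of MyModule that avoids in-place slice assignment."""

    def forward(self, x):
        # Build the constant first column out of place, then concatenate the rest.
        first = torch.full_like(x[:, :, :1], 3.1415)
        return torch.cat([first, x[:, :, 1:]], dim=2)
```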