AITemplate is a Python framework which renders neural networks into high performance CUDA/HIP C++ code. Specialized for FP16 TensorCore (NVIDIA GPU) and MatrixCore (AMD GPU) inference.
Traceback (most recent call last):
File "./scripts/compile.py", line 107, in <module>
compile_diffusers()
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1078, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "./scripts/compile.py", line 71, in compile_diffusers
compile_clip(
File "/root/AITemplate-rocm/examples/05_stable_diffusion/src/compile_lib/compile_clip.py", line 87, in compile_clip
compile_model(Y, target, "./tmp", "CLIPTextModel", constants=params_ait)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/compiler.py", line 256, in compile_model
graph = compiler.transform.optimize_graph(
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/optimize_graph.py", line 136, in optimize_graph
sorted_graph = func(sorted_graph, workdir)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_ops.py", line 492, in transform_strided_ops
sorted_graph = func(sorted_graph)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_op_and_view_op.py", line 151, in _fuse_strided_op_and_view_op
sorted_graph = _fuse_strided_op_and_view_op_single_pass(sorted_graph)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_op_and_view_op.py", line 122, in _fuse_strided_op_and_view_op_single_pass
return transform_utils.sanitize_sorted_graph(sorted_graph)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 280, in sanitize_sorted_graph
check_graph_validity(new_sorted_graph, raiseError=True)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 64, in check_graph_validity
valid = handleError(
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 47, in handleError
raise RuntimeError(msg)
RuntimeError: Input tensor size_2_0 not established in graph for op expand_3
[Draft] Merging upstream (need support)
'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'layernorm_232_0', 'encoder_layers_11_self_attn_proj_q_weight', 'encoder_layers_11_self_attn_proj_q_bias'], 'name': 'gemm_rcr_bias_233', 'nop': False, 'num_sources': 0, 'op': 'gemm_rcr_bias', 'original_name': 'gemm_rcr_bias_233', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([], [1]), ([1], [2, 3])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_236_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_236_0_dim_2', 'nop': False, 'symbolic_value': 12, 'values': [12]}, { 'depth': 0, 'name': 'reshape_236_0_dim_3', 'nop': False, 'symbolic_value': 64, 'values': [64]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_q_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['reshape_236_0'], 'permute_shape': '', 'split_k': 1, 'workspace': 0}
{ 'alpha': 1.0, 'depth': 161, 'epilogue': 'LinearCombination', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.__init__.<locals>.cal_align_ab at 0x7f9ceceafb80>,
'has_profiler': True,
'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_10_mlp_fc2_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_k_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]},
{ 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_k_weight_dim_1',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_k_bias_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'inputs': [ 'layernorm_232_0',
'encoder_layers_11_self_attn_proj_k_weight',
'encoder_layers_11_self_attn_proj_k_bias'],
'name': 'gemm_rcr_bias_234',
'nop': False,
'num_sources': 0,
'op': 'gemm_rcr_bias',
'original_name': 'gemm_rcr_bias_234',
'output_accessors': [ { '_dim_mapping': [([0], [0]), ([], [1]), ([1], [2, 3])],
'actual_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'reshape_237_0_dim_1',
'nop': False,
'symbolic_value': 1,
'values': [1]},
{ 'depth': 0,
'name': 'reshape_237_0_dim_2',
'nop': False,
'symbolic_value': 12,
'values': [12]},
{ 'depth': 0,
'name': 'reshape_237_0_dim_3',
'nop': False,
'symbolic_value': 64,
'values': [64]}],
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_k_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'outputs': ['reshape_237_0'],
'permute_shape': '',
'split_k': 1,
'workspace': 0}
{ 'alpha': 1.0, 'depth': 161, 'epilogue': 'LinearCombination', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.__init__.<locals>.cal_align_ab at 0x7f9ceceaff70>,
'has_profiler': True,
'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_10_mlp_fc2_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_v_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]},
{ 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_v_weight_dim_1',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_v_bias_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'inputs': [ 'layernorm_232_0',
'encoder_layers_11_self_attn_proj_v_weight',
'encoder_layers_11_self_attn_proj_v_bias'],
'name': 'gemm_rcr_bias_235',
'nop': False,
'num_sources': 0,
'op': 'gemm_rcr_bias',
'original_name': 'gemm_rcr_bias_235',
'output_accessors': [ { '_dim_mapping': [([0], [0]), ([], [1]), ([1], [2, 3])],
'actual_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'reshape_238_0_dim_1',
'nop': False,
'symbolic_value': 1,
'values': [1]},
{ 'depth': 0,
'name': 'reshape_238_0_dim_2',
'nop': False,
'symbolic_value': 12,
'values': [12]},
{ 'depth': 0,
'name': 'reshape_238_0_dim_3',
'nop': False,
'symbolic_value': 64,
'values': [64]}],
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_v_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'outputs': ['reshape_238_0'],
'permute_shape': '',
'split_k': 1,
'workspace': 0}
{ 'depth': 163, 'dims': [0, 2, 1, 3], 'has_profiler': False, 'inputs': ['reshape_236_0'], 'name': 'permute_239', 'nop': False, 'op': 'permute', 'original_name': 'permute_239', 'outputs': ['permute_239_0']}
{ 'depth': 163, 'dims': [0, 2, 1, 3], 'has_profiler': False, 'inputs': ['reshape_237_0'], 'name': 'permute_240', 'nop': False, 'op': 'permute', 'original_name': 'permute_240', 'outputs': ['permute_240_0']}
{ 'depth': 163, 'dims': [0, 2, 1, 3], 'has_profiler': False, 'inputs': ['reshape_238_0'], 'name': 'permute_241', 'nop': False, 'op': 'permute', 'original_name': 'permute_241', 'outputs': ['permute_241_0']}
{ 'causal': True, 'depth': 164, 'dropout': 0, 'exec_path': OrderedDict([ ( 'batch_size == 77 && num_heads == 12 && seq_len ' '== 1 && head_sizes == 64', ''), ( 'batch_size == 616 && num_heads == 12 && ' 'seq_len == 1 && head_sizes == 64', '')]), 'has_profiler': False, 'head_size': 64, 'inputs': ['permute_239_0', 'permute_240_0', 'permute_241_0'], 'name': 'mem_eff_attention_242', 'nop': False, 'op': 'mem_eff_attention', 'original_name': 'mem_eff_attention_242', 'outputs': ['mem_eff_attention_242_0'], 'use_grouped_fmha': False, 'variable_seq_length_kv': False, 'variable_seq_length_q': False, 'workspace': 1892352}
{ 'depth': 165, 'has_profiler': False, 'inputs': ['mem_eff_attention_242_0'], 'is_intvar': False, 'name': 'reshape_243', 'nop': False, 'op': 'reshape', 'original_name': 'reshape_243', 'outputs': ['reshape_243_0'], 'shape': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_243_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_243_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'unknown_idx': 0}
{ 'alpha': 1.0, 'depth': 167, 'epilogue': 'LinearCombinationResidualBlock', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.__init__.<locals>.cal_align_ab at 0x7f9ceceafe50>,
'has_profiler': True,
'input_accessors': [ { '_dim_mapping': [([0], [0]), ([], [1]), ([1], [2])],
'actual_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'reshape_243_0_dim_1',
'nop': False,
'symbolic_value': 1,
'values': [1]},
{ 'depth': 0,
'name': 'reshape_243_0_dim_2',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'reshape_244_0_dim_1',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]},
{ 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_weight_dim_1',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_bias_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_10_mlp_fc2_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'inputs': [ 'reshape_243_0',
'encoder_layers_11_self_attn_proj_weight',
'encoder_layers_11_self_attn_proj_bias',
'reshape_231_0'],
'name': 'gemm_rcr_bias_add_245',
'nop': False,
'num_sources': 1,
'op': 'gemm_rcr_bias_add',
'original_name': 'gemm_rcr_bias_add_245',
'output_accessors': [ { '_dim_mapping': [([0], [0]), ([], [1]), ([1], [2])],
'actual_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'reshape_246_0_dim_1',
'nop': False,
'symbolic_value': 1,
'values': [1]},
{ 'depth': 0,
'name': 'reshape_246_0_dim_2',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_11_self_attn_proj_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'outputs': ['reshape_246_0'],
'permute_shape': '',
'split_k': 1,
'workspace': 0}
{ 'beta_constant': None, 'default_normalized_shape': None, 'depth': 169, 'eps': 1e-05, 'gamma_constant': None, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1]), ([2], [2])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_246_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_246_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'reshape_246_0', 'encoder_layers_11_layer_norm2_weight', 'encoder_layers_11_layer_norm2_bias'], 'name': 'layernorm_247', 'nop': False, 'normalized_shape': [ {'depth': 0, 'name': None, 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'op': 'layernorm', 'original_name': 'layernorm_247', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([1], []), ([2], [1])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_248_0_dim_1', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_246_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_246_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['reshape_248_0']}
{ 'alpha': 1.0, 'depth': 171, 'epilogue': 'LinearCombinationFastGELU', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.__init__.<locals>.cal_align_ab at 0x7f9ceceafee0>,
'has_profiler': True,
'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'reshape_248_0_dim_1',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_mlp_fc1_weight_dim_0',
'nop': False,
'symbolic_value': 3072,
'values': [3072]},
{ 'depth': 0,
'name': 'encoder_layers_11_mlp_fc1_weight_dim_1',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_mlp_fc1_bias_dim_0',
'nop': False,
'symbolic_value': 3072,
'values': [3072]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'inputs': [ 'reshape_248_0',
'encoder_layers_11_mlp_fc1_weight',
'encoder_layers_11_mlp_fc1_bias'],
'name': 'gemm_rcr_bias_fast_gelu_249',
'nop': False,
'num_sources': 0,
'op': 'gemm_rcr_bias_fast_gelu',
'original_name': 'gemm_rcr_bias_fast_gelu_249',
'output_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_11_mlp_fc1_weight_dim_0',
'nop': False,
'symbolic_value': 3072,
'values': [3072]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'outputs': ['gemm_rcr_bias_fast_gelu_249_0'],
'permute_shape': '',
'split_k': 1,
'workspace': 0}
{ 'alpha': 1.0, 'depth': 172, 'epilogue': 'LinearCombinationResidualBlock', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.__init__.<locals>.cal_align_ab at 0x7f9ceceafdc0>,
'has_profiler': True,
'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_11_mlp_fc1_weight_dim_0',
'nop': False,
'symbolic_value': 3072,
'values': [3072]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_mlp_fc2_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]},
{ 'depth': 0,
'name': 'encoder_layers_11_mlp_fc2_weight_dim_1',
'nop': False,
'symbolic_value': 3072,
'values': [3072]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'encoder_layers_11_mlp_fc2_bias_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'},
{ '_dim_mapping': [([0], [0]), ([1], [1]), ([2], [2])],
'actual_shapes': None,
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'reshape_246_0_dim_1',
'nop': False,
'symbolic_value': 1,
'values': [1]},
{ 'depth': 0,
'name': 'reshape_246_0_dim_2',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'inputs': [ 'gemm_rcr_bias_fast_gelu_249_0',
'encoder_layers_11_mlp_fc2_weight',
'encoder_layers_11_mlp_fc2_bias',
'reshape_246_0'],
'name': 'gemm_rcr_bias_add_250',
'nop': False,
'num_sources': 1,
'op': 'gemm_rcr_bias_add',
'original_name': 'gemm_rcr_bias_add_250',
'output_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])],
'actual_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_11_mlp_fc2_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'actual_total_elements_from_stride_dim': None,
'is_contiguous': True,
'is_from_strided_tensor': False,
'offset': 0,
'original_shapes': [ { 'depth': 0,
'name': 'reshape_248_0_dim_0',
'nop': False,
'symbolic_value': 77*batch_size,
'values': [77, 616]},
{ 'depth': 0,
'name': 'encoder_layers_11_mlp_fc2_weight_dim_0',
'nop': False,
'symbolic_value': 768,
'values': [768]}],
'original_total_elements_from_stride_dim': None,
'stride_dim': None,
'tensor_dtype': 'float16'}],
'outputs': ['reshape_251_0'],
'permute_shape': '',
'split_k': 1,
'workspace': 0}
{ 'beta_constant': None, 'default_normalized_shape': None, 'depth': 174, 'eps': 1e-05, 'gamma_constant': None, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'reshape_251_0', 'final_layer_norm_weight', 'final_layer_norm_bias'], 'name': 'layernorm_252', 'nop': False, 'normalized_shape': [ {'depth': 0, 'name': None, 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'op': 'layernorm', 'original_name': 'layernorm_252', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['output_0']}
Traceback (most recent call last):
File "./scripts/compile.py", line 107, in <module>
compile_diffusers()
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1078, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "./scripts/compile.py", line 71, in compile_diffusers
compile_clip(
File "/root/AITemplate-rocm/examples/05_stable_diffusion/src/compile_lib/compile_clip.py", line 87, in compile_clip
compile_model(Y, target, "./tmp", "CLIPTextModel", constants=params_ait)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/compiler.py", line 256, in compile_model
graph = compiler.transform.optimize_graph(
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/optimize_graph.py", line 136, in optimize_graph
sorted_graph = func(sorted_graph, workdir)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_ops.py", line 492, in transform_strided_ops
sorted_graph = func(sorted_graph)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_op_and_view_op.py", line 151, in _fuse_strided_op_and_view_op
sorted_graph = _fuse_strided_op_and_view_op_single_pass(sorted_graph)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_op_and_view_op.py", line 122, in _fuse_strided_op_and_view_op_single_pass
return transform_utils.sanitize_sorted_graph(sorted_graph)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 280, in sanitize_sorted_graph
check_graph_validity(new_sorted_graph, raiseError=True)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 64, in check_graph_validity
valid = handleError(
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 47, in handleError
raise RuntimeError(msg)
RuntimeError: Input tensor size_2_0 not established in graph for op expand_3