ROCm / AITemplate

AITemplate is a Python framework which renders neural networks into high-performance CUDA/HIP C++ code. It is specialized for FP16 TensorCore (NVIDIA GPU) and MatrixCore (AMD GPU) inference.
Apache License 2.0
11 stars 7 forks source link

[Draft] Merging upstream (need support) #72

Closed Boom-Hacker closed 12 months ago

Boom-Hacker commented 1 year ago

[Draft] Merging upstream (need support)

'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'layernorm_232_0', 'encoder_layers_11_self_attn_proj_q_weight', 'encoder_layers_11_self_attn_proj_q_bias'], 'name': 'gemm_rcr_bias_233', 'nop': False, 'num_sources': 0, 'op': 'gemm_rcr_bias', 'original_name': 'gemm_rcr_bias_233', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([], [1]), ([1], [2, 3])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_236_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_236_0_dim_2', 'nop': False, 'symbolic_value': 12, 'values': [12]}, { 'depth': 0, 'name': 'reshape_236_0_dim_3', 'nop': False, 'symbolic_value': 64, 'values': [64]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_q_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['reshape_236_0'], 'permute_shape': '', 'split_k': 1, 'workspace': 0}

{ 'alpha': 1.0, 'depth': 161, 'epilogue': 'LinearCombination', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.init..cal_align_ab at 0x7f9ceceafb80>, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_10_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_k_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}, { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_k_weight_dim_1', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_k_bias_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'layernorm_232_0', 'encoder_layers_11_self_attn_proj_k_weight', 'encoder_layers_11_self_attn_proj_k_bias'], 'name': 'gemm_rcr_bias_234', 'nop': False, 'num_sources': 0, 'op': 'gemm_rcr_bias', 'original_name': 'gemm_rcr_bias_234', 'output_accessors': [ { '_dim_mapping': 
[([0], [0]), ([], [1]), ([1], [2, 3])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_237_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_237_0_dim_2', 'nop': False, 'symbolic_value': 12, 'values': [12]}, { 'depth': 0, 'name': 'reshape_237_0_dim_3', 'nop': False, 'symbolic_value': 64, 'values': [64]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_k_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['reshape_237_0'], 'permute_shape': '', 'split_k': 1, 'workspace': 0}

{ 'alpha': 1.0, 'depth': 161, 'epilogue': 'LinearCombination', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.init..cal_align_ab at 0x7f9ceceaff70>, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_10_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_v_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}, { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_v_weight_dim_1', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_v_bias_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'layernorm_232_0', 'encoder_layers_11_self_attn_proj_v_weight', 'encoder_layers_11_self_attn_proj_v_bias'], 'name': 'gemm_rcr_bias_235', 'nop': False, 'num_sources': 0, 'op': 'gemm_rcr_bias', 'original_name': 'gemm_rcr_bias_235', 'output_accessors': [ { '_dim_mapping': 
[([0], [0]), ([], [1]), ([1], [2, 3])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_238_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_238_0_dim_2', 'nop': False, 'symbolic_value': 12, 'values': [12]}, { 'depth': 0, 'name': 'reshape_238_0_dim_3', 'nop': False, 'symbolic_value': 64, 'values': [64]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_v_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['reshape_238_0'], 'permute_shape': '', 'split_k': 1, 'workspace': 0}

{ 'depth': 163, 'dims': [0, 2, 1, 3], 'has_profiler': False, 'inputs': ['reshape_236_0'], 'name': 'permute_239', 'nop': False, 'op': 'permute', 'original_name': 'permute_239', 'outputs': ['permute_239_0']}

{ 'depth': 163, 'dims': [0, 2, 1, 3], 'has_profiler': False, 'inputs': ['reshape_237_0'], 'name': 'permute_240', 'nop': False, 'op': 'permute', 'original_name': 'permute_240', 'outputs': ['permute_240_0']}

{ 'depth': 163, 'dims': [0, 2, 1, 3], 'has_profiler': False, 'inputs': ['reshape_238_0'], 'name': 'permute_241', 'nop': False, 'op': 'permute', 'original_name': 'permute_241', 'outputs': ['permute_241_0']}

{ 'causal': True, 'depth': 164, 'dropout': 0, 'exec_path': OrderedDict([ ( 'batch_size == 77 && num_heads == 12 && seq_len ' '== 1 && head_sizes == 64', ''), ( 'batch_size == 616 && num_heads == 12 && ' 'seq_len == 1 && head_sizes == 64', '')]), 'has_profiler': False, 'head_size': 64, 'inputs': ['permute_239_0', 'permute_240_0', 'permute_241_0'], 'name': 'mem_eff_attention_242', 'nop': False, 'op': 'mem_eff_attention', 'original_name': 'mem_eff_attention_242', 'outputs': ['mem_eff_attention_242_0'], 'use_grouped_fmha': False, 'variable_seq_length_kv': False, 'variable_seq_length_q': False, 'workspace': 1892352}

{ 'depth': 165, 'has_profiler': False, 'inputs': ['mem_eff_attention_242_0'], 'is_intvar': False, 'name': 'reshape_243', 'nop': False, 'op': 'reshape', 'original_name': 'reshape_243', 'outputs': ['reshape_243_0'], 'shape': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_243_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_243_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'unknown_idx': 0}

{ 'alpha': 1.0, 'depth': 167, 'epilogue': 'LinearCombinationResidualBlock', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.init..cal_align_ab at 0x7f9ceceafe50>, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([], [1]), ([1], [2])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_243_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_243_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_244_0_dim_1', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}, { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_weight_dim_1', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_bias_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 
'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_10_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'reshape_243_0', 'encoder_layers_11_self_attn_proj_weight', 'encoder_layers_11_self_attn_proj_bias', 'reshape_231_0'], 'name': 'gemm_rcr_bias_add_245', 'nop': False, 'num_sources': 1, 'op': 'gemm_rcr_bias_add', 'original_name': 'gemm_rcr_bias_add_245', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([], [1]), ([1], [2])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_246_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_246_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_self_attn_proj_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['reshape_246_0'], 'permute_shape': '', 'split_k': 1, 'workspace': 0}

{ 'beta_constant': None, 'default_normalized_shape': None, 'depth': 169, 'eps': 1e-05, 'gamma_constant': None, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1]), ([2], [2])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_246_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_246_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'reshape_246_0', 'encoder_layers_11_layer_norm2_weight', 'encoder_layers_11_layer_norm2_bias'], 'name': 'layernorm_247', 'nop': False, 'normalized_shape': [ {'depth': 0, 'name': None, 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'op': 'layernorm', 'original_name': 'layernorm_247', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([1], []), ([2], [1])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_248_0_dim_1', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_246_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_246_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['reshape_248_0']}

{ 'alpha': 1.0, 'depth': 171, 'epilogue': 'LinearCombinationFastGELU', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.init..cal_align_ab at 0x7f9ceceafee0>, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_248_0_dim_1', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc1_weight_dim_0', 'nop': False, 'symbolic_value': 3072, 'values': [3072]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc1_weight_dim_1', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc1_bias_dim_0', 'nop': False, 'symbolic_value': 3072, 'values': [3072]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'reshape_248_0', 'encoder_layers_11_mlp_fc1_weight', 'encoder_layers_11_mlp_fc1_bias'], 'name': 'gemm_rcr_bias_fast_gelu_249', 'nop': False, 'num_sources': 0, 'op': 'gemm_rcr_bias_fast_gelu', 'original_name': 'gemm_rcr_bias_fast_gelu_249', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 
'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77*batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc1_weight_dim_0', 'nop': False, 'symbolic_value': 3072, 'values': [3072]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['gemm_rcr_bias_fast_gelu_249_0'], 'permute_shape': '', 'split_k': 1, 'workspace': 0}

{ 'alpha': 1.0, 'depth': 172, 'epilogue': 'LinearCombinationResidualBlock', 'epilogue_alignment': 8, 'f_ab_alignment': <function gemm_rcr.init..cal_align_ab at 0x7f9ceceafdc0>, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc1_weight_dim_0', 'nop': False, 'symbolic_value': 3072, 'values': [3072]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_weight_dim_1', 'nop': False, 'symbolic_value': 3072, 'values': [3072]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_bias_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}, { '_dim_mapping': [([0], [0]), ([1], [1]), ([2], [2])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 
77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'reshape_246_0_dim_1', 'nop': False, 'symbolic_value': 1, 'values': [1]}, { 'depth': 0, 'name': 'reshape_246_0_dim_2', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'gemm_rcr_bias_fast_gelu_249_0', 'encoder_layers_11_mlp_fc2_weight', 'encoder_layers_11_mlp_fc2_bias', 'reshape_246_0'], 'name': 'gemm_rcr_bias_add_250', 'nop': False, 'num_sources': 1, 'op': 'gemm_rcr_bias_add', 'original_name': 'gemm_rcr_bias_add_250', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['reshape_251_0'], 'permute_shape': '', 'split_k': 1, 'workspace': 0}

{ 'beta_constant': None, 'default_normalized_shape': None, 'depth': 174, 'eps': 1e-05, 'gamma_constant': None, 'has_profiler': True, 'input_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'inputs': [ 'reshape_251_0', 'final_layer_norm_weight', 'final_layer_norm_bias'], 'name': 'layernorm_252', 'nop': False, 'normalized_shape': [ {'depth': 0, 'name': None, 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'op': 'layernorm', 'original_name': 'layernorm_252', 'output_accessors': [ { '_dim_mapping': [([0], [0]), ([1], [1])], 'actual_shapes': None, 'actual_total_elements_from_stride_dim': None, 'is_contiguous': True, 'is_from_strided_tensor': False, 'offset': 0, 'original_shapes': [ { 'depth': 0, 'name': 'reshape_248_0_dim_0', 'nop': False, 'symbolic_value': 77batch_size, 'values': [77, 616]}, { 'depth': 0, 'name': 'encoder_layers_11_mlp_fc2_weight_dim_0', 'nop': False, 'symbolic_value': 768, 'values': [768]}], 'original_total_elements_from_stride_dim': None, 'stride_dim': None, 'tensor_dtype': 'float16'}], 'outputs': ['output_0']}

Traceback (most recent call last):
  File "./scripts/compile.py", line 107, in <module>
    compile_diffusers()
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1434, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 783, in invoke
    return __callback(*args, **kwargs)
  File "./scripts/compile.py", line 71, in compile_diffusers
    compile_clip(
  File "/root/AITemplate-rocm/examples/05_stable_diffusion/src/compile_lib/compile_clip.py", line 87, in compile_clip
    compile_model(Y, target, "./tmp", "CLIPTextModel", constants=params_ait)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/compiler.py", line 256, in compile_model
    graph = compiler.transform.optimize_graph(
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/optimize_graph.py", line 136, in optimize_graph
    sorted_graph = func(sorted_graph, workdir)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_ops.py", line 492, in transform_strided_ops
    sorted_graph = func(sorted_graph)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_op_and_view_op.py", line 151, in _fuse_strided_op_and_view_op
    sorted_graph = _fuse_strided_op_and_view_op_single_pass(sorted_graph)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_strided_op_and_view_op.py", line 122, in _fuse_strided_op_and_view_op_single_pass
    return transform_utils.sanitize_sorted_graph(sorted_graph)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 280, in sanitize_sorted_graph
    check_graph_validity(new_sorted_graph, raiseError=True)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 64, in check_graph_validity
    valid = handleError(
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/transform/transform_utils.py", line 47, in handleError
    raise RuntimeError(msg)
RuntimeError: Input tensor size_2_0 not established in graph for op expand_3

Boom-Hacker commented 1 year ago

As of commit 32b5b3cae649761e67951ea60cd13a48d7439087, everything works normally.