Open yanbing-j opened 3 months ago
@yanbing-j, has this problem been resolved? We got the same error during AOT codegen. If it has not been fixed yet, does anyone else have any insight? Thanks!
` C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] Error in codegen for ComputedBuffer(name='buf364', layout=FixedLayout('cpu', torch.float32, size=[s0, 284], stride=[284, 1]), data=Pointwise( C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] 'cpu', C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] torch.float32, C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] def inner_fn(index): C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] i0, i1 = index C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp0 = ops.load(Lself_entity_to_embedding_dense_extractor_indices, i1) C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp1 = ops.load(arg172_1, (tmp0) + 284 * i0) C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp2 = ops.load(Lself_entity_to_embedding_dense_transformer_means, i1) C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp3 = tmp1 - tmp2 C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp4 = ops.load(L__self___entity_to_embedding_dense_transformer_stddevs_reciprocal, i1) C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp5 = tmp3 * tmp4 C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp6 = ops.constant(-10.0, torch.float32) C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp7 = ops.maximum(tmp5, tmp6) C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp8 = ops.constant(10.0, torch.float32) C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] tmp9 = ops.minimum(tmp7, tmp8) C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] return tmp9 C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] , C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] ranges=[s0, 284], C0508 
23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] origin_node=clamp_max, C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] origins={clamp_max, sub_104, clamp_min, mul_18, index} C0508 23:32:07.937000 133265142647680 torch/_inductor/scheduler.py:781] ))
C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] Error in codegen for ComputedBuffer(name='buf366', layout=FixedLayout('cpu', torch.float32, size=[s0, 20], stride=[20, 1]), data=Pointwise( C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] 'cpu', C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] torch.float32, C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] def inner_fn(index): C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] i0, i1 = index C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] tmp0 = ops.load(_tensor_constant0, i1) C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] tmp1 = ops.load(buf365, (tmp0) + 284 i0) C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] tmp2 = ops.sigmoid(tmp1) C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] tmp3 = ops.load(buf364, (tmp0) + 284 i0) C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] tmp4 = tmp2 * tmp3 C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] return tmp4 C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] , C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] ranges=[s0, 20], C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] origin_node=view_52, C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] origins={index_1, mul_19, sigmoid} C0508 23:32:08.088000 133265142647680 torch/_inductor/scheduler.py:781] ))
C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] Error in codegen for ComputedBuffer(name='buf456', layout=FixedLayout('cpu', torch.float32, size=[s0, 20], stride=[20, 1]), data=Pointwise( C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] 'cpu', C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] torch.float32, C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] def inner_fn(index): C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] i0, i1 = index C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] tmp0 = ops.load(_tensor_constant6, i1) C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] tmp1 = ops.load(buf365, (tmp0) + 284 i0) C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] tmp2 = ops.sigmoid(tmp1) C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] tmp3 = ops.load(buf364, (tmp0) + 284 i0) C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] tmp4 = tmp2 * tmp3 C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] return tmp4 C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] , C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] ranges=[s0, 20], C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] origin_node=view_70, C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] origins={index_7, mul_19, sigmoid} C0508 23:32:09.214000 133265142647680 torch/_inductor/scheduler.py:781] ))
C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] Error in codegen for ComputedBuffer(name='buf546', layout=FixedLayout('cpu', torch.float32, size=[s0, 20], stride=[20, 1]), data=Pointwise( C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] 'cpu', C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] torch.float32, C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] def inner_fn(index): C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] i0, i1 = index C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] tmp0 = ops.load(_tensor_constant12, i1) C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] tmp1 = ops.load(buf365, (tmp0) + 284 i0) C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] tmp2 = ops.sigmoid(tmp1) C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] tmp3 = ops.load(buf364, (tmp0) + 284 i0) C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] tmp4 = tmp2 * tmp3 C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] return tmp4 C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] , C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] ranges=[s0, 20], C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] origin_node=view_88, C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] origins={index_13, mul_19, sigmoid} C0508 23:32:09.904000 133265142647680 torch/_inductor/scheduler.py:781] ))
C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] Error in codegen for ComputedBuffer(name='buf675', layout=AliasedLayout('cpu', torch.float32, size=[s0, 2211], stride=[3235, 1]), data=Pointwise( C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] 'cpu', C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] torch.float32, C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] def inner_fn(index): C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] i0, i1 = index C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] tmp0 = ops.load(Lself_entity_to_embedding_dot_product_module_indices_0, i1) C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] tmp1 = ops.load(Lself_entity_to_embedding_dot_product_module_indices_1, i1) C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] tmp2 = ops.load(buf665, (tmp1) + 67 (tmp0) + 4489 i0) C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] tmp3 = ops.constant(1, torch.float32) C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] tmp4 = tmp2 * tmp3 C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] return tmp4 C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] , C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] ranges=[s0, 2211], C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] origin_node=index_16, C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] origins={mul_81, index_16} C0508 23:32:11.876000 133265142647680 torch/_inductor/scheduler.py:781] )) `
@rapplovin Not fixed. Maintainers have not replied yet.
Hey, this is an Inductor cudagraph bug, which we are working on fixing. Meanwhile, there is a workaround to mitigate this.
Oh, sorry, this is a CPU tensor, so this is probably a different issue. I'll have a look soon.
Hi Maintainers @yanboliang @Chillee ,
I encounter a codegen error when using
--compile_prefill
in int8 WOQ. Although it can still run, it could be confusing to users. Could you please fix this? Thanks!