iree-org / iree

A retargetable MLIR-based machine learning compiler and runtime toolkit.
http://iree.dev/
Apache License 2.0
2.58k stars 577 forks source link

'stream.async.dispatch' op has invalid Read access range [0 to -96 for -96] of resource #18468

Open pdhirajkumarprasad opened 2 weeks ago

pdhirajkumarprasad commented 2 weeks ago

What happened?

For the given IR:

// Standalone reproducer extracted from a larger ONNX model (opset 21, exported
// from PyTorch 1.12.1). Compiling with iree-compile --iree-hal-target-backends=llvm-cpu
// fails in the stream dialect verifier on the final transpose dispatch.
module {
  func.func @torch_jit(%arg0: !torch.vtensor<[1,3,320,320],f32>, %arg1: !torch.vtensor<[1],si64>, %arg2:!torch.vtensor<[?,?,1],f32>, %arg3:!torch.vtensor<[?,96,?,?],f32>, %arg4:!torch.vtensor<[3],si64>, %arg5:!torch.vtensor<[],si64>, %arg6:!torch.vtensor<[1,288,20,20],f32>, %arg7:!torch.vtensor<[5],si64>, %arg8:!torch.vtensor<[?,?,288],f32>, %arg9:!torch.vtensor<[?,?,288],f32>, %arg10:!torch.vtensor<[?,?,?],f32>, %arg12: !torch.vtensor<[?,?,?],f32>, %arg13:!torch.vtensor<[96,1,3,3],f32>, %arg14:!torch.vtensor<[],si64>, %arg15:!torch.vtensor<[288],f32>, %arg16:!torch.vtensor<[288,864],f32>, %arg17:!torch.vtensor<[864],f32>, %arg18:!torch.vtensor<[96],f32>, %arg19:!torch.vtensor<[96,1,3,3],f32>, %arg20:!torch.vtensor<[96],f32>) -> !torch.vtensor<[3,?,8,?,?],f32>  attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 21 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "1.12.1"} {
    // Build a [1,288,20,20] activation and compute dynamic slice bounds from
    // its shape: gather dim 1 (__134 = 1), add __136 = 2, divide by __137 = 3.
    // 288 channels / 3 = 96 per chunk — this matches the -96 in the error.
    %853 = torch.operator "onnx.Add"(%arg6, %arg6) : (!torch.vtensor<[1,288,20,20],f32>, !torch.vtensor<[1,288,20,20],f32>) -> !torch.vtensor<[1,288,20,20],f32> 
    %854 = torch.operator "onnx.Shape"(%853) : (!torch.vtensor<[1,288,20,20],f32>) -> !torch.vtensor<[4],si64> 
    %855 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__134> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %856 = torch.operator "onnx.Gather"(%854, %855) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %858 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__136> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %859 = torch.operator "onnx.Add"(%856, %858) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %860 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__137> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %861 = torch.operator "onnx.Div"(%859, %860) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %863 = torch.operator "onnx.Mul"(%861, %arg1) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    // Slice %853 along axis %855 into three consecutive chunks
    // ([%arg1,%863), [%863,%866), [%866,%869)); statically the result shapes
    // become fully dynamic [?,?,?,?].
    %864 = torch.operator "onnx.Slice"(%853, %arg1, %863, %855) : (!torch.vtensor<[1,288,20,20],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],f32> 
    %865 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__139> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %866 = torch.operator "onnx.Mul"(%861, %865) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %867 = torch.operator "onnx.Slice"(%853, %863, %866, %855) : (!torch.vtensor<[1,288,20,20],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],f32> 
    %868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__140> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %869 = torch.operator "onnx.Mul"(%861, %868) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %870 = torch.operator "onnx.Slice"(%853, %866, %869, %855) : (!torch.vtensor<[1,288,20,20],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],f32> 
    // Depthwise (group = 96) 3x3 convolutions over the slices, residual add,
    // then a channel-axis concat of the conv output, %arg3, and the third slice.
    %871 = torch.operator "onnx.Conv"(%864, %arg13, %arg18) {torch.onnx.dilations = [1 : si64, 1 : si64], torch.onnx.group = 96 : si64, torch.onnx.kernel_shape = [3 : si64, 3 : si64], torch.onnx.pads = [1 : si64, 1 : si64, 1 : si64, 1 : si64], torch.onnx.strides = [1 : si64, 1 : si64]} : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[96,1,3,3],f32>, !torch.vtensor<[96],f32>) -> !torch.vtensor<[?,96,?,?],f32> 
    %872 = torch.operator "onnx.Add"(%871, %867) : (!torch.vtensor<[?,96,?,?],f32>, !torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,96,?,?],f32> 
    %873 = torch.operator "onnx.Conv"(%872, %arg19, %arg20) {torch.onnx.dilations = [1 : si64, 1 : si64], torch.onnx.group = 96 : si64, torch.onnx.kernel_shape = [3 : si64, 3 : si64], torch.onnx.pads = [1 : si64, 1 : si64, 1 : si64, 1 : si64], torch.onnx.strides = [1 : si64, 1 : si64]} : (!torch.vtensor<[?,96,?,?],f32>, !torch.vtensor<[96,1,3,3],f32>, !torch.vtensor<[96],f32>) -> !torch.vtensor<[?,96,?,?],f32> 
    %874 = torch.operator "onnx.Concat"(%871, %arg3, %870) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[?,96,?,?],f32>, !torch.vtensor<[?,96,?,?],f32>, !torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],f32> 
    // Gather the four dims of %874 (indices __141..__144 = 0..3) and build the
    // reshape target [N, C, H*W] to flatten the spatial dims.
    %875 = torch.operator "onnx.Shape"(%874) : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[4],si64> 
    %876 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__141> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %877 = torch.operator "onnx.Gather"(%875, %876) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %878 = torch.operator "onnx.Shape"(%874) : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[4],si64> 
    %879 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__142> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %880 = torch.operator "onnx.Gather"(%878, %879) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %881 = torch.operator "onnx.Shape"(%874) : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[4],si64> 
    %882 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__143> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %883 = torch.operator "onnx.Gather"(%881, %882) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %884 = torch.operator "onnx.Shape"(%874) : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[4],si64> 
    %885 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__144> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %886 = torch.operator "onnx.Gather"(%884, %885) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %887 = torch.operator "onnx.Mul"(%883, %886) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %889 = torch.operator "onnx.Unsqueeze"(%877, %arg1) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %891 = torch.operator "onnx.Unsqueeze"(%880, %arg1) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %893 = torch.operator "onnx.Unsqueeze"(%887, %arg1) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %894 = torch.operator "onnx.Concat"(%889, %891, %893) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> 
    %895 = torch.operator "onnx.Reshape"(%874, %894) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],f32> 
    // LayerNorm-like sequence over the transposed [N, H*W, C] tensor
    // (mean/subtract, divide by a sqrt of %arg2, scale + shift by %arg15).
    %896 = torch.operator "onnx.Transpose"(%895) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64]} : (!torch.vtensor<[?,?,?],f32>) -> !torch.vtensor<[?,?,?],f32> 
    %897 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<-1> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %898 = torch.operator "onnx.ReduceMean"(%896, %897) : (!torch.vtensor<[?,?,?],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,1],f32> 
    %899 = torch.operator "onnx.Sub"(%896, %898) : (!torch.vtensor<[?,?,?],f32>, !torch.vtensor<[?,?,1],f32>) -> !torch.vtensor<[?,?,?],f32> 
    %906 = torch.operator "onnx.Sqrt"(%arg2) : (!torch.vtensor<[?,?,1],f32>) -> !torch.vtensor<[?,?,1],f32> 
    %907 = torch.operator "onnx.Div"(%899, %906) : (!torch.vtensor<[?,?,?],f32>, !torch.vtensor<[?,?,1],f32>) -> !torch.vtensor<[?,?,?],f32> 
    %908 = torch.operator "onnx.Mul"(%907, %arg15) : (!torch.vtensor<[?,?,?],f32>, !torch.vtensor<[288],f32>) -> !torch.vtensor<[?,?,288],f32> 
    %909 = torch.operator "onnx.Add"(%908, %arg15) : (!torch.vtensor<[?,?,288],f32>, !torch.vtensor<[288],f32>) -> !torch.vtensor<[?,?,288],f32> 
    %910 = torch.operator "onnx.Shape"(%909) : (!torch.vtensor<[?,?,288],f32>) -> !torch.vtensor<[3],si64> 
    %911 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__150> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %912 = torch.operator "onnx.Gather"(%910, %911) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %913 = torch.operator "onnx.Shape"(%909) : (!torch.vtensor<[?,?,288],f32>) -> !torch.vtensor<[3],si64> 
    %914 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__151> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %915 = torch.operator "onnx.Gather"(%913, %914) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    // QKV-style projection to 864 = 3*8*36 channels, then reshape to 5-D with
    // target [N, seq, 3, 8, -1]; the trailing -1 comes from _onnx__Concat_1378
    // (all-0xFF payload, i.e. i64 -1). NOTE(review): the negative size in the
    // verifier error (-96) presumably arises from this -1 ("infer") dim not
    // being resolved before stream range computation — TODO confirm.
    %919 = torch.operator "onnx.MatMul"(%909, %arg16) : (!torch.vtensor<[?,?,288],f32>, !torch.vtensor<[288,864],f32>) -> !torch.vtensor<[?,?,864],f32> 
    %920 = torch.operator "onnx.Add"(%arg17, %919) : (!torch.vtensor<[864],f32>, !torch.vtensor<[?,?,864],f32>) -> !torch.vtensor<[?,?,864],f32> 
    %922 = torch.operator "onnx.Unsqueeze"(%912, %arg1) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %924 = torch.operator "onnx.Unsqueeze"(%915, %arg1) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %210 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_onnx__Concat_1376> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %211 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_onnx__Concat_1377> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %212 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_onnx__Concat_1378> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %925 = torch.operator "onnx.Concat"(%922, %924, %210, %211, %212) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> 
    %926 = torch.operator "onnx.Reshape"(%920, %925) : (!torch.vtensor<[?,?,864],f32>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,3,8,?],f32> 
    // This transpose is the op the stream.async.dispatch verifier flags
    // (model.torch_onnx.mlir:64:12 in the reported diagnostic).
    %927 = torch.operator "onnx.Transpose"(%926) {torch.onnx.perm = [2 : si64, 0 : si64, 3 : si64, 4 : si64, 1 : si64]} : (!torch.vtensor<[?,?,3,8,?],f32>) -> !torch.vtensor<[3,?,8,?,?],f32> 
    return %927: !torch.vtensor<[3,?,8,?,?],f32>
  }
}

// External constant payloads. Each hex blob appears to be a 4-byte alignment
// header (0x08000000) followed by a little-endian i64 payload — TODO confirm
// against the MLIR dense_resource encoding. So e.g. __134 = 1, __137 = 3,
// _onnx__Concat_1377 = 8, and _onnx__Concat_1378 = -1 (all-0xFF payload).
// NOTE(review): __150/__151 are referenced by the function above but have no
// entries here — presumably elided from the original model; verify the
// reproducer still parses as intended.
{-#
  dialect_resources: {
    builtin: {
      __134: "0x080000000100000000000000",
      __136: "0x080000000200000000000000",
      __137: "0x080000000300000000000000",
      __138: "0x080000000100000000000000",
      __139: "0x080000000200000000000000",
      __140: "0x080000000300000000000000",
      _onnx__Concat_1376: "0x080000000300000000000000",
      _onnx__Concat_1377: "0x080000000800000000000000",
      _onnx__Concat_1378: "0x08000000FFFFFFFFFFFFFFFF",
      __141: "0x080000000000000000000000",
      __142: "0x080000000100000000000000",
      __143: "0x080000000200000000000000",
      __144: "0x080000000300000000000000",
      __145: "0x080000000000000000000000"
   }
}
#-}

The following error is produced:

model.torch_onnx.mlir:64:12: error: 'stream.async.dispatch' op has invalid Read access range [0 to -96 for -96] of resource %62 with size 1382400; start > end
    %927 = torch.operator "onnx.Transpose"(%926) {torch.onnx.perm = [2 : si64, 0 : si64, 3 : si64, 4 : si64, 1 : si64]} : (!torch.vtensor<[?,?,3,8,?],f32>) -> !torch.vtensor<[3,?,8,?,?],f32> 
           ^
model.torch_onnx.mlir:64:12: note: see current operation: %92 = "stream.async.dispatch"(%21, %91, %21, %1, %23, %1, %1, %1) <{affinity = #hal.device.affinity<@__device_0>, entry_points = [@torch_jit$async_dispatch_12::@torch_jit$async_dispatch_12_transpose_3x1x8x1xD_f32], operandSegmentSizes = array<i32: 1, 2, 1, 1, 1, 1, 1>, tied_operands = [-1 : index]}> : (index, !stream.resource<transient>, index, index, index, index, index, index) -> !stream.resource<external>

The IR dump produced with '--mlir-print-ir-after-all --mlir-print-ir-before-all --mlir-disable-threading --mlir-elide-elementsattrs-if-larger=4' is 74 MB, so I am not able to upload it.

Steps to reproduce your issue

command:

iree-compile --iree-hal-target-backends=llvm-cpu model.torch_onnx.mlir

What component(s) does this issue relate to?

Compiler

Version information

No response

Additional context

No response

pdhirajkumarprasad commented 1 day ago

We are seeing multiple crashes in the front end due to LLVM changes; see for example https://github.com/nod-ai/SHARK-ModelDev/issues/852