NVIDIA / Fuser

A Fusion Code Generator for NVIDIA GPUs (commonly known as "nvFuser")
Other
271 stars 53 forks source link

replaceSymbolicSizes needs to process the first appearing ID in the same way as the rest of the IDs #3347

Closed naoyam closed 2 weeks ago

naoyam commented 2 weeks ago

Noticed a strange replacement result while working on #3344. Repro:

  auto tv0 = makeSymbolicTensor(3);
  fusion.addInput(tv0);
  auto tv1 = makeSymbolicTensor(2);
  fusion.addInput(tv1);
  auto i0 = IrBuilder::create<Val>(DataType::Index);
  fusion.addInput(i0);

  auto tv2 = reshape(tv0, {i0});
  auto tv3 = reshape(tv1, {i0});
  auto tv4 = add(tv2, tv3);
  fusion.addOutput(tv4);

  ExpressionEvaluator expr_eval;

  expr_eval.bind(tv0->axis(0)->extent(), 2L);
  expr_eval.bind(tv0->axis(1)->extent(), 4L);
  expr_eval.bind(tv0->axis(2)->extent(), 8L);
  expr_eval.bind(tv1->axis(0)->extent(), 8L);
  expr_eval.bind(tv1->axis(1)->extent(), 8L);
  expr_eval.bind(i0, 64L);

  auto initial_info = DynamicTransform::getInitialInfo(&fusion);
  auto info = DynamicTransformConcretizationInfo(&initial_info, &expr_eval);

  DynamicTransform::concretizeFusion(&fusion, &info);

  replaceSymbolicSizes(&fusion);

Before replacement:

Inputs:
  T0_g_float[ iS0{i0}, iS1{i2}, iS2{i3} ]
  T1_g_float[ iS3{i4}, iS4{i5} ]
  i6
Outputs:
  T4_g_float[ iS26{i6} ]

%kernel_math {
T5_l_float[ iS20{( ( i0 * i2 ) * i3 )}rf ] = view( T0_g_float[ iS0{i0}, iS1{i2}, iS2{i3} ] )
T6_l_float[ iS25{( i4 * i5 )}rf ] = view( T1_g_float[ iS3{i4}, iS4{i5} ] )
T4_g_float[ iS26{i6} ]
   = T5_l_float[ iS20{( ( i0 * i2 ) * i3 )}rf ]
   + T6_l_float[ iS25{( i4 * i5 )}rf ];
} // %kernel_math

After replacement:

Inputs:
  T0_g_float[ iS33{( (( (( getMetaData(T0) )).logical_size ))[0] )}, iS34{( (( (( getMetaData(T0) )).logical_size ))[1] )}, iS35{( (( (( getMetaData(T0) )).logical_size ))[2] )} ]
  T1_g_float[ iS36{( (( (( getMetaData(T1) )).logical_size ))[0] )}, iS37{( (( (( getMetaData(T1) )).logical_size ))[1] )} ]
  i6
Outputs:
  T4_g_float[ iS26{i6} ]

%kernel_math {
T5_l_float[ iS20{( ( ( (( (( getMetaData(T0) )).logical_size ))[0] ) * ( (( (( getMetaData(T0) )).logical_size ))[1] ) ) * ( (( (( getMetaData(T0) )).logical_size ))[2] ) )}rf ] = view( T0_g_float[ iS33{( (( (( getMetaData(T0) )).logical_size ))[0] )}, iS34{( (( (( getMetaData(T0) )).logical_size ))[1] )}, iS35{( (( (( getMetaData(T0) )).logical_size ))[2] )} ] )
T6_l_float[ iS32{( ( ( (( (( getMetaData(T0) )).logical_size ))[0] ) * ( (( (( getMetaData(T0) )).logical_size ))[1] ) ) * ( (( (( getMetaData(T0) )).logical_size ))[2] ) )}rf ] = view( T1_g_float[ iS36{( (( (( getMetaData(T1) )).logical_size ))[0] )}, iS37{( (( (( getMetaData(T1) )).logical_size ))[1] )} ] )
T4_g_float[ iS26{i6} ]
   = T5_l_float[ iS20{( ( ( (( (( getMetaData(T0) )).logical_size ))[0] ) * ( (( (( getMetaData(T0) )).logical_size ))[1] ) ) * ( (( (( getMetaData(T0) )).logical_size ))[2] ) )}rf ]
   + T6_l_float[ iS32{( ( ( (( (( getMetaData(T0) )).logical_size ))[0] ) * ( (( (( getMetaData(T0) )).logical_size ))[1] ) ) * ( (( (( getMetaData(T0) )).logical_size ))[2] ) )}rf ];
} // %kernel_math

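Notice that T4 is still T4_g_float[ iS26{i6} ]: its extent was left as i6, whereas the extents of T5 and T6 were both rewritten to the same T0-based expression.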

kevinstephano commented 2 weeks ago

Should this be tagged with a work stream area? Is this a bug or a task? Who should this be assigned to?

naoyam commented 2 weeks ago

I'm working on it in #3346. It's not strictly related, but I found a (small) rabbit hole around extent replacements and simplifications while working on slices and concat.

naoyam commented 2 weeks ago

Fixed by #3346