csarofeen / pytorch

Tensors and Dynamic neural networks in Python with strong GPU acceleration
http://pytorch.org
Other
26 stars 7 forks source link

TransformPropagator is generating inconsistency between the siblings #1760

Closed zasdfgbnm closed 2 years ago

zasdfgbnm commented 2 years ago

🐛 Describe the bug

// Reproducer for this issue: after rFactor-ing a Welford whose three results
// (avg, var_sum, n) were all split the same way, running TransformPropagator
// from the rFactor'd var_sum and printing the fusion shows the three
// rFactor'd tensors with differently ordered leaf domains.
TEST_F(NVFuserTest, TestTransformPropagatorSibling_CUDA) {
  Fusion fusion;
  FusionGuard fg(&fusion);

  // Single symbolic input; the printed root domain (i1, i2) shows it is 2-D.
  auto tv0 = makeSymbolicTensor(2);
  fusion.addInput(tv0);

  // Welford over axis 1. Only var_sum is registered as a fusion output.
  auto tvs = Welford(tv0, {1});
  fusion.addOutput(tvs.var_sum);

  // Apply the same three splits (factors 1, 2, 3 on axis 1) to each of the
  // three Welford results so that they start out with matching domains.
  // NOTE(review): the IterDomain ids in the printed fusion presumably depend
  // on the order of these calls -- keep the order as-is when comparing output.
  tvs.avg->split(1, 1);
  tvs.avg->split(1, 2);
  tvs.avg->split(1, 3);
  tvs.var_sum->split(1, 1);
  tvs.var_sum->split(1, 2);
  tvs.var_sum->split(1, 3);
  tvs.n->split(1, 1);
  tvs.n->split(1, 2);
  tvs.n->split(1, 3);

  // rFactor axes 1 and 4 of the whole Welford result; tvs2 holds the new
  // rFactor'd tensors (printed as T4/T5/T6 in the output).
  auto tvs2 = tvs.rFactor({1, 4});

  // Propagate transformations starting from the rFactor'd var_sum only.
  TransformPropagator::from(tvs2.var_sum);

  // Dump the fusion so the leaf domains of the sibling tensors can be compared.
  fusion.print();
}

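This generates inconsistent leaf domains for the sibling tensors T4, T5 and T6: T4 (Var) keeps the outer reduction axis directly after i1, whereas T6 (Avg) and T5 (Count) have it moved behind the size-3 and size-2 axes: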

%kernel {
T6_l[ iS60{i1}, iS67{3}rf, iS65{2}rf, rS66{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, rS63{1}rf ](Avg),
T4_l[ iS32{i1}, rS38{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, iS39{3}rf, iS37{2}rf, rS35{1}rf ](Var),
T5_l[ iS49{i1}, iS56{3}rf, iS54{2}rf, rS55{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, rS52{1}rf ](Count)
 = Welford ( T0_g[ iS0{i1}, iS75{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}, iS76{3}, iS74{2}, iS72{1} ](Avg), 
  allreduce = 0 )
T1_l[ iS68{i1}, rS69{3}, rS70{2} ](Avg),
T2_g[ iS40{i1}, rS41{3}, rS42{2} ](Var),
T3_l[ iS57{i1}, rS58{3}, rS59{2} ](Count)
 = Welford ( T6_l[ iS60{i1}, iS67{3}rf, iS65{2}rf, rS66{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, rS63{1}rf ](Avg)
  T4_l[ iS32{i1}, rS38{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, iS39{3}rf, iS37{2}rf, rS35{1}rf ](Var)
  T5_l[ iS49{i1}, iS56{3}rf, iS54{2}rf, rS55{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, rS52{1}rf ](Count)
  allreduce = 0 )

TransformPrinter : 
T0_g[ iS0{i1}, iS75{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}, iS76{3}, iS74{2}, iS72{1} ]
 root domain : (iS0{i1},iS1{i2})
  Split: iS1{i2} by factor 1 -> iS71{( ceilDiv(i2, 1) )}, iS72{1}, start offset: 0, stop offset: 0
  Split: iS71{( ceilDiv(i2, 1) )} by factor 2 -> iS73{( ceilDiv(( ceilDiv(i2, 1) ), 2) )}, iS74{2}, start offset: 0, stop offset: 0
  Split: iS73{( ceilDiv(( ceilDiv(i2, 1) ), 2) )} by factor 3 -> iS75{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}, iS76{3}, start offset: 0, stop offset: 0
T6_l[ iS60{i1}, iS67{3}rf, iS65{2}rf, rS66{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, rS63{1}rf ]
 root domain : (iS60{i1},rS61{i2}rf)
  Split: rS61{i2}rf by factor 1 -> iS62{( ceilDiv(i2, 1) )}rf, rS63{1}rf, start offset: 0, stop offset: 0
  Split: iS62{( ceilDiv(i2, 1) )}rf by factor 2 -> iS64{( ceilDiv(( ceilDiv(i2, 1) ), 2) )}rf, iS65{2}rf, start offset: 0, stop offset: 0
  Split: iS64{( ceilDiv(( ceilDiv(i2, 1) ), 2) )}rf by factor 3 -> rS66{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, iS67{3}rf, start offset: 0, stop offset: 0
 rfactor domain : (iS60{i1},rS66{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf,iS67{3}rf,iS65{2}rf,rS63{1}rf)
T4_l[ iS32{i1}, rS38{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, iS39{3}rf, iS37{2}rf, rS35{1}rf ]
 root domain : (iS32{i1},rS33{i2}rf)
  Split: rS33{i2}rf by factor 1 -> iS34{( ceilDiv(i2, 1) )}rf, rS35{1}rf, start offset: 0, stop offset: 0
  Split: iS34{( ceilDiv(i2, 1) )}rf by factor 2 -> iS36{( ceilDiv(( ceilDiv(i2, 1) ), 2) )}rf, iS37{2}rf, start offset: 0, stop offset: 0
  Split: iS36{( ceilDiv(( ceilDiv(i2, 1) ), 2) )}rf by factor 3 -> rS38{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, iS39{3}rf, start offset: 0, stop offset: 0
 rfactor domain : (iS32{i1},rS38{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf,iS39{3}rf,iS37{2}rf,rS35{1}rf)
T5_l[ iS49{i1}, iS56{3}rf, iS54{2}rf, rS55{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, rS52{1}rf ]
 root domain : (iS49{i1},rS50{i2}rf)
  Split: rS50{i2}rf by factor 1 -> iS51{( ceilDiv(i2, 1) )}rf, rS52{1}rf, start offset: 0, stop offset: 0
  Split: iS51{( ceilDiv(i2, 1) )}rf by factor 2 -> iS53{( ceilDiv(( ceilDiv(i2, 1) ), 2) )}rf, iS54{2}rf, start offset: 0, stop offset: 0
  Split: iS53{( ceilDiv(( ceilDiv(i2, 1) ), 2) )}rf by factor 3 -> rS55{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf, iS56{3}rf, start offset: 0, stop offset: 0
 rfactor domain : (iS49{i1},rS55{( ceilDiv(( ceilDiv(( ceilDiv(i2, 1) ), 2) ), 3) )}rf,iS56{3}rf,iS54{2}rf,rS52{1}rf)
T2_g[ iS40{i1}, rS41{3}, rS42{2} ]
 root domain : (iS40{i1},rS41{3},rS42{2})
T1_l[ iS68{i1}, rS69{3}, rS70{2} ]
 root domain : (iS68{i1},rS69{3},rS70{2})
T3_l[ iS57{i1}, rS58{3}, rS59{2} ]
 root domain : (iS57{i1},rS58{3},rS59{2})
}

Versions

devel

zasdfgbnm commented 2 years ago

Self-assigning to take a look.