illinois-ceesd / mirgecom

MIRGE-Com is the workhorse simulation application for the Center for Exascale-Enabled Scramjet Design at the University of Illinois.
Other
11 stars 19 forks source link

Fused kernels appear to have unreachable conditionals #927

Closed nchristensen closed 1 year ago

nchristensen commented 1 year ago

I've been dumping fused kernels at this point in the transformation process to look at them. Some of the kernels have assignment instructions of the following form (whitespace added for clarity):

_pt_dist_id_171[iel_ensm2, idof_ensm2] = cse_224_subst_0 + ((cse_225_subst_0 + (-1)*((0.5*(cse_95[iel_ensm2, idof_ensm2]*cse_95[iel_ensm2, idof_ensm2] + cse_171[iel_ensm2, idof_ensm2]*cse_171[iel_ensm2, idof_ensm2] + cse_226[iel_ensm2, idof_ensm2]*cse_226[iel_ensm2, idof_ensm2])) / cse_158[iel_ensm2, idof_ensm2])) / cse_158[iel_ensm2, idof_ensm2] + (-1)*8314.46261815324*cse_224_subst_0*(0 + cse_227_subst_0*((2.03611116 + 0.00732270755*cse_224_subst_0 + (-2.2369263833333335e-06)*cse_224_subst_0**2 + (3.680573075e-10)*cse_224_subst_0**3 + (-2.51412122e-14)*cse_224_subst_0**4 + 4939.88614 / cse_224_subst_0 
if cse_224_subst_0 > 1000.0 else 
3.95920148 + (-0.003785261235)*cse_224_subst_0 + (1.9033009733333333e-05)*cse_224_subst_0**2 + (-1.7289718825e-08)*cse_224_subst_0**3 + (5.3976874600000004e-12)*cse_224_subst_0**4 + 5089.77593 / cse_224_subst_0) + -1.0)*0.035645540742853074 + cse_228_subst_0*((3.28253784 + 0.00074154377*cse_224_subst_0 + (-2.526555563333333e-07)*cse_224_subst_0**2 + (5.236763875e-11)*cse_224_subst_0**3 + (-4.33435588e-15)*cse_224_subst_0**4 + (-1088.45772) / cse_224_subst_0 
if cse_224_subst_0 > 1000.0 else
 3.78245636 + (-0.00149836708)*cse_224_subst_0 + (3.282434003333333e-06)*cse_224_subst_0**2 + (-2.4203237725e-09)*cse_224_subst_0**3 + (6.48745674e-13)*cse_224_subst_0**4 + (-1063.94356) / cse_224_subst_0) + -1.0)*0.03125195324707794 + cse_229_subst_0*((3.85746029 + 0.00220718513*cse_224_subst_0 + (-7.382713466666667e-07)*cse_224_subst_0**2 + (1.30872547e-10)*cse_224_subst_0**3 + (-9.44168328e-15)*cse_224_subst_0**4 + (-48759.166) / cse_224_subst_0 
if cse_224_subst_0 > 1000.0 else
 2.35677352 + 0.004492298385*cse_224_subst_0 + (-2.3745208966666665e-06)*cse_224_subst_0**2 + (6.14797555e-10)*cse_224_subst_0**3 + (-2.8739909599999997e-14)*cse_224_subst_0**4 + (-48371.9697) / cse_224_subst_0) + -1.0)*0.022722624917630486 + cse_230_subst_0*((2.71518561 + 0.001031263715*cse_224_subst_0 + (-3.329419236666667e-07)*cse_224_subst_0**2 + (5.7513252e-11)*cse_224_subst_0**3 + (-4.07295432e-15)*cse_224_subst_0**4 + (-14151.8724) / cse_224_subst_0 
if cse_224_subst_0 > 1000.0 else 
3.57953347 + (-0.00030517684)*cse_224_subst_0 + (3.3893811e-07)*cse_224_subst_0**2 + (2.26751471e-10)*cse_224_subst_0**3 + (-1.808848998e-13)*cse_224_subst_0**4 + (-14344.086) / cse_224_subst_0) + -1.0)*0.03570153516601214 + cse_231_subst_0*((3.03399249 + 0.00108845902*cse_224_subst_0 + (-5.469083933333333e-08)*cse_224_subst_0**2 + (-2.426049675e-11)*cse_224_subst_0**3 + (3.36401984e-15)*cse_224_subst_0**4 + (-30004.2971) / cse_224_subst_0 if cse_224_subst_0 > 1000.0 else 4.19864056 + (-0.00101821705)*cse_224_subst_0 + (2.17346737e-06)*cse_224_subst_0**2 + (-1.371992655e-09)*cse_224_subst_0**3 + (3.54395634e-13)*cse_224_subst_0**4 + (-30293.7267) / cse_224_subst_0) + -1.0)*0.055509297807382736 + cse_232_subst_0*((3.3372792 + (-2.470123655e-05)*cse_224_subst_0 + (1.6648559266666665e-07)*cse_224_subst_0**2 + (-4.48915985e-11)*cse_224_subst_0**3 + (4.00510752e-15)*cse_224_subst_0**4 + (-950.158922) / cse_224_subst_0 
if cse_224_subst_0 > 1000.0 else
 2.34433112 + 0.003990260375*cse_224_subst_0 + (-6.4927169999999995e-06)*cse_224_subst_0**2 + (5.03930235e-09)*cse_224_subst_0**3 + (-1.4752235220000002e-12)*cse_224_subst_0**4 + (-917.935173) / cse_224_subst_0) + -1.0)*0.49603174603174605 + cse_233_subst_0*((2.92664 + 0.0007439884*cse_224_subst_0 + (-1.8949200000000001e-07)*cse_224_subst_0**2 + (2.5242595e-11)*cse_224_subst_0**3 + (-1.3506701999999999e-15)*cse_224_subst_0**4 + (-922.7977) / cse_224_subst_0 
if cse_224_subst_0 > 1000.0 else
 3.298677 + 0.0007041202*cse_224_subst_0 + (-1.3210739999999999e-06)*cse_224_subst_0**2 + (1.41037875e-09)*cse_224_subst_0**3 + (-4.889707999999999e-13)*cse_224_subst_0**4 + (-1020.8999) / cse_224_subst_0) + -1.0)*0.03569643749553795)) / (8314.46261815324*(0 + cse_227_subst_0*((2.03611116 + 0.0146454151*cse_224_subst_0 + (-6.71077915e-06)*cse_224_subst_0**2 + (1.47222923e-09)*cse_224_subst_0**3 + (-1.25706061e-13)*cse_224_subst_0**4 
if cse_224_subst_0 > 1000.0 else
 3.95920148 + (-0.00757052247)*cse_224_subst_0 + (5.70990292e-05)*cse_224_subst_0**2 + (-6.91588753e-08)*cse_224_subst_0**3 + (2.69884373e-11)*cse_224_subst_0**4) + -1.0)*0.035645540742853074 + cse_228_subst_0*((3.28253784 + 0.00148308754*cse_224_subst_0 + (-7.57966669e-07)*cse_224_subst_0**2 + (2.09470555e-10)*cse_224_subst_0**3 + (-2.16717794e-14)*cse_224_subst_0**4 
if cse_224_subst_0 > 1000.0 else
 3.78245636 + (-0.00299673416)*cse_224_subst_0 + (9.84730201e-06)*cse_224_subst_0**2 + (-9.68129509e-09)*cse_224_subst_0**3 + (3.24372837e-12)*cse_224_subst_0**4) + -1.0)*0.03125195324707794 + cse_229_subst_0*((3.85746029 + 0.00441437026*cse_224_subst_0 + (-2.21481404e-06)*cse_224_subst_0**2 + (5.23490188e-10)*cse_224_subst_0**3 + (-4.72084164e-14)*cse_224_subst_0**4 
if cse_224_subst_0 > 1000.0 else
 2.35677352 + 0.00898459677*cse_224_subst_0 + (-7.12356269e-06)*cse_224_subst_0**2 + (2.45919022e-09)*cse_224_subst_0**3 + (-1.43699548e-13)*cse_224_subst_0**4) + -1.0)*0.022722624917630486 + cse_230_subst_0*((2.71518561 + 0.00206252743*cse_224_subst_0 + (-9.98825771e-07)*cse_224_subst_0**2 + (2.30053008e-10)*cse_224_subst_0**3 + (-2.03647716e-14)*cse_224_subst_0**4 
if cse_224_subst_0 > 1000.0 else
 3.57953347 + (-0.00061035368)*cse_224_subst_0 + (1.01681433e-06)*cse_224_subst_0**2 + (9.07005884e-10)*cse_224_subst_0**3 + (-9.04424499e-13)*cse_224_subst_0**4) + -1.0)*0.03570153516601214 + cse_231_subst_0*((3.03399249 + 0.00217691804*cse_224_subst_0 + (-1.64072518e-07)*cse_224_subst_0**2 + (-9.7041987e-11)*cse_224_subst_0**3 + (1.68200992e-14)*cse_224_subst_0**4 
if cse_224_subst_0 > 1000.0 else
 4.19864056 + (-0.0020364341)*cse_224_subst_0 + (6.52040211e-06)*cse_224_subst_0**2 + (-5.48797062e-09)*cse_224_subst_0**3 + (1.77197817e-12)*cse_224_subst_0**4) + -1.0)*0.055509297807382736 + cse_232_subst_0*((3.3372792 + (-4.94024731e-05)*cse_224_subst_0 + (4.99456778e-07)*cse_224_subst_0**2 + (-1.79566394e-10)*cse_224_subst_0**3 + (2.00255376e-14)*cse_224_subst_0**4 
if cse_224_subst_0 > 1000.0 else
 2.34433112 + 0.00798052075*cse_224_subst_0 + (-1.9478151e-05)*cse_224_subst_0**2 + (2.01572094e-08)*cse_224_subst_0**3 + (-7.37611761e-12)*cse_224_subst_0**4) + -1.0)*0.49603174603174605 + cse_233_subst_0*((2.92664 + 0.0014879768*cse_224_subst_0 + (-5.68476e-07)*cse_224_subst_0**2 + (1.0097038e-10)*cse_224_subst_0**3 + (-6.753351e-15)*cse_224_subst_0**4 
if cse_224_subst_0 > 1000.0 else
 3.298677 + 0.0014082404*cse_224_subst_0 + (-3.963222e-06)*cse_224_subst_0**2 + (5.641515e-09)*cse_224_subst_0**3 + (-2.444854e-12)*cse_224_subst_0**4) + -1.0)*0.03569643749553795))  {id=_pt_dist_id_171_store, tags=EinsumTag(orig_loop_nest=frozenset({'idof_0_0_', 'iel_0_2_'}))}

Unless I misunderstand how conditionals work in loopy, it looks to me as though all of the clauses after the second if-else are unreachable. This hurts prefetching because some of these unreachable clauses can involve dof arrays that compete for local memory.

nchristensen commented 1 year ago

This is from a smoke_test_ks_3d run with the current default emirge package versions.

nchristensen commented 1 year ago

The kernel is actually fine. Loopy's string representation of if-else statements appears to be ambiguous, as it gives no way to see where the else branch terminates.