mn416 / QPULib

Language and compiler for the Raspberry Pi GPU
Other
430 stars 63 forks source link

Segmentation Fault in computeLiveOut #76

Open mill1000 opened 4 years ago

mill1000 commented 4 years ago

I'm attempting to implement SHA-256 with QPULib and have encountered a segmentation fault. Below is the backtrace from GDB when compiled for emulation.

Program received signal SIGSEGV, Segmentation fault.
0x000000000800ddfe in computeLiveOut(Seq<SmallSeq<int> >*, Seq<SmallSeq<int> >*, int, SmallSeq<int>*) ()
(gdb) bt
#0  0x000000000800ddfe in computeLiveOut(Seq<SmallSeq<int> >*, Seq<SmallSeq<int> >*, int, SmallSeq<int>*) ()
#1  0x000000000800df9c in liveness(Seq<Instr>*, Seq<SmallSeq<int> >*, Seq<SmallSeq<int> >*) ()
#2  0x000000000800ea18 in regAlloc(Seq<SmallSeq<int> >*, Seq<Instr>*) ()
#3  0x0000000008002142 in compileKernel(Seq<Instr>*, Stmt*) ()
#4  0x0000000008001be6 in Kernel<Ptr<Int>, Ptr<Int> >::Kernel(void (*)(Ptr<Int>, Ptr<Int>)) ()

Here's a minimal version that reproduces the fault.

#include <iostream>
#include "QPULib.h"

// Builds the expression ror(x, 7) ^ ror(x, 18) ^ (x >> 3) for the message
// schedule below. The rotate/shift amounts (7, 18, 3) match SHA-256's
// "small sigma 0" function.
// NOTE(review): SHA-256 specifies a logical right shift for the last term;
// confirm whether `>>` on Int is logical or arithmetic in QPULib.
static Int smsigma0(Int x) {
    return ror(x, 7) ^ ror(x, 18) ^ (x >> 3);
}

// Builds the expression ror(x, 17) ^ ror(x, 19) ^ (x >> 10) for the message
// schedule below. The rotate/shift amounts (17, 19, 10) match SHA-256's
// "small sigma 1" function.
// NOTE(review): SHA-256 specifies a logical right shift for the last term;
// confirm whether `>>` on Int is logical or arithmetic in QPULib.
static Int smsigma1(Int x) {
    return ror(x, 17) ^ ror(x, 19) ^ (x >> 10);
}

// Kernel function handed to compile() in main(). In this reduced form it only
// fills the 64-entry schedule array W; nothing is written to `hash`.
//
//   data - input pointer; W[0..15] are read from data[0], data[16], ..., data[240]
//   hash - output pointer; unused in this reduced reproduction
void execute_sha256_cpu(Ptr<Int> data, Ptr<Int> hash)
{
    Int W[64];
    // Declared but never used in this reduced reproduction.
    Int a, b, c, d, e, f, g, h;

    // These are ordinary C++ loops, so each iteration contributes its own
    // assignment to the kernel (the DEBUG dump in this report lists them
    // individually). Consecutive loads are 16 elements apart.
    for (uint32_t i = 0; i < 16; i++)
        W[i] = data[i*16];

    // Expand entries 16..63 from earlier entries, following the SHA-256
    // message-schedule recurrence.
    for (uint32_t i = 16; i < 64; i++)
        W[i] = smsigma1(W[i-2]) + W[i-7]+ smsigma0(W[i-15]) + W[i-16];
}

// Reproduction driver: compiles the kernel, prepares zero-filled input/output
// buffers, and invokes the kernel once on a single QPU.
int main(int argc, char **argv)
{
    // Compile the function to a QPU kernel k
    // NOTE(review): the backtrace in this report shows the fault inside the
    // Kernel constructor (compileKernel -> regAlloc -> liveness), so the crash
    // presumably happens here, before the kernel is ever invoked.
    auto k = compile(execute_sha256_cpu);

    k.setNumQPUs(1);

    // Allocate and initialise arrays shared between CPU and QPUs
    SharedArray<int> data(16*64), hash(16*64);
    for (uint32_t i = 0; i < 16*64; i++)
    {
        data[i] = 0;
        hash[i] = 0;
    }

    // Invoke the kernel with the two shared arrays as its arguments.
    k(&data,&hash);
}

Here's the output of the program when DEBUG is enabled:

Source code
===========

v0 = UNIFORM;
v1 = UNIFORM;
v4 = UNIFORM;
v5 = UNIFORM;
v6 = *(v5+(0 << 2));
v7 = *(v5+(16 << 2));
v8 = *(v5+(32 << 2));
v9 = *(v5+(48 << 2));
v10 = *(v5+(64 << 2));
v11 = *(v5+(80 << 2));
v12 = *(v5+(96 << 2));
v13 = *(v5+(112 << 2));
v14 = *(v5+(128 << 2));
v15 = *(v5+(144 << 2));
v16 = *(v5+(160 << 2));
v17 = *(v5+(176 << 2));
v18 = *(v5+(192 << 2));
v19 = *(v5+(208 << 2));
v20 = *(v5+(224 << 2));
v21 = *(v5+(240 << 2));
v78 = v7;
v79 = (((v78 ror 7) ^ (v78 ror 18)) ^ (v78 >> 3));
v80 = v20;
v81 = (((v80 ror 17) ^ (v80 ror 19)) ^ (v80 >> 10));
v22 = (((v81+v15)+v79)+v6);
v82 = v8;
v83 = (((v82 ror 7) ^ (v82 ror 18)) ^ (v82 >> 3));
v84 = v21;
v85 = (((v84 ror 17) ^ (v84 ror 19)) ^ (v84 >> 10));
v23 = (((v85+v16)+v83)+v7);
v86 = v9;
v87 = (((v86 ror 7) ^ (v86 ror 18)) ^ (v86 >> 3));
v88 = v22;
v89 = (((v88 ror 17) ^ (v88 ror 19)) ^ (v88 >> 10));
v24 = (((v89+v17)+v87)+v8);
v90 = v10;
v91 = (((v90 ror 7) ^ (v90 ror 18)) ^ (v90 >> 3));
v92 = v23;
v93 = (((v92 ror 17) ^ (v92 ror 19)) ^ (v92 >> 10));
v25 = (((v93+v18)+v91)+v9);
v94 = v11;
v95 = (((v94 ror 7) ^ (v94 ror 18)) ^ (v94 >> 3));
v96 = v24;
v97 = (((v96 ror 17) ^ (v96 ror 19)) ^ (v96 >> 10));
v26 = (((v97+v19)+v95)+v10);
v98 = v12;
v99 = (((v98 ror 7) ^ (v98 ror 18)) ^ (v98 >> 3));
v100 = v25;
v101 = (((v100 ror 17) ^ (v100 ror 19)) ^ (v100 >> 10));
v27 = (((v101+v20)+v99)+v11);
v102 = v13;
v103 = (((v102 ror 7) ^ (v102 ror 18)) ^ (v102 >> 3));
v104 = v26;
v105 = (((v104 ror 17) ^ (v104 ror 19)) ^ (v104 >> 10));
v28 = (((v105+v21)+v103)+v12);
v106 = v14;
v107 = (((v106 ror 7) ^ (v106 ror 18)) ^ (v106 >> 3));
v108 = v27;
v109 = (((v108 ror 17) ^ (v108 ror 19)) ^ (v108 >> 10));
v29 = (((v109+v22)+v107)+v13);
v110 = v15;
v111 = (((v110 ror 7) ^ (v110 ror 18)) ^ (v110 >> 3));
v112 = v28;
v113 = (((v112 ror 17) ^ (v112 ror 19)) ^ (v112 >> 10));
v30 = (((v113+v23)+v111)+v14);
v114 = v16;
v115 = (((v114 ror 7) ^ (v114 ror 18)) ^ (v114 >> 3));
v116 = v29;
v117 = (((v116 ror 17) ^ (v116 ror 19)) ^ (v116 >> 10));
v31 = (((v117+v24)+v115)+v15);
v118 = v17;
v119 = (((v118 ror 7) ^ (v118 ror 18)) ^ (v118 >> 3));
v120 = v30;
v121 = (((v120 ror 17) ^ (v120 ror 19)) ^ (v120 >> 10));
v32 = (((v121+v25)+v119)+v16);
v122 = v18;
v123 = (((v122 ror 7) ^ (v122 ror 18)) ^ (v122 >> 3));
v124 = v31;
v125 = (((v124 ror 17) ^ (v124 ror 19)) ^ (v124 >> 10));
v33 = (((v125+v26)+v123)+v17);
v126 = v19;
v127 = (((v126 ror 7) ^ (v126 ror 18)) ^ (v126 >> 3));
v128 = v32;
v129 = (((v128 ror 17) ^ (v128 ror 19)) ^ (v128 >> 10));
v34 = (((v129+v27)+v127)+v18);
v130 = v20;
v131 = (((v130 ror 7) ^ (v130 ror 18)) ^ (v130 >> 3));
v132 = v33;
v133 = (((v132 ror 17) ^ (v132 ror 19)) ^ (v132 >> 10));
v35 = (((v133+v28)+v131)+v19);
v134 = v21;
v135 = (((v134 ror 7) ^ (v134 ror 18)) ^ (v134 >> 3));
v136 = v34;
v137 = (((v136 ror 17) ^ (v136 ror 19)) ^ (v136 >> 10));
v36 = (((v137+v29)+v135)+v20);
v138 = v22;
v139 = (((v138 ror 7) ^ (v138 ror 18)) ^ (v138 >> 3));
v140 = v35;
v141 = (((v140 ror 17) ^ (v140 ror 19)) ^ (v140 >> 10));
v37 = (((v141+v30)+v139)+v21);
v142 = v23;
v143 = (((v142 ror 7) ^ (v142 ror 18)) ^ (v142 >> 3));
v144 = v36;
v145 = (((v144 ror 17) ^ (v144 ror 19)) ^ (v144 >> 10));
v38 = (((v145+v31)+v143)+v22);
v146 = v24;
v147 = (((v146 ror 7) ^ (v146 ror 18)) ^ (v146 >> 3));
v148 = v37;
v149 = (((v148 ror 17) ^ (v148 ror 19)) ^ (v148 >> 10));
v39 = (((v149+v32)+v147)+v23);
v150 = v25;
v151 = (((v150 ror 7) ^ (v150 ror 18)) ^ (v150 >> 3));
v152 = v38;
v153 = (((v152 ror 17) ^ (v152 ror 19)) ^ (v152 >> 10));
v40 = (((v153+v33)+v151)+v24);
v154 = v26;
v155 = (((v154 ror 7) ^ (v154 ror 18)) ^ (v154 >> 3));
v156 = v39;
v157 = (((v156 ror 17) ^ (v156 ror 19)) ^ (v156 >> 10));
v41 = (((v157+v34)+v155)+v25);
v158 = v27;
v159 = (((v158 ror 7) ^ (v158 ror 18)) ^ (v158 >> 3));
v160 = v40;
v161 = (((v160 ror 17) ^ (v160 ror 19)) ^ (v160 >> 10));
v42 = (((v161+v35)+v159)+v26);
v162 = v28;
v163 = (((v162 ror 7) ^ (v162 ror 18)) ^ (v162 >> 3));
v164 = v41;
v165 = (((v164 ror 17) ^ (v164 ror 19)) ^ (v164 >> 10));
v43 = (((v165+v36)+v163)+v27);
v166 = v29;
v167 = (((v166 ror 7) ^ (v166 ror 18)) ^ (v166 >> 3));
v168 = v42;
v169 = (((v168 ror 17) ^ (v168 ror 19)) ^ (v168 >> 10));
v44 = (((v169+v37)+v167)+v28);
v170 = v30;
v171 = (((v170 ror 7) ^ (v170 ror 18)) ^ (v170 >> 3));
v172 = v43;
v173 = (((v172 ror 17) ^ (v172 ror 19)) ^ (v172 >> 10));
v45 = (((v173+v38)+v171)+v29);
v174 = v31;
v175 = (((v174 ror 7) ^ (v174 ror 18)) ^ (v174 >> 3));
v176 = v44;
v177 = (((v176 ror 17) ^ (v176 ror 19)) ^ (v176 >> 10));
v46 = (((v177+v39)+v175)+v30);
v178 = v32;
v179 = (((v178 ror 7) ^ (v178 ror 18)) ^ (v178 >> 3));
v180 = v45;
v181 = (((v180 ror 17) ^ (v180 ror 19)) ^ (v180 >> 10));
v47 = (((v181+v40)+v179)+v31);
v182 = v33;
v183 = (((v182 ror 7) ^ (v182 ror 18)) ^ (v182 >> 3));
v184 = v46;
v185 = (((v184 ror 17) ^ (v184 ror 19)) ^ (v184 >> 10));
v48 = (((v185+v41)+v183)+v32);
v186 = v34;
v187 = (((v186 ror 7) ^ (v186 ror 18)) ^ (v186 >> 3));
v188 = v47;
v189 = (((v188 ror 17) ^ (v188 ror 19)) ^ (v188 >> 10));
v49 = (((v189+v42)+v187)+v33);
v190 = v35;
v191 = (((v190 ror 7) ^ (v190 ror 18)) ^ (v190 >> 3));
v192 = v48;
v193 = (((v192 ror 17) ^ (v192 ror 19)) ^ (v192 >> 10));
v50 = (((v193+v43)+v191)+v34);
v194 = v36;
v195 = (((v194 ror 7) ^ (v194 ror 18)) ^ (v194 >> 3));
v196 = v49;
v197 = (((v196 ror 17) ^ (v196 ror 19)) ^ (v196 >> 10));
v51 = (((v197+v44)+v195)+v35);
v198 = v37;
v199 = (((v198 ror 7) ^ (v198 ror 18)) ^ (v198 >> 3));
v200 = v50;
v201 = (((v200 ror 17) ^ (v200 ror 19)) ^ (v200 >> 10));
v52 = (((v201+v45)+v199)+v36);
v202 = v38;
v203 = (((v202 ror 7) ^ (v202 ror 18)) ^ (v202 >> 3));
v204 = v51;
v205 = (((v204 ror 17) ^ (v204 ror 19)) ^ (v204 >> 10));
v53 = (((v205+v46)+v203)+v37);
v206 = v39;
v207 = (((v206 ror 7) ^ (v206 ror 18)) ^ (v206 >> 3));
v208 = v52;
v209 = (((v208 ror 17) ^ (v208 ror 19)) ^ (v208 >> 10));
v54 = (((v209+v47)+v207)+v38);
v210 = v40;
v211 = (((v210 ror 7) ^ (v210 ror 18)) ^ (v210 >> 3));
v212 = v53;
v213 = (((v212 ror 17) ^ (v212 ror 19)) ^ (v212 >> 10));
v55 = (((v213+v48)+v211)+v39);
v214 = v41;
v215 = (((v214 ror 7) ^ (v214 ror 18)) ^ (v214 >> 3));
v216 = v54;
v217 = (((v216 ror 17) ^ (v216 ror 19)) ^ (v216 >> 10));
v56 = (((v217+v49)+v215)+v40);
v218 = v42;
v219 = (((v218 ror 7) ^ (v218 ror 18)) ^ (v218 >> 3));
v220 = v55;
v221 = (((v220 ror 17) ^ (v220 ror 19)) ^ (v220 >> 10));
v57 = (((v221+v50)+v219)+v41);
v222 = v43;
v223 = (((v222 ror 7) ^ (v222 ror 18)) ^ (v222 >> 3));
v224 = v56;
v225 = (((v224 ror 17) ^ (v224 ror 19)) ^ (v224 >> 10));
v58 = (((v225+v51)+v223)+v42);
v226 = v44;
v227 = (((v226 ror 7) ^ (v226 ror 18)) ^ (v226 >> 3));
v228 = v57;
v229 = (((v228 ror 17) ^ (v228 ror 19)) ^ (v228 >> 10));
v59 = (((v229+v52)+v227)+v43);
v230 = v45;
v231 = (((v230 ror 7) ^ (v230 ror 18)) ^ (v230 >> 3));
v232 = v58;
v233 = (((v232 ror 17) ^ (v232 ror 19)) ^ (v232 >> 10));
v60 = (((v233+v53)+v231)+v44);
v234 = v46;
v235 = (((v234 ror 7) ^ (v234 ror 18)) ^ (v234 >> 3));
v236 = v59;
v237 = (((v236 ror 17) ^ (v236 ror 19)) ^ (v236 >> 10));
v61 = (((v237+v54)+v235)+v45);
v238 = v47;
v239 = (((v238 ror 7) ^ (v238 ror 18)) ^ (v238 >> 3));
v240 = v60;
v241 = (((v240 ror 17) ^ (v240 ror 19)) ^ (v240 >> 10));
v62 = (((v241+v55)+v239)+v46);
v242 = v48;
v243 = (((v242 ror 7) ^ (v242 ror 18)) ^ (v242 >> 3));
v244 = v61;
v245 = (((v244 ror 17) ^ (v244 ror 19)) ^ (v244 >> 10));
v63 = (((v245+v56)+v243)+v47);
v246 = v49;
v247 = (((v246 ror 7) ^ (v246 ror 18)) ^ (v246 >> 3));
v248 = v62;
v249 = (((v248 ror 17) ^ (v248 ror 19)) ^ (v248 >> 10));
v64 = (((v249+v57)+v247)+v48);
v250 = v50;
v251 = (((v250 ror 7) ^ (v250 ror 18)) ^ (v250 >> 3));
v252 = v63;
v253 = (((v252 ror 17) ^ (v252 ror 19)) ^ (v252 >> 10));
v65 = (((v253+v58)+v251)+v49);
v254 = v51;
v255 = (((v254 ror 7) ^ (v254 ror 18)) ^ (v254 >> 3));
v256 = v64;
v257 = (((v256 ror 17) ^ (v256 ror 19)) ^ (v256 >> 10));
v66 = (((v257+v59)+v255)+v50);
v258 = v52;
v259 = (((v258 ror 7) ^ (v258 ror 18)) ^ (v258 >> 3));
v260 = v65;
v261 = (((v260 ror 17) ^ (v260 ror 19)) ^ (v260 >> 10));
v67 = (((v261+v60)+v259)+v51);
v262 = v53;
v263 = (((v262 ror 7) ^ (v262 ror 18)) ^ (v262 >> 3));
v264 = v66;
v265 = (((v264 ror 17) ^ (v264 ror 19)) ^ (v264 >> 10));
v68 = (((v265+v61)+v263)+v52);
v266 = v54;
v267 = (((v266 ror 7) ^ (v266 ror 18)) ^ (v266 >> 3));
v268 = v67;
v269 = (((v268 ror 17) ^ (v268 ror 19)) ^ (v268 >> 10));
v69 = (((v269+v62)+v267)+v53);
flush()
If (any(v0==0))
  v270 = (v1-1);
  v271 = 0;
  While (any(v271<v270))
    semaDec(15)
    v271 = (v271+1);
  End
  hostIRQ()
Else
  semaInc(15)
End

Segmentation fault (core dumped)