"""
    scorep1(opp, me)

Score one round from the two encoded choices `opp` and `me`.

The result is `me`, plus 3 when both choices are equal (draw), plus 6 when `me`
is the cyclic successor of `opp` (win). The arithmetic is branch-free: the
`Bool` flags are multiplied into the bonus constants.

NOTE(review): presumably `opp` and `me` are in `1:3` (rock/paper/scissors
encoding) — confirm against the caller.
"""
@inline function scorep1(opp, me)
    tied = opp == me
    # Win when `me` is one step ahead of `opp`; `me + 0x2 == opp` is the wrap-around case.
    won = (opp + 0x1 == me) | (me + 0x2 == opp)
    draw_bonus = 0x3 * tied
    win_bonus = 0x6 * won
    return me + draw_bonus + win_bonus
end
"""
    scorep2(opp, target)

Score one round from the encoded opponent choice `opp` and the desired outcome
`target`.

The own choice is derived by shifting `opp` cyclically within `1:3` by an amount
that depends on `target`; the result is that choice plus `3 * (target - 1)`.

NOTE(review): presumably `target` is 1 = lose, 2 = draw, 3 = win and `opp` is in
`1:3` — confirm against the caller.
"""
@inline function scorep2(opp, target)
    # Cyclic offset applied to `opp`: target 1 -> 2, target 2 -> 3, target 3 -> 1.
    offset = mod1(target + 0x1, 0x3)
    pick = mod1(opp + offset, 0x3)
    outcome_points = 0x3 * (target - 0x1)
    return pick + outcome_points
end
"""
    solve(file::String, f=scorep1)

Read the whole file at path `file` as bytes and score it with `f` by forwarding
to the raw-data `solve` method.

`f` defaults to `scorep1`, so `solve(file)` behaves as before; passing another
scoring function (e.g. `scorep2`) lets the same file be scored differently
without reading it by hand first.
"""
function solve(file::String, f::F=scorep1) where F
    return solve(read(file), f)
end
"""
    solve(data, f=scorep1)

Sum `f(opp, me)` over `data`, read in steps of four elements.

For each start index `idx` in `1, 5, 9, …` the element at `idx` is offset
against `'A'` and the element at `idx + 2` against `'X'`, so both map to `1, 2, 3`
for `A/B/C` and `X/Y/Z`. The elements at `idx + 1` and `idx + 3` are never read.

The loop stops at `length(data) - 2`, so a trailing partial record (one or two
leftover elements, e.g. an extra newline at the end) is ignored instead of being
read out of bounds under `@inbounds`. Empty input returns zero.

The total is accumulated in a `UInt16`; with `UInt8` scores it wraps silently
once the sum exceeds `typemax(UInt16)`.

NOTE(review): assumes `data` is a 1-based byte vector of 4-byte records such as
`A Y\\n` — confirm against the input format.
"""
function solve(data, f::F=scorep1) where F
    total = UInt16(0)
    # The last start index must leave room for `idx + 2`; bounds checks are off below.
    last_start = length(data) - 2
    @inbounds @simd for idx in 1:4:last_start
        opp = data[idx] - UInt8('A') + 0x1
        me = data[idx + 2] - UInt8('X') + 0x1
        total += f(opp, me)
    end
    return total
end
Adding `mark_start()` to the tight inner loop breaks vectorization pretty badly: the generated code goes from happily using lots of `xmm` registers to only using `eax` & friends. I just wanted to know how much performance was still left on the table, which is kind of hard to do when the tool itself breaks the vectorization. I don't yet know how to fix this, so this issue is just here for tracking it in general, but it ought to be possible to have our cake & eat it too here.
Adding `mark_start()` to the tight inner loop here (the `@simd` loop in `solve`) breaks vectorization pretty badly. It goes from happily using lots of `xmm` registers to only using `eax` & friends. I just wanted to know how much performance was still left on the table, which is kind of hard to do when the tool itself breaks the vectorization. I don't yet know how to fix this, so this issue is just here for tracking it in general, but it ought to be possible to have our cake & eat it too here.