Closed corsix closed 8 years ago
Potentially fixable along the lines of:
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index de47166..d6d801f 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1157,7 +1157,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
- /* NYI: Check that UREFO is still open and not aliasing a slot. */
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
diff --git a/src/lj_record.c b/src/lj_record.c
index 8a72b0c..335dda7 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1541,13 +1541,16 @@ noconstify:
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
if (!uvp->closed) {
+ uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv));
/* In current stack? */
if (uvval(uvp) >= tvref(J->L->stack) &&
uvval(uvp) < tvref(J->L->maxstack)) {
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
if (slot >= 0) { /* Aliases an SSA slot? */
+ TRef ofs = lj_ir_kint(J, (slot - 1 - LJ_FR2) * 8);
+ TRef sref = emitir(IRT(IR_ADD, IRT_P32), REF_BASE, ofs);
+ emitir(IRTG(IR_EQ, IRT_P32), uref, sref);
slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
- /* NYI: add IR to guard that it's still aliasing the same slot. */
if (val == 0) {
return getslot(J, slot);
} else {
@@ -1557,7 +1560,9 @@ noconstify:
}
}
}
- uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv));
+ emitir(IRTG(IR_UGT, IRT_P32), /* TODO_GC64: P32 -> IGC */
+ emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE),
+ lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
} else {
needbarrier = 1;
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv));
Applied with some tuning. uref - k == base
is preferable to uref == base + k
, since uref is otherwise unused and x86/x64 only has two-operand instructions. Thanks!
It looks like an explicit REF_BASE
reference in the IR breaks the register allocator. I'm still trying to understand why.
Reproducible on x86 (32 bit only) with assertions turned on and the test ffi_gcstep_recursive.lua
. The assertion hits in ra_rematk()
because REF_BASE
has gained a spill slot.
It's because of asm_fuseload
. It probably should learn to fuse with &jit_base
instead of forcing a spill.
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 6cd3800..f769c5a 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -317,7 +317,15 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
as->mrm.idx = RID_NONE;
return RID_MRM;
}
- if (ir->o == IR_KNUM) {
+ if (ref == REF_BASE) {
+ RegSet avail = as->freeset & ~as->modset & RSET_FPR;
+ lua_assert(allow != RSET_EMPTY);
+ if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
+ as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base);
+ as->mrm.base = as->mrm.idx = RID_NONE;
+ return RID_MRM;
+ }
+ } else if (ir->o == IR_KNUM) {
RegSet avail = as->freeset & ~as->modset & RSET_FPR;
lua_assert(allow != RSET_EMPTY);
if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
@@ -369,7 +377,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
}
- if (!(as->freeset & allow) && !irref_isk(ref) &&
+ if (!(as->freeset & allow) && !irref_isk(ref) && (ref != REF_BASE) &&
(allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
goto fusespill;
return ra_allocref(as, ref, allow);
!irref_isk(ref) && (ref != REF_BASE)
probably should be !emit_canremat(ref)
Applied with modifications. Thanks!
Quoth http://luajit.org/status.html:
This corresponds to the following from
lj_record.c
:and the following from
lj_asm_x86.h
:And results in the failure of the following testcase: