Closed donmor closed 8 months ago
贴一下gdb的汇编输出
x/-100i $pc
No line number information available for address 0x7ffff4a36a5b
0x7ffff4a36a5c <__log>: addi.d $sp, $sp, -16(0xff0)
0x7ffff4a36a60 <__log+4>: fst.d $fa0, $sp, 8(0x8)
0x7ffff4a36a64 <__log+8>: ld.d $t2, $sp, 8(0x8)
0x7ffff4a36a68 <__log+12>: move $t0, $zero
0x7ffff4a36a6c <__log+16>: lu32i.d $t0, 131072(0x20000)
0x7ffff4a36a70 <__log+20>: addi.w $t3, $zero, -1(0xfff)
0x7ffff4a36a74 <__log+24>: lu52i.d $t0, $t0, -1023(0xc01)
0x7ffff4a36a78 <__log+28>: lu32i.d $t3, 198911(0x308ff)
0x7ffff4a36a7c <__log+32>: add.d $t0, $t2, $t0
0x7ffff4a36a80 <__log+36>: bgeu $t3, $t0, 208(0xd0) # 0x7ffff4a36b50 <__log+244>
0x7ffff4a36a84 <__log+40>: srli.d $t1, $t2, 0x30
0x7ffff4a36a88 <__log+44>: lu12i.w $t3, 7(0x7)
0x7ffff4a36a8c <__log+48>: addi.w $t0, $t1, -16(0xff0)
0x7ffff4a36a90 <__log+52>: ori $t3, $t3, 0xfdf
0x7ffff4a36a94 <__log+56>: bltu $t3, $t0, 372(0x174) # 0x7ffff4a36c08 <__log+428>
0x7ffff4a36a98 <__log+60>: move $t1, $zero
0x7ffff4a36a9c <__log+64>: lu32i.d $t1, -393216(0xa0000)
0x7ffff4a36aa0 <__log+68>: lu52i.d $t1, $t1, -1023(0xc01)
0x7ffff4a36aa4 <__log+72>: add.d $t1, $t2, $t1
0x7ffff4a36aa8 <__log+76>: bstrpick.d $t3, $t1, 0x33, 0x2d
0x7ffff4a36aac <__log+80>: addi.d $t3, $t3, 9(0x9)
0x7ffff4a36ab0 <__log+84>: pcalau12i $t0, 85(0x55)
0x7ffff4a36ab4 <__log+88>: addi.d $t0, $t0, 1680(0x690)
0x7ffff4a36ab8 <__log+92>: slli.d $t3, $t3, 0x4
0x7ffff4a36abc <__log+96>: fldx.d $fa2, $t0, $t3
0x7ffff4a36ac0 <__log+100>: add.d $t4, $t0, $t3
0x7ffff4a36ac4 <__log+104>: move $t3, $zero
0x7ffff4a36ac8 <__log+108>: lu52i.d $t3, $t3, -1(0xfff)
0x7ffff4a36acc <__log+112>: and $t3, $t1, $t3
0x7ffff4a36ad0 <__log+116>: srai.d $t1, $t1, 0x34
0x7ffff4a36ad4 <__log+120>: movgr2fr.d $fa1, $t1
0x7ffff4a36ad8 <__log+124>: pcalau12i $t1, 65(0x41)
0x7ffff4a36adc <__log+128>: fld.d $fa3, $t4, 8(0x8)
0x7ffff4a36ae0 <__log+132>: fld.d $fa4, $t1, -728(0xd28)
0x7ffff4a36ae4 <__log+136>: fld.d $fa0, $t0, 0
0x7ffff4a36ae8 <__log+140>: sub.d $t2, $t2, $t3
0x7ffff4a36aec <__log+144>: movgr2fr.d $fa5, $t2
0x7ffff4a36af0 <__log+148>: ffint.d.w $fa1, $fa1
0x7ffff4a36af4 <__log+152>: fmadd.d $fa2, $fa2, $fa5, $fa4
0x7ffff4a36af8 <__log+156>: fmadd.d $fa0, $fa0, $fa1, $fa3
0x7ffff4a36afc <__log+160>: fld.d $fa3, $t0, 48(0x30)
0x7ffff4a36b00 <__log+164>: fld.d $fa4, $t0, 40(0x28)
0x7ffff4a36b04 <__log+168>: fld.d $ft0, $t0, 32(0x20)
0x7ffff4a36b08 <__log+172>: fld.d $ft2, $t0, 24(0x18)
0x7ffff4a36b0c <__log+176>: fld.d $fa6, $t0, 8(0x8)
0x7ffff4a36b10 <__log+180>: fld.d $fa5, $t0, 16(0x10)
0x7ffff4a36b14 <__log+184>: fmul.d $fa7, $fa2, $fa2
0x7ffff4a36b18 <__log+188>: fadd.d $ft1, $fa2, $fa0
0x7ffff4a36b1c <__log+192>: fmadd.d $fa4, $fa3, $fa2, $fa4
0x7ffff4a36b20 <__log+196>: fmadd.d $ft0, $ft0, $fa2, $ft2
0x7ffff4a36b24 <__log+200>: fsub.d $fa3, $fa0, $ft1
0x7ffff4a36b28 <__log+204>: fmul.d $fa0, $fa2, $fa7
0x7ffff4a36b2c <__log+208>: fadd.d $fa2, $fa3, $fa2
0x7ffff4a36b30 <__log+212>: fmadd.d $fa4, $fa4, $fa7, $ft0
0x7ffff4a36b34 <__log+216>: fmadd.d $fa1, $fa6, $fa1, $fa2
0x7ffff4a36b38 <__log+220>: fmadd.d $fa1, $fa5, $fa7, $fa1
0x7ffff4a36b3c <__log+224>: fmadd.d $fa0, $fa0, $fa4, $fa1
0x7ffff4a36b40 <__log+228>: fadd.d $fa0, $fa0, $ft1
0x7ffff4a36b44 <__log+232>: addi.d $sp, $sp, 16(0x10)
0x7ffff4a36b48 <__log+236>: jirl $zero, $ra, 0
0x7ffff4a36b4c <__log+240>: andi $zero, $zero, 0x0
0x7ffff4a36b50 <__log+244>: move $t0, $zero
0x7ffff4a36b54 <__log+248>: lu52i.d $t0, $t0, 1023(0x3ff)
0x7ffff4a36b58 <__log+252>: beq $t2, $t0, 304(0x130) # 0x7ffff4a36c88 <__log+556>
0x7ffff4a36b5c <__log+256>: pcalau12i $t0, 65(0x41)
0x7ffff4a36b60 <__log+260>: fld.d $fa4, $t0, -560(0xdd0)
0x7ffff4a36b64 <__log+264>: pcalau12i $t0, 65(0x41)
0x7ffff4a36b68 <__log+268>: fld.d $fa1, $t0, -712(0xd38)
0x7ffff4a36b6c <__log+272>: pcalau12i $t0, 85(0x55)
0x7ffff4a36b70 <__log+276>: addi.d $t0, $t0, 1680(0x690)
0x7ffff4a36b74 <__log+280>: fsub.d $fa1, $fa0, $fa1
0x7ffff4a36b78 <__log+284>: fld.d $fa5, $t0, 112(0x70)
0x7ffff4a36b7c <__log+288>: fld.d $fa0, $t0, 120(0x78)
0x7ffff4a36b80 <__log+292>: fld.d $fa2, $t0, 96(0x60)
0x7ffff4a36b84 <__log+296>: fmadd.d $ft0, $fa1, $fa4, $fa1
0x7ffff4a36b88 <__log+300>: fneg.d $fa3, $fa1
0x7ffff4a36b8c <__log+304>: fmul.d $fa7, $fa1, $fa1
x/10i $pc
=> 0x7ffff4a36b90 <__log+308>: fmadd.d $fa0, $fa0, $fa1, $fa5
0x7ffff4a36b94 <__log+312>: fld.d $fa6, $t0, 128(0x80)
0x7ffff4a36b98 <__log+316>: fld.d $fa5, $t0, 88(0x58)
0x7ffff4a36b9c <__log+320>: fld.d $ft1, $t0, 56(0x38)
0x7ffff4a36ba0 <__log+324>: fld.d $ft3, $t0, 64(0x40)
0x7ffff4a36ba4 <__log+328>: fmadd.d $fa3, $fa3, $fa4, $ft0
0x7ffff4a36ba8 <__log+332>: fmadd.d $ft2, $fa2, $fa1, $fa5
0x7ffff4a36bac <__log+336>: fmul.d $fa5, $fa1, $fa7
0x7ffff4a36bb0 <__log+340>: fmadd.d $fa0, $fa6, $fa7, $fa0
0x7ffff4a36bb4 <__log+344>: fld.d $fa2, $t0, 136(0x88)
寄存器信息:
(gdb) i registers
r0 0x0 0
r1 0x555555ccfac0 0x555555ccfac0 <voodoo_init(int)+416>
r2 0x7fffe53cf140 0x7fffe53cf140
r3 0x7ffffffeea00 0x7ffffffeea00
r4 0x55555c5e4ab0 93825110264496
r5 0x0 0
r6 0x28 40
r7 0x55555c7a2000 93825112088576
r8 0x55555c7a2028 93825112088616
r9 0x0 0
r10 0x7ffff48248c0 140737295567040
r11 0xd6 214
r12 0x7ffff4a8b690 140737298085520
r13 0x55555a6b8016 93825077575702
r14 0x3ff0080000000000 4607191214893039616
r15 0x308ffffffffff 854320534781951
r16 0xfa7e1 1026017
r17 0x55555c5e2ea0 93825110257312
r18 0x1 1
r19 0x0 0
r20 0x0 0
r21 0x0 0
r22 0x5555566fa000 0x5555566fa000
r23 0x5555580c7310 93825037792016
r24 0x55555c6c9830 93825111201840
r25 0x7ffff4a36a5c 140737297738332
r26 0x5555580c8328 93825037796136
r27 0xffffffff80000000 18446744071562067968
r28 0xffffffff80000000 18446744071562067968
r29 0x7ffff48248c0 140737295567040
r30 0x5555580c7328 93825037792040
r31 0x201 513
orig_a0 0x55555c7c4000 93825112227840
pc 0x7ffff4a36b90 0x7ffff4a36b90 <__log+308>
badv 0x55555c79c000 0x55555c79c000
f0 {f = 0x43b068ff, d = 0xbfb999eb43b068ff} {f = 352.820282, d = -0.10000486757818193}
f1 {f = 0x0, d = 0x3f60000000000000} {f = 0, d = 0.001953125}
f2 {f = 0xa344de30, d = 0x3fc24924a344de30} {f = -1.06722313e-17, d = 0.14285715076560868}
f3 {f = 0x0, d = 0xbf60000000000000} {f = 0, d = -0.001953125}
f4 {f = 0x0, d = 0x41a0000000000000} {f = 0, d = 134217728}
f5 {f = 0x282ad6ca, d = 0x3fbc7184282ad6ca} {f = 9.48347066e-15, d = 0.11110712032936046}
f6 {f = 0x6050403, d = 0xa09080706050403} {f = 2.50174671e-35, d = 2.5437718470676246e-260}
f7 {f = 0x0, d = 0x3ed0000000000000} {f = 0, d = 3.814697265625e-06}
f8 {f = 0x2000000, d = 0x4110000002000000} {f = 9.40395481e-38, d = 262144.001953125}
f9 {f = 0x5c5d8a80, d = 0x55555c5d8a80} {f = 2.49433009e+17, d = 4.6355763674646357e-310}
f10 {f = 0x80000, d = 0x8000000080000} {f = 7.34683969e-40, d = 1.1125369295126334e-308}
f11 {f = 0x8000000, d = 0x800000008000000} {f = 3.85185989e-34, d = 3.7857671085583276e-270}
f12 {f = 0x8, d = 0x8} {f = 1.12103877e-44, d = 3.9525251667299724e-323}
f13 {f = 0xffff, d = 0xffff0000ffff} {f = 9.18340949e-41, d = 1.3906499419328771e-309}
f14 {f = 0xff, d = 0xff000000ff} {f = 3.57331108e-43, d = 5.4110892682213118e-312}
f15 {f = 0x101, d = 0x10100000101} {f = 3.60133705e-43, d = 5.4535291840504986e-312}
f16 {f = 0xffff0000, d = 0xffff00ffffff0000} {f = -nan(0x7f0000), d = -nan(0xf00ffffff0000)}
f17 {f = 0x8, d = 0x8} {f = 1.12103877e-44, d = 3.9525251667299724e-323}
f18 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f19 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f20 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f21 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f22 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f23 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f24 {f = 0x0, d = 0x41e0000000000000} {f = 0, d = 2147483648}
f25 {f = 0x0, d = 0x4150000000000000} {f = 0, d = 4194304}
f26 {f = 0xfefa39ef, d = 0x3fe62e42fefa39ef} {f = -1.66303904e+38, d = 0.69314718055994529}
f27 {f = 0x0, d = 0x3f60000000000000} {f = 0, d = 0.001953125}
f28 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f29 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f30 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
f31 {f = 0xffffffff, d = 0xffffffffffffffff} {f = -nan(0x7fffff), d = -nan(0xfffffffffffff)}
fcc0 0x0 0
fcc1 0x1 1
fcc2 0x1 1
fcc3 0x1 1
fcc4 0x1 1
fcc5 0x1 1
fcc6 0x0 0
fcc7 0x0 0
fcsr 0x3df 991
Why is it doing this? This is really peculiar...
(gdb) p r
$4 = 0.001953125
(gdb) p r2
$5 = 3.814697265625e-06
(gdb) l
64	  /* Fix sign of zero with downward rounding when x==1. */
65	  if (WANT_ROUNDING && __glibc_unlikely (ix == asuint64 (1.0)))
66	    return 0;
67	  r = x - 1.0;
68	  r2 = r * r;
69	  r3 = r * r2;
70	  y = r3 * (B[1] + r * B[2] + r2 * B[3]
71	    + r3 * (B[4] + r * B[5] + r2 * B[6]
72	      + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
73	  /* Worst-case error is around 0.507 ULP. */
源码中 `r2 = r * r`,但 r 和 r2 的实际值明显不是平方关系?????
$ python3 -c 'print(0.001953125**2)'
3.814697265625e-06
Hmm, but when I test this `log` operation on a 3A6000 I only get `FE_INEXACT`, and `FE_INEXACT` is not enabled by the `feenableexcept` line I've referred to.
FCSR is 0x3df, so FE_INEXACT is somehow enabled. This is almost always wrong.
Can you set a breakpoint on `feenableexcept` to see when and how `FE_INEXACT` is enabled?
(gdb) p r $4 = 0.001953125 (gdb) p r2 $5 = 3.814697265625e-06 (gdb) l 64 /* Fix sign of zero with downward rounding when x==1. */ 65 if (WANT_ROUNDING && __glibc_unlikely (ix == asuint64 (1.0))) 66 return 0; 67 r = x - 1.0; 68 r2 = r * r; 69 r3 = r * r2; 70 y = r3 * (B[1] + r * B[2] + r2 * B[3] 71 + r3 * (B[4] + r * B[5] + r2 * B[6] 72 + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); 73 /* Worst-case error is around 0.507 ULP. */
源码中
r2 = r * r
但r和r2实际值明显不是平方关系?????
$ python3 -c 'print(0.001953125**2)' 3.814697265625e-06
科学记数法看漏了😂我的锅
FCSR is 0x3df, so FE_INEXACT is somehow enabled. This is almost always wrong.
Can you set a breakpoint on `feenableexcept` to see when and how `FE_INEXACT` is enabled?
断不到那里就直接报错了ಠ_ಠ
这就怪了…… 能跟踪一下看看 FCSR 的最低位是什么时候被置为 1 的吗?
这就怪了…… 能跟踪一下看看 FCSR 的最低位是什么时候被置为 1 的吗?
watch $fcsr == 0x000003df
直接卡死了😂
我在 `DEBUG_ShowMsg` 放了断点,刚创建窗口时有一段时间 fcsr 是 0x00110000,然后输出 LOG: Goes to output 0: name='default' size_mm=(508 x 285)
之后变为0x01110000,输出LOG: ISA BCLK: 8333333.333Hz (25000000/3)
之后变回0x00110000,直到SIGFPE时才变成0x000003df
直接反汇编找出所有 movgr2fcsr 指令然后在它们上设置断点试试?
可能在这里,能找到的从0x00110000变0x000003df最近的点
那个 `_FPU_SETCW` 在 `/usr/include/fpu_control.h` 中找到:`#define _FPU_SETCW(cw) (void) (cw)`
可能在这里,能找到的从0x00110000变0x000003df最近的点 那个
_FPU_SETCW
在/usr/include/fpu_control.h
找到:#define _FPU_SETCW(cw) (void) (cw)
感谢排查,应该是此处了:不同架构的 FCSR 之间没有通用性。此处应该有架构特异性的适配而并没有
一个可移植的写法 (需要 C99):
fenv_t useless;
feholdexcept(&useless);
25行后面加上|| defined(__loongarch__)
屏蔽fpu适配可以跑通了
回头去提个补丁去🙂
joncampbell123/dosbox-x#4887 fixes joncampbell123/dosbox-x#4475
可能还会在此补丁中加入mips支持,有望在3a4000上运行
joncampbell123/dosbox-x#4890 修复上一个补丁没修全的部分 joncampbell123/dosbox-x#4891 正式架构支持及MT32支持
项目仓库:https://github.com/joncampbell123/dosbox-x commit hash: e80b002 运行环境:
编译运行dosbox-x时在某个看起来完全正常的浮点运算表达式上报出SIGFPE。相同commit的源码在x64下可以正常编译运行。 实机运行报错,qemu下运行会直接导致qemu进程卡死。
在gdb中的断点信息:
x 和 r 之间不满足 `r = x - 1.0`,原因不明。设法屏蔽出错代码后另一处浮点运算报出 SIGFPE,再次屏蔽后又一处报错,可能在此程序的编译配置下所有浮点运算全部出现紊乱。