Closed Chang-LeHung closed 1 year ago
我们要处理的重定向的类型是 R_X86_64_PC32,从名字也能看出来,这是一个 PC-relative 的重定向,就是说,要写入 loc 的值是一个相对于 pc 的偏移量。
根据 elf 手册,R_X86_64_PC32 的计算公式是 S + A - P,其中:
S = input_section_addr + symbol.value
A = rela.addend
P = input_section_addr + rela.offset
所以 input_section_addr 在计算过程中被约掉了。希望可以解答你的疑问。
但是 S 和 P 中的 input_section_addr 不是不一样吗?
确实是哎,应该再减去 .rodata section 的 size 才对,不知道之前为什么可以正常运行。我最近不太方便测试,你那边是有遇到问题吗?
问题倒是没遇到,没遇到问题是因为我目前的测试用例都没有生成 rodata 节。那这个问题接受提交 pr 么?
接受,但最好可以验证一下。用最新版本的 clang 运行复杂点的程序可能会遇到有 rodata 的情况。
您之前有遇到过 rodata 的测试用例吗?
您移植好的 lua4.0 能上传到 github 么?
源码丢失了,只有二进制了,已发至你的邮箱。
好的!
在 lua4.0 上测试 life.lua,大概在 1500 个 generation 左右会出现 jit 代码生成 rodata 节的情况,虽然重定位有问题,但是没有出现明显错误(还测试了一些其他的程序,也都没有出现错误)。 通过分析对应 jit 代码的汇编程序可以知道,最终和 rodata 节相关的数据只会影响 cpu state 结构体当中 x14 和 x15 这两个寄存器。在 riscv calling convention 中,这两个寄存器是函数参数寄存器。之所以没有出现明显错误,可能是因为在执行 jit 代码之前已经先将这两个寄存器当中的内容保存到栈上,后面可能没有再使用这两个寄存器,或者是先覆盖然后再读,这种情况下,程序不会出现错误。 jit 代码如下:
#include <stdint.h>
#include <stdbool.h>
/* Fixed bias that TO_HOST adds to an address. */
#define OFFSET 0x088800000000ULL
/*
 * Adds OFFSET to addr. By its name this maps an emulated (guest) address to
 * a host address; callers cast the result to a pointer and dereference it.
 * The argument is parenthesized so that expressions with lower precedence
 * than `+` (e.g. `a & mask`, `a << n`, `c ? a : b`) expand correctly.
 */
#define TO_HOST(addr) ((addr) + OFFSET)
/*
 * Reason codes stored in state_t.exit_reason when translated code returns
 * to its caller. Only indirect_branch is produced by the code visible in
 * this file; the meaning of the other values is inferred from their names
 * and should be confirmed against the code that consumes them.
 */
enum exit_reason_t {
    none            = 0,
    direct_branch   = 1,
    indirect_branch = 2, /* exit through a register-computed target; reenter_pc holds it */
    interp          = 3,
    ecall           = 4,
};
/*
 * One floating-point register slot, viewable as raw bits or as a
 * floating-point value. All members overlap at the start of the union.
 */
typedef union {
    uint64_t v; /* raw 64-bit view */
    uint32_t w; /* raw 32-bit view */
    double   d; /* double-precision view */
    float    f; /* single-precision view */
} fp_reg_t;
/*
 * CPU state exchanged between translated code and its caller.
 * start() reads the registers it needs from gp_regs on entry and writes
 * them back on exit, together with exit_reason / reenter_pc.
 */
typedef struct {
    enum exit_reason_t exit_reason; /* why translated code returned */
    uint64_t reenter_pc;            /* address to continue from after an exit */
    uint64_t gp_regs[32];           /* general-purpose registers, indexed by register number */
    fp_reg_t fp_regs[32];           /* floating-point registers (not touched by the code visible here) */
    uint64_t pc;                    /* NOTE(review): presumably the current guest pc; not referenced here */
    uint32_t fcsr;                  /* NOTE(review): presumably the FP control/status register; not referenced here */
} state_t;
/*
 * JIT-generated translation of one region of guest code (RISC-V according
 * to the surrounding discussion).
 *
 * Structure:
 *   1. Load the registers this region uses from state->gp_regs into locals.
 *   2. Run the translated instructions. Each `insn_<hex>` block handles one
 *      instruction and jumps to its successor; the hex suffix appears to be
 *      the guest instruction address (confirm against the generator).
 *   3. At `end`, write every local back to state->gp_regs.
 *
 * Memory accesses go through TO_HOST(), whose result is cast to a pointer
 * and dereferenced directly.
 *
 * state: CPU state to read from and update. On exit, exit_reason is
 *        indirect_branch and reenter_pc is the value of x1 with bit 0
 *        cleared (the only exit paths in this region).
 */
void start(volatile state_t *restrict state) {
// Cache the registers used by this region in locals.
uint64_t x1 = state->gp_regs[1];
uint64_t x2 = state->gp_regs[2];
uint64_t x8 = state->gp_regs[8];
uint64_t x10 = state->gp_regs[10];
uint64_t x11 = state->gp_regs[11];
uint64_t x12 = state->gp_regs[12];
uint64_t x13 = state->gp_regs[13];
uint64_t x14 = state->gp_regs[14];
uint64_t x15 = state->gp_regs[15];
// x11 = zero-extended 16-bit load from [x2 + 0]
insn_2ebfc: {
uint64_t rs1 = x2;
uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
x11 = rd;
goto insn_2ec00;
}
// x15 = zero-extended 16-bit load from [x2 + 32]
insn_2ec00: {
uint64_t rs1 = x2;
uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)32LL);
x15 = rd;
goto insn_2ec04;
}
// if (x15 == x11) branch to 2ec54
insn_2ec04: {
uint64_t rs1 = x15;
uint64_t rs2 = x11;
if ((uint64_t)rs1 == (uint64_t)rs2) {
goto insn_2ec54;
}
goto insn_2ec08;
}
// x15 = x2 + 2
insn_2ec08: {
uint64_t rs1 = x2;
x15 = rs1 + (int64_t)2LL;
goto insn_2ec0c;
}
// x14 = x2 + 34
insn_2ec0c: {
uint64_t rs1 = x2;
x14 = rs1 + (int64_t)34LL;
goto insn_2ec10;
}
// x12 = x2 + 24 (compared against x15 at 2ec22 to end the loop)
insn_2ec10: {
uint64_t rs1 = x2;
x12 = rs1 + (int64_t)24LL;
goto insn_2ec12;
}
// Loop head: x13 = zero-extended 16-bit load from [x15]
insn_2ec12: {
uint64_t rs1 = x15;
uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
x13 = rd;
goto insn_2ec16;
}
// x15 += 2
insn_2ec16: {
uint64_t rs1 = x15;
x15 = rs1 + (int64_t)2LL;
goto insn_2ec18;
}
// if (x13 != 0) branch to 2ec8c
insn_2ec18: {
uint64_t rs1 = x13;
uint64_t rs2 = 0;
if ((uint64_t)rs1 != (uint64_t)rs2) {
goto insn_2ec8c;
}
goto insn_2ec1a;
}
// x13 = zero-extended 16-bit load from [x14]
insn_2ec1a: {
uint64_t rs1 = x14;
uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
x13 = rd;
goto insn_2ec1e;
}
// x14 += 2
insn_2ec1e: {
uint64_t rs1 = x14;
x14 = rs1 + (int64_t)2LL;
goto insn_2ec20;
}
// if (x13 != 0) branch to 2ec8c
insn_2ec20: {
uint64_t rs1 = x13;
uint64_t rs2 = 0;
if ((uint64_t)rs1 != (uint64_t)rs2) {
goto insn_2ec8c;
}
goto insn_2ec22;
}
// if (x12 != x15) loop back to 2ec12
insn_2ec22: {
uint64_t rs1 = x12;
uint64_t rs2 = x15;
if ((uint64_t)rs1 != (uint64_t)rs2) {
goto insn_2ec12;
}
goto insn_2ec26;
}
// x10 = 0
insn_2ec26: {
uint64_t rs1 = 0;
x10 = rs1 + (int64_t)0LL;
goto insn_2ec28;
}
// Common exit sequence: x1 = 64-bit load from [x2 + 72]
insn_2ec28: {
uint64_t rs1 = x2;
int64_t rd = *(int64_t *)TO_HOST(rs1 + (int64_t)72LL);
x1 = rd;
goto insn_2ec2a;
}
// x8 = 64-bit load from [x2 + 64]
insn_2ec2a: {
uint64_t rs1 = x2;
int64_t rd = *(int64_t *)TO_HOST(rs1 + (int64_t)64LL);
x8 = rd;
goto insn_2ec2c;
}
// x2 += 80
insn_2ec2c: {
uint64_t rs1 = x2;
x2 = rs1 + (int64_t)80LL;
goto insn_2ec2e;
}
// Indirect branch to x1 (bit 0 cleared): leave translated code.
insn_2ec2e: {
uint64_t rs1 = x1;
state->exit_reason = indirect_branch;
state->reenter_pc = (rs1 + (int64_t)0LL) & ~(uint64_t)1;
goto end;
}
// if (x11 != 0) branch to 2ec9c
insn_2ec8c: {
uint64_t rs1 = x11;
uint64_t rs2 = 0;
if ((uint64_t)rs1 != (uint64_t)rs2) {
goto insn_2ec9c;
}
goto insn_2ec8e;
}
// x1 = 64-bit load from [x2 + 72]
insn_2ec8e: {
uint64_t rs1 = x2;
int64_t rd = *(int64_t *)TO_HOST(rs1 + (int64_t)72LL);
x1 = rd;
goto insn_2ec90;
}
// x8 = 64-bit load from [x2 + 64]
insn_2ec90: {
uint64_t rs1 = x2;
int64_t rd = *(int64_t *)TO_HOST(rs1 + (int64_t)64LL);
x8 = rd;
goto insn_2ec92;
}
// x10 = 1
insn_2ec92: {
uint64_t rs1 = 0;
x10 = rs1 + (int64_t)1LL;
goto insn_2ec94;
}
// x2 += 80
insn_2ec94: {
uint64_t rs1 = x2;
x2 = rs1 + (int64_t)80LL;
goto insn_2ec96;
}
// Indirect branch to x1 (bit 0 cleared): leave translated code.
insn_2ec96: {
uint64_t rs1 = x1;
state->exit_reason = indirect_branch;
state->reenter_pc = (rs1 + (int64_t)0LL) & ~(uint64_t)1;
goto end;
}
// x10 = -1 (all bits set once converted to uint64_t)
insn_2ec9c: {
uint64_t rs1 = 0;
x10 = rs1 + (int64_t)-1LL;
goto insn_2ec9e;
}
// Unconditional jump to the common exit sequence.
insn_2ec9e: {
goto insn_2ec28;
}
// x10 = -1 (all bits set once converted to uint64_t)
insn_2ec54: {
uint64_t rs1 = 0;
x10 = rs1 + (int64_t)-1LL;
goto insn_2ec56;
}
// if (x11 == 0) branch to 2ec98
insn_2ec56: {
uint64_t rs1 = x11;
uint64_t rs2 = 0;
if ((uint64_t)rs1 == (uint64_t)rs2) {
goto insn_2ec98;
}
goto insn_2ec58;
}
// x15 = x2 (computed as 0 + x2)
insn_2ec58: {
uint64_t rs1 = 0;
uint64_t rs2 = x2;
x15 = rs1 + rs2;
goto insn_2ec5a;
}
// x12 = zero-extended 16-bit load from [x15]
insn_2ec5a: {
uint64_t rs1 = x15;
uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
x12 = rd;
goto insn_2ec5e;
}
// x13 = zero-extended 16-bit load from [x2 + 32]
insn_2ec5e: {
uint64_t rs1 = x2;
uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)32LL);
x13 = rd;
goto insn_2ec62;
}
// x14 = x2 + 32
insn_2ec62: {
uint64_t rs1 = x2;
x14 = rs1 + (int64_t)32LL;
goto insn_2ec64;
}
// x11 = x2 + 24 (compared against x15 at 2ec6e to end the loop)
insn_2ec64: {
uint64_t rs1 = x2;
x11 = rs1 + (int64_t)24LL;
goto insn_2ec66;
}
// x15 += 2
insn_2ec66: {
uint64_t rs1 = x15;
x15 = rs1 + (int64_t)2LL;
goto insn_2ec68;
}
// x14 += 2
insn_2ec68: {
uint64_t rs1 = x14;
x14 = rs1 + (int64_t)2LL;
goto insn_2ec6a;
}
// if (x12 != x13) branch to 2ec82
insn_2ec6a: {
uint64_t rs1 = x12;
uint64_t rs2 = x13;
if ((uint64_t)rs1 != (uint64_t)rs2) {
goto insn_2ec82;
}
goto insn_2ec6e;
}
// Loop head: if (x15 == x11) branch to 2ec26 (x10 = 0, then exit)
insn_2ec6e: {
uint64_t rs1 = x15;
uint64_t rs2 = x11;
if ((uint64_t)rs1 == (uint64_t)rs2) {
goto insn_2ec26;
}
goto insn_2ec72;
}
// x12 = zero-extended 16-bit load from [x15]
insn_2ec72: {
uint64_t rs1 = x15;
uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
x12 = rd;
goto insn_2ec76;
}
// x13 = zero-extended 16-bit load from [x14]
insn_2ec76: {
uint64_t rs1 = x14;
uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
x13 = rd;
goto insn_2ec7a;
}
// x15 += 2
insn_2ec7a: {
uint64_t rs1 = x15;
x15 = rs1 + (int64_t)2LL;
goto insn_2ec7c;
}
// x14 += 2
insn_2ec7c: {
uint64_t rs1 = x14;
x14 = rs1 + (int64_t)2LL;
goto insn_2ec7e;
}
// if (x12 == x13) loop back to 2ec6e
insn_2ec7e: {
uint64_t rs1 = x12;
uint64_t rs2 = x13;
if ((uint64_t)rs1 == (uint64_t)rs2) {
goto insn_2ec6e;
}
goto insn_2ec82;
}
// if (x13 < x12), unsigned compare, branch to the common exit sequence
insn_2ec82: {
uint64_t rs1 = x13;
uint64_t rs2 = x12;
if ((uint64_t)rs1 < (uint64_t)rs2) {
goto insn_2ec28;
}
goto insn_2ec86;
}
// x10 = (0 - x10) truncated to 32 bits, then sign-extended to 64 bits
insn_2ec86: {
uint64_t rs1 = 0;
uint64_t rs2 = x10;
x10 = (int64_t)(int32_t)(rs1 - rs2);
goto insn_2ec8a;
}
// Unconditional jump to the common exit sequence.
insn_2ec8a: {
goto insn_2ec28;
}
// x10 = 1
insn_2ec98: {
uint64_t rs1 = 0;
x10 = rs1 + (int64_t)1LL;
goto insn_2ec9a;
}
// Unconditional jump back to 2ec58.
insn_2ec9a: {
goto insn_2ec58;
}
// Write every cached register back to the CPU state before returning.
end:;
state->gp_regs[1] = x1;
state->gp_regs[2] = x2;
state->gp_regs[8] = x8;
state->gp_regs[10] = x10;
state->gp_regs[11] = x11;
state->gp_regs[12] = x12;
state->gp_regs[13] = x13;
state->gp_regs[14] = x14;
state->gp_regs[15] = x15;
}
这个重定位中,st_value 是符号在 rodata 节的 offset,rel->r_offset 是到 text 节的 offset,代码当中的 memory layout 大致如下:[rodata][text]。填入的大小应该是下面大小的相反数,但是老师代码当中为什么是 (u32)((i64)sym->st_value + rel->r_addend - (i64)rel->r_offset); 。 不应该是:符号真实地址 + rel->r_addend - (text真实起始地址 + rel->r_offset) 吗?