ksco / rvemu

A simple and fast RISC-V JIT emulator.
MIT License
118 stars 24 forks source link

jit rodata 重定位的问题 #5

Closed Chang-LeHung closed 1 year ago

Chang-LeHung commented 1 year ago
*loc = (u32)((i64)sym->st_value + rel->r_addend - (i64)rel->r_offset);

在这个重定位中,st_value 是符号在 rodata 节内的 offset,rel->r_offset 是重定位位置在 text 节内的 offset。代码当中的 memory layout 大致是 [rodata][text],填入的值应该是下面这个大小的相反数,但是老师代码当中为什么是 (u32)((i64)sym->st_value + rel->r_addend - (i64)rel->r_offset);?不应该是:符号真实地址 + rel->r_addend - (text 真实起始地址 + rel->r_offset) 吗?

demo

/**
 * Compile a C translation unit with clang and install the resulting machine
 * code in the JIT cache.
 *
 * The source text is piped into `clang -O3 -c`, which writes a relocatable
 * ELF object to its stdout. This process's stdout is temporarily replaced by
 * a pipe so the object can be captured into `elfbuf`. The `.text` section
 * (and, when present, a `.rodata.*` section plus the `.rela.text` entries
 * that refer to it) is then copied into the cache with cache_add().
 *
 * @param m       machine whose cache receives the code; the entry is keyed
 *                by m->state.pc.
 * @param source  C source text to compile.
 * @return        address of the installed `.text` bytes, as returned by
 *                cache_add().
 *
 * Calls fatal() if the pipe or the compiler cannot be set up, or if no
 * usable object was produced.
 *
 * NOTE(review): the object is only read after pclose() returns, so it has to
 * fit in the pipe's kernel buffer or clang will block forever.
 * NOTE(review): only the last section whose name starts with ".rodata." is
 * kept; objects with several such sections are not handled.
 */
u8 *machine_compile(machine_t *m, str_t source) {
    // Flush before redirecting so output buffered so far reaches the real
    // stdout instead of ending up in the capture pipe.
    fflush(stdout);

    int saved_stdout = dup(STDOUT_FILENO);
    int outp[2];

    if (pipe(outp) != 0) fatal("cannot make a pipe");
    dup2(outp[1], STDOUT_FILENO);
    close(outp[1]);

    FILE *f;
    f = popen("clang -O3 -c -xc -o /dev/stdout -", "w");
    if (f == NULL) fatal("cannot compile program");
    fwrite(source, 1, str_len(source), f);
    pclose(f);

    // Restore stdout *before* reading: fd 1 is the last write end of the
    // pipe, and while it is open a read on an empty pipe would never see EOF.
    dup2(saved_stdout, STDOUT_FILENO);
    close(saved_stdout);

    // Drain the pipe; a single read() is allowed to return a partial result.
    u64 nread = 0;
    for (;;) {
        i64 n = read(outp[0], elfbuf + nread, BINBUF_CAP - nread);
        if (n <= 0) break;
        nread += (u64)n;
    }
    close(outp[0]);
    if (nread < sizeof(elf64_ehdr_t)) fatal("cannot compile program");

    elf64_ehdr_t *ehdr = (elf64_ehdr_t *)elfbuf;

    /**
     * for some instructions, clang will generate a corresponding .rodata section.
     * this means we need to write a mini-linker that puts the .rodata section into
     * memory, takes its actual address, and uses symbols and relocations to apply
     * it back to the corresponding location in the .text section.
     */

    // Section-header indices of interest; 0 means "not found" (index 0 is
    // never one of these sections).
    i64 text_idx = 0, symtab_idx = 0, rela_idx = 0, rodata_idx = 0;
    {
        u64 shstr_shoff = ehdr->e_shoff + ehdr->e_shstrndx * sizeof(elf64_shdr_t);
        elf64_shdr_t *shstr_shdr = (elf64_shdr_t *)(elfbuf + shstr_shoff);
        assert(ehdr->e_shnum != 0);

        for (i64 idx = 0; idx < ehdr->e_shnum; idx++) {
            u64 shoff = ehdr->e_shoff + idx * sizeof(elf64_shdr_t);
            elf64_shdr_t *shdr = (elf64_shdr_t *)(elfbuf + shoff);
            char *str = (char *)(elfbuf + shstr_shdr->sh_offset + shdr->sh_name);
            if (strcmp(str, ".text") == 0) text_idx = idx;
            if (strcmp(str, ".rela.text") == 0) rela_idx = idx;
            if (strncmp(str, ".rodata.", strlen(".rodata.")) == 0) rodata_idx = idx;
            if (strcmp(str, ".symtab") == 0) symtab_idx = idx;
        }
    }

    assert(text_idx != 0 && symtab_idx != 0);

    u64 text_shoff = ehdr->e_shoff + text_idx * sizeof(elf64_shdr_t);
    elf64_shdr_t *text_shdr = (elf64_shdr_t *)(elfbuf + text_shoff);

    // Nothing to link: install .text as-is.
    if (rela_idx == 0 || rodata_idx == 0) {
        return cache_add(m->cache, m->state.pc, elfbuf + text_shdr->sh_offset,
                         text_shdr->sh_size, text_shdr->sh_addralign);
    }

    // Install .rodata first, then .text, remembering where each one landed.
    // Both run-time addresses are needed to resolve PC-relative relocations.
    u64 rodata_addr = 0, text_addr = 0;
    {
        u64 shoff = ehdr->e_shoff + rodata_idx * sizeof(elf64_shdr_t);
        elf64_shdr_t *shdr = (elf64_shdr_t *)(elfbuf + shoff);
        rodata_addr = (u64)cache_add(m->cache, m->state.pc, elfbuf + shdr->sh_offset,
                                     shdr->sh_size, shdr->sh_addralign);
        text_addr = (u64)cache_add(m->cache, m->state.pc, elfbuf + text_shdr->sh_offset,
                                   text_shdr->sh_size, text_shdr->sh_addralign);
    }

    // apply relocations to .text section.
    {
        u64 shoff = ehdr->e_shoff + rela_idx * sizeof(elf64_shdr_t);
        elf64_shdr_t *shdr = (elf64_shdr_t *)(elfbuf + shoff);
        i64 rels = shdr->sh_size / sizeof(elf64_rela_t);

        u64 symtab_shoff = ehdr->e_shoff + symtab_idx * sizeof(elf64_shdr_t);
        elf64_shdr_t *symtab_shdr = (elf64_shdr_t *)(elfbuf + symtab_shoff);

        for (i64 idx = 0; idx < rels; idx++) {
#ifndef __x86_64__
            fatal("only support x86_64 for now");
#endif
            elf64_rela_t *rel = (elf64_rela_t *)(elfbuf + shdr->sh_offset + idx * sizeof(elf64_rela_t));
            assert(rel->r_type == R_X86_64_PC32);

            elf64_sym_t *sym = (elf64_sym_t *)(elfbuf + symtab_shdr->sh_offset + rel->r_sym * sizeof(elf64_sym_t));

            // R_X86_64_PC32 = S + A - P, where S and P are run-time addresses.
            // In a relocatable object st_value and r_offset are offsets inside
            // *different* sections, so each must be rebased on the address its
            // own section was copied to.
            // NOTE(review): assumes every relocated symbol lives in the
            // .rodata section found above - confirm against st_shndx.
            u64 sym_addr = rodata_addr + sym->st_value;   // S
            u64 patch_addr = text_addr + rel->r_offset;   // P
            i64 disp = (i64)sym_addr + rel->r_addend - (i64)patch_addr;
            // The field is a signed 32-bit displacement.
            assert(disp >= -0x80000000LL && disp <= 0x7fffffffLL);

            // The patch site sits inside an instruction and need not be
            // 4-byte aligned, so store through memcpy.
            u32 word = (u32)disp;
            memcpy((void *)patch_addr, &word, sizeof(word));
        }
    }

    return (u8 *)text_addr;
}
ksco commented 1 year ago

我们要处理的重定位的类型是 R_X86_64_PC32,从名字也能看出来,这是一个 PC-relative 的重定位,就是说,要写入 loc 的值是一个相对于 pc 的偏移量。

根据 elf 手册,R_X86_64_PC32 的计算公式是 S + A - P,其中:

S = input_section_addr + symbol.value
A = rela.addend
P = input_section_addr + rela.offset

所以 input_section_addr 在计算过程中被约掉了。希望可以解答你的疑问。

Chang-LeHung commented 1 year ago

但是这个 input_section_addr 不是不一样吗?(S 用的是 rodata 节的地址,P 用的是 text 节的地址。)

ksco commented 1 year ago

确实是哎,应该再减去 .rodata section 的 size 才对,不知道之前为什么可以正常运行。我最近不太方便测试,你那边是有遇到问题吗?

Chang-LeHung commented 1 year ago

问题倒是没遇到。没遇到问题是因为我目前的测试用例都没有生成 rodata 节。那这个接受提交 PR 么?

ksco commented 1 year ago

接受,但最好可以验证一下。用最新版本的 clang 运行复杂点的程序可能会遇到有 rodata 的情况。

Chang-LeHung commented 1 year ago

您之前有遇到过 rodata 的测试用例吗

ksco commented 1 year ago

有,应该是运行 nbench 或 lua 4.0 的时候遇到了。

Chang-LeHung commented 1 year ago

您移植好的 lua4.0 能上传到 github 么?

ksco commented 1 year ago

源码丢失了,只有二进制了,已发至你的邮箱。

Chang-LeHung commented 1 year ago

好的!

Chang-LeHung commented 1 year ago

在 lua4.0 上测试 life.lua,大概在 1500 个 generation 左右会出现 jit 代码生成 rodata 节的现象,虽然重定位有问题,但是没有出现明显错误(还测试了一些其他的程序,也都没有出现错误)。通过分析对应 jit 代码的汇编程序可以知道,最终和 rodata 节相关的数据只会影响 cpu state 结构体当中 x14 和 x15 这两个寄存器。按照 RISC-V calling convention,这两个寄存器是函数参数寄存器。之所以没有出现明显错误,可能是因为在执行 jit 代码之前已经先将这两个寄存器当中的内容保存到栈上,后面可能没有再使用这两个寄存器,或者是先覆盖再读取,这种情况下,程序不会出现错误。jit 代码如下:

#include <stdint.h>
#include <stdbool.h>
/* Constant added to an address to obtain the pointer that the generated code
 * dereferences. */
#define OFFSET 0x088800000000ULL
/* Add OFFSET to `addr`. The argument is parenthesized so that expressions
 * containing operators of lower precedence than `+` (e.g. `c ? a : b`)
 * are translated as a whole. */
#define TO_HOST(addr) ((addr) + OFFSET)
/* Value stored in state_t.exit_reason by the generated code before it
 * returns. Only indirect_branch is used in this listing; the meaning of the
 * other enumerators is presumably defined by the emulator - confirm there.
 * The numeric values follow declaration order starting at 0. */
enum exit_reason_t {                           
   none,                                       
   direct_branch,                              
   indirect_branch,                            
   interp,                                     
   ecall,                                      
};                                             
/* One floating-point register slot. All members overlay the same storage,
 * so the contents can be viewed as a raw 64-bit or 32-bit integer or as a
 * double or float. */
typedef union {                                
    uint64_t v;                                
    uint32_t w;                                
    double d;                                  
    float f;                                   
} fp_reg_t;                                    
/* Register state passed to start(). The generated code reads gp_regs on
 * entry, writes them back on exit, and reports why and where it stopped via
 * exit_reason and reenter_pc.
 * NOTE(review): presumably this must match the emulator's own state layout
 * field for field - confirm before changing anything here. */
typedef struct {                               
    enum exit_reason_t exit_reason;            
    uint64_t reenter_pc;                       
    uint64_t gp_regs[32];                      
    fp_reg_t fp_regs[32];                      
    uint64_t pc;                               
    uint32_t fcsr;                             
} state_t;                                     
/*
 * Machine-generated translation of one region of code, posted as an example.
 *
 * The general-purpose registers the region touches (1, 2, 8, 10-15) are
 * loaded from state->gp_regs into locals, the region runs as a chain of
 * labelled blocks connected by goto, and the `end` label stores the locals
 * back into state->gp_regs.
 *
 * Each `insn_<hex>` label is presumably the address of the translated
 * instruction - confirm against the code generator. Memory accesses go
 * through TO_HOST(), which adds OFFSET to the computed address.
 *
 * The only exits visible here are indirect branches: they set
 * state->exit_reason to indirect_branch and state->reenter_pc to the target
 * with bit 0 cleared, then jump to `end`.
 */
void start(volatile state_t *restrict state) { 
    /* Load the registers used by this region into locals. */
    uint64_t x1 = state->gp_regs[1];
    uint64_t x2 = state->gp_regs[2];
    uint64_t x8 = state->gp_regs[8];
    uint64_t x10 = state->gp_regs[10];
    uint64_t x11 = state->gp_regs[11];
    uint64_t x12 = state->gp_regs[12];
    uint64_t x13 = state->gp_regs[13];
    uint64_t x14 = state->gp_regs[14];
    uint64_t x15 = state->gp_regs[15];
insn_2ebfc: {
    uint64_t rs1 = x2;
    uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
    x11 = rd;
    goto insn_2ec00;
}
insn_2ec00: {
    uint64_t rs1 = x2;
    uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)32LL);
    x15 = rd;
    goto insn_2ec04;
}
insn_2ec04: {
    uint64_t rs1 = x15;
    uint64_t rs2 = x11;
    if ((uint64_t)rs1 == (uint64_t)rs2) {
        goto insn_2ec54;
    }
    goto insn_2ec08;
}
insn_2ec08: {
    uint64_t rs1 = x2;
    x15 = rs1 + (int64_t)2LL;
    goto insn_2ec0c;
}
insn_2ec0c: {
    uint64_t rs1 = x2;
    x14 = rs1 + (int64_t)34LL;
    goto insn_2ec10;
}
insn_2ec10: {
    uint64_t rs1 = x2;
    x12 = rs1 + (int64_t)24LL;
    goto insn_2ec12;
}
insn_2ec12: {
    uint64_t rs1 = x15;
    uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
    x13 = rd;
    goto insn_2ec16;
}
insn_2ec16: {
    uint64_t rs1 = x15;
    x15 = rs1 + (int64_t)2LL;
    goto insn_2ec18;
}
insn_2ec18: {
    uint64_t rs1 = x13;
    uint64_t rs2 = 0;
    if ((uint64_t)rs1 != (uint64_t)rs2) {
        goto insn_2ec8c;
    }
    goto insn_2ec1a;
}
insn_2ec1a: {
    uint64_t rs1 = x14;
    uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
    x13 = rd;
    goto insn_2ec1e;
}
insn_2ec1e: {
    uint64_t rs1 = x14;
    x14 = rs1 + (int64_t)2LL;
    goto insn_2ec20;
}
insn_2ec20: {
    uint64_t rs1 = x13;
    uint64_t rs2 = 0;
    if ((uint64_t)rs1 != (uint64_t)rs2) {
        goto insn_2ec8c;
    }
    goto insn_2ec22;
}
insn_2ec22: {
    uint64_t rs1 = x12;
    uint64_t rs2 = x15;
    if ((uint64_t)rs1 != (uint64_t)rs2) {
        goto insn_2ec12;
    }
    goto insn_2ec26;
}
insn_2ec26: {
    uint64_t rs1 = 0;
    x10 = rs1 + (int64_t)0LL;
    goto insn_2ec28;
}
insn_2ec28: {
    uint64_t rs1 = x2;
    int64_t rd = *(int64_t *)TO_HOST(rs1 + (int64_t)72LL);
    x1 = rd;
    goto insn_2ec2a;
}
insn_2ec2a: {
    uint64_t rs1 = x2;
    int64_t rd = *(int64_t *)TO_HOST(rs1 + (int64_t)64LL);
    x8 = rd;
    goto insn_2ec2c;
}
insn_2ec2c: {
    uint64_t rs1 = x2;
    x2 = rs1 + (int64_t)80LL;
    goto insn_2ec2e;
}
insn_2ec2e: {
    /* Indirect branch through x1: record the target (bit 0 cleared) and leave. */
    uint64_t rs1 = x1;
    state->exit_reason = indirect_branch;
    state->reenter_pc = (rs1 + (int64_t)0LL) & ~(uint64_t)1;
    goto end;
}
insn_2ec8c: {
    uint64_t rs1 = x11;
    uint64_t rs2 = 0;
    if ((uint64_t)rs1 != (uint64_t)rs2) {
        goto insn_2ec9c;
    }
    goto insn_2ec8e;
}
insn_2ec8e: {
    uint64_t rs1 = x2;
    int64_t rd = *(int64_t *)TO_HOST(rs1 + (int64_t)72LL);
    x1 = rd;
    goto insn_2ec90;
}
insn_2ec90: {
    uint64_t rs1 = x2;
    int64_t rd = *(int64_t *)TO_HOST(rs1 + (int64_t)64LL);
    x8 = rd;
    goto insn_2ec92;
}
insn_2ec92: {
    uint64_t rs1 = 0;
    x10 = rs1 + (int64_t)1LL;
    goto insn_2ec94;
}
insn_2ec94: {
    uint64_t rs1 = x2;
    x2 = rs1 + (int64_t)80LL;
    goto insn_2ec96;
}
insn_2ec96: {
    /* Second indirect-branch exit, same protocol as insn_2ec2e. */
    uint64_t rs1 = x1;
    state->exit_reason = indirect_branch;
    state->reenter_pc = (rs1 + (int64_t)0LL) & ~(uint64_t)1;
    goto end;
}
insn_2ec9c: {
    uint64_t rs1 = 0;
    x10 = rs1 + (int64_t)-1LL;
    goto insn_2ec9e;
}
insn_2ec9e: {
    goto insn_2ec28;
}
insn_2ec54: {
    uint64_t rs1 = 0;
    x10 = rs1 + (int64_t)-1LL;
    goto insn_2ec56;
}
insn_2ec56: {
    uint64_t rs1 = x11;
    uint64_t rs2 = 0;
    if ((uint64_t)rs1 == (uint64_t)rs2) {
        goto insn_2ec98;
    }
    goto insn_2ec58;
}
insn_2ec58: {
    uint64_t rs1 = 0;
    uint64_t rs2 = x2;
    x15 = rs1 + rs2;
    goto insn_2ec5a;
}
insn_2ec5a: {
    uint64_t rs1 = x15;
    uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
    x12 = rd;
    goto insn_2ec5e;
}
insn_2ec5e: {
    uint64_t rs1 = x2;
    uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)32LL);
    x13 = rd;
    goto insn_2ec62;
}
insn_2ec62: {
    uint64_t rs1 = x2;
    x14 = rs1 + (int64_t)32LL;
    goto insn_2ec64;
}
insn_2ec64: {
    uint64_t rs1 = x2;
    x11 = rs1 + (int64_t)24LL;
    goto insn_2ec66;
}
insn_2ec66: {
    uint64_t rs1 = x15;
    x15 = rs1 + (int64_t)2LL;
    goto insn_2ec68;
}
insn_2ec68: {
    uint64_t rs1 = x14;
    x14 = rs1 + (int64_t)2LL;
    goto insn_2ec6a;
}
insn_2ec6a: {
    uint64_t rs1 = x12;
    uint64_t rs2 = x13;
    if ((uint64_t)rs1 != (uint64_t)rs2) {
        goto insn_2ec82;
    }
    goto insn_2ec6e;
}
insn_2ec6e: {
    uint64_t rs1 = x15;
    uint64_t rs2 = x11;
    if ((uint64_t)rs1 == (uint64_t)rs2) {
        goto insn_2ec26;
    }
    goto insn_2ec72;
}
insn_2ec72: {
    uint64_t rs1 = x15;
    uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
    x12 = rd;
    goto insn_2ec76;
}
insn_2ec76: {
    uint64_t rs1 = x14;
    uint16_t rd = *(uint16_t *)TO_HOST(rs1 + (int64_t)0LL);
    x13 = rd;
    goto insn_2ec7a;
}
insn_2ec7a: {
    uint64_t rs1 = x15;
    x15 = rs1 + (int64_t)2LL;
    goto insn_2ec7c;
}
insn_2ec7c: {
    uint64_t rs1 = x14;
    x14 = rs1 + (int64_t)2LL;
    goto insn_2ec7e;
}
insn_2ec7e: {
    uint64_t rs1 = x12;
    uint64_t rs2 = x13;
    if ((uint64_t)rs1 == (uint64_t)rs2) {
        goto insn_2ec6e;
    }
    goto insn_2ec82;
}
insn_2ec82: {
    uint64_t rs1 = x13;
    uint64_t rs2 = x12;
    if ((uint64_t)rs1 < (uint64_t)rs2) {
        goto insn_2ec28;
    }
    goto insn_2ec86;
}
insn_2ec86: {
    uint64_t rs1 = 0;
    uint64_t rs2 = x10;
    /* 32-bit result sign-extended to 64 bits. */
    x10 = (int64_t)(int32_t)(rs1 - rs2);
    goto insn_2ec8a;
}
insn_2ec8a: {
    goto insn_2ec28;
}
insn_2ec98: {
    uint64_t rs1 = 0;
    x10 = rs1 + (int64_t)1LL;
    goto insn_2ec9a;
}
insn_2ec9a: {
    goto insn_2ec58;
}
/* Common exit: write the cached registers back into the state. */
end:;
    state->gp_regs[1] = x1;
    state->gp_regs[2] = x2;
    state->gp_regs[8] = x8;
    state->gp_regs[10] = x10;
    state->gp_regs[11] = x11;
    state->gp_regs[12] = x12;
    state->gp_regs[13] = x13;
    state->gp_regs[14] = x14;
    state->gp_regs[15] = x15;
}