unicorn-engine / unicorn

Unicorn CPU emulator framework (ARM, AArch64, M68K, Mips, Sparc, PowerPC, RiscV, S390x, TriCore, X86)
http://www.unicorn-engine.org
GNU General Public License v2.0
7.33k stars 1.31k forks source link

Execution of xgetbv instruction and setting up of the XCR register. #1955

Open Nitr0-G opened 1 month ago

Nitr0-G commented 1 month ago

Hello, guys! How can I emulate the xgetbv instruction? Judging by the latest updates, it is supported, but I have not found any examples of configuring the XCR register for it. I wrote the code below, but the instruction still does not execute: uc_emu_start(uc, ctx.m_ExecuteFromRip, ctx.m_ImageEnd, 0, 0); returns UC_ERR_INSN_INVALID when xgetbv is executed.

My code for configuring the XCR register:

    DWORD_PTR XCR0 = _xgetbv(0);

    uc_x86_msr MsrIa32VmxCr0Fixed0;
    MsrIa32VmxCr0Fixed0.rid = (uint32_t)Msr::kIa32VmxCr0Fixed0;
    MsrIa32VmxCr0Fixed0.value = XCR0;

    uc_x86_msr MsrIa32VmxCr0Fixed1;
    MsrIa32VmxCr0Fixed0.rid = (uint32_t)Msr::kIa32VmxCr0Fixed1;
    MsrIa32VmxCr0Fixed0.value = XCR0;

    uc_reg_write(m_uc, UC_X86_REG_MSR, &MsrIa32VmxCr0Fixed0);
    uc_reg_write(m_uc, UC_X86_REG_MSR, &MsrIa32VmxCr0Fixed1);

Version of UC: I'm using unicorn from the master branch.

Nitr0-G commented 1 month ago

poc

#define X86_CODE64                                                             \
    "\x0F\x01\xD0"

// memory address where emulation starts
#define ADDRESS 0x1000000

static void test_x86_64(void)
{
    uc_engine *uc;
    uc_err err;
    uc_hook trace1, trace2, trace3, trace4;

    int64_t rax = 0x71f3029efd49d41d;
    int64_t rbx = 0xd87b45277f133ddb;
    int64_t rcx = 0xab40d1ffd8afc461;
    int64_t rdx = 0x919317b4a733f01;
    int64_t rsi = 0x4c24e753a17ea358;
    int64_t rdi = 0xe509a57d2571ce96;
    int64_t r8 = 0xea5b108cc2b9ab1f;
    int64_t r9 = 0x19ec097c8eb618c1;
    int64_t r10 = 0xec45774f00c5f682;
    int64_t r11 = 0xe17e9dbec8c074aa;
    int64_t r12 = 0x80f86a8dc0f6d457;
    int64_t r13 = 0x48288ca5671c5492;
    int64_t r14 = 0x595f72f6e4017f6e;
    int64_t r15 = 0x1efd97aea331cccc;

    int64_t rsp = ADDRESS + 0x200000;

    printf("Emulate x86_64 code\n");

    // Initialize emulator in X86-64bit mode
    err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc);
    if (err) {
        printf("Failed on uc_open() with error returned: %u\n", err);
        return;
    }

    // map 2MB memory for this emulation
    uc_mem_map(uc, ADDRESS, 2 * 1024 * 1024, UC_PROT_ALL);

    // write machine code to be emulated to memory
    if (uc_mem_write(uc, ADDRESS, X86_CODE64, sizeof(X86_CODE64) - 1)) {
        printf("Failed to write emulation code to memory, quit!\n");
        return;
    }

    // initialize machine registers
    uc_reg_write(uc, UC_X86_REG_RSP, &rsp);

    uc_reg_write(uc, UC_X86_REG_RAX, &rax);
    uc_reg_write(uc, UC_X86_REG_RBX, &rbx);
    uc_reg_write(uc, UC_X86_REG_RCX, &rcx);
    uc_reg_write(uc, UC_X86_REG_RDX, &rdx);
    uc_reg_write(uc, UC_X86_REG_RSI, &rsi);
    uc_reg_write(uc, UC_X86_REG_RDI, &rdi);
    uc_reg_write(uc, UC_X86_REG_R8, &r8);
    uc_reg_write(uc, UC_X86_REG_R9, &r9);
    uc_reg_write(uc, UC_X86_REG_R10, &r10);
    uc_reg_write(uc, UC_X86_REG_R11, &r11);
    uc_reg_write(uc, UC_X86_REG_R12, &r12);
    uc_reg_write(uc, UC_X86_REG_R13, &r13);
    uc_reg_write(uc, UC_X86_REG_R14, &r14);
    uc_reg_write(uc, UC_X86_REG_R15, &r15);

    // tracing all basic blocks with customized callback
    uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0);

    // tracing all instructions in the range [ADDRESS, ADDRESS+20]
    uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code64, NULL, ADDRESS,
                ADDRESS + 20);

    // tracing all memory WRITE access (with @begin > @end)
    uc_hook_add(uc, &trace3, UC_HOOK_MEM_WRITE, hook_mem64, NULL, 1, 0);

    // tracing all memory READ access (with @begin > @end)
    uc_hook_add(uc, &trace4, UC_HOOK_MEM_READ, hook_mem64, NULL, 1, 0);

    // emulate machine code in infinite time (last param = 0), or when
    // finishing all the code.
    err = uc_emu_start(uc, ADDRESS, ADDRESS + sizeof(X86_CODE64) - 1, 0, 0);
    if (err) {
        printf("Failed on uc_emu_start() with error returned %u: %s\n", err,
               uc_strerror(err));
    }

    // now print out some registers
    printf(">>> Emulation done. Below is the CPU context\n");

    uc_reg_read(uc, UC_X86_REG_RAX, &rax);
    uc_reg_read(uc, UC_X86_REG_RBX, &rbx);
    uc_reg_read(uc, UC_X86_REG_RCX, &rcx);
    uc_reg_read(uc, UC_X86_REG_RDX, &rdx);
    uc_reg_read(uc, UC_X86_REG_RSI, &rsi);
    uc_reg_read(uc, UC_X86_REG_RDI, &rdi);
    uc_reg_read(uc, UC_X86_REG_R8, &r8);
    uc_reg_read(uc, UC_X86_REG_R9, &r9);
    uc_reg_read(uc, UC_X86_REG_R10, &r10);
    uc_reg_read(uc, UC_X86_REG_R11, &r11);
    uc_reg_read(uc, UC_X86_REG_R12, &r12);
    uc_reg_read(uc, UC_X86_REG_R13, &r13);
    uc_reg_read(uc, UC_X86_REG_R14, &r14);
    uc_reg_read(uc, UC_X86_REG_R15, &r15);

    printf(">>> RAX = 0x%" PRIx64 "\n", rax);
    printf(">>> RBX = 0x%" PRIx64 "\n", rbx);
    printf(">>> RCX = 0x%" PRIx64 "\n", rcx);
    printf(">>> RDX = 0x%" PRIx64 "\n", rdx);
    printf(">>> RSI = 0x%" PRIx64 "\n", rsi);
    printf(">>> RDI = 0x%" PRIx64 "\n", rdi);
    printf(">>> R8 = 0x%" PRIx64 "\n", r8);
    printf(">>> R9 = 0x%" PRIx64 "\n", r9);
    printf(">>> R10 = 0x%" PRIx64 "\n", r10);
    printf(">>> R11 = 0x%" PRIx64 "\n", r11);
    printf(">>> R12 = 0x%" PRIx64 "\n", r12);
    printf(">>> R13 = 0x%" PRIx64 "\n", r13);
    printf(">>> R14 = 0x%" PRIx64 "\n", r14);
    printf(">>> R15 = 0x%" PRIx64 "\n", r15);

    uc_close(uc);
}
Nitr0-G commented 1 month ago

I looked through all the showcases presented on the site and did not find any initialization of the XCR register there; as I understand it, it is initialized automatically. I have already tried to study the problem, and for some reason the check does not pass when the instruction is translated (https://github.com/unicorn-engine/unicorn/blob/master/qemu/target/i386/translate.c#L8088).

Nitr0-G commented 1 month ago

Call chain: uc_mem_map => uc_init => x86_reg_reset, which resets all values in spite of the earlier uc_ctl_set_cpu_model(uc, UC_CPU_X86_EPYC) call. [image]

Nitr0-G commented 1 month ago

So for xgetbv, CR4 configuration is still necessary (https://github.com/unicorn-engine/unicorn/blob/master/qemu/target/i386/fpu_helper.c#L1514). Despite this, I still cannot execute xgetbv, because the entire CPUID is reset on uc_mem_map => uc_init and the CPUID_EXT_XSAVE flag is removed from s->cpuid_ext_features.

Nitr0-G commented 1 month ago

All the code works once the CPUID reset is removed (https://github.com/unicorn-engine/unicorn/blob/master/qemu/target/i386/unicorn.c#L68C1-L182C2):

/*
 * Reset the x86 CPU state of a Unicorn instance to a clean baseline.
 *
 * Operates on the CPUArchState reached through uc->cpu->env_ptr. It zeroes
 * the general-purpose, segment and control registers, the descriptor-table
 * registers, EIP/EFLAGS, the FPU and vector register state and a set of
 * MSR-backed fields, then applies mode-specific setup chosen by uc->mode.
 *
 * In this variant the assignments to env->features[] at the top are
 * commented out, so the only feature word touched here is
 * FEAT_8000_0001_EDX in the 64-bit case (CPUID_EXT2_LM is OR-ed in).
 * NOTE(review): presumably this is done so that a previously selected CPU
 * model keeps its CPUID feature bits -- confirm against the callers.
 */
void x86_reg_reset(struct uc_struct *uc)
{
    CPUArchState *env = uc->cpu->env_ptr;

    // Disabled: hard-coded CPUID feature words (kept for reference).
    //env->features[FEAT_1_EDX] = CPUID_CX8 | CPUID_CMOV | CPUID_SSE2 |
    //                            CPUID_FXSR | CPUID_SSE | CPUID_CLFLUSH;
    //env->features[FEAT_1_ECX] = CPUID_EXT_SSSE3 | CPUID_EXT_SSE41 |
    //                            CPUID_EXT_SSE42 | CPUID_EXT_AES |
    //                            CPUID_EXT_CX16;
    //env->features[FEAT_8000_0001_EDX] = CPUID_EXT2_3DNOW | CPUID_EXT2_RDTSCP;
    //env->features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM |
    //                                    CPUID_EXT3_SKINIT | CPUID_EXT3_CR8LEG;
    //env->features[FEAT_7_0_EBX] = CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 |
    //                              CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP;

    // General-purpose, segment and control registers (this clears cr[4] too).
    memset(env->regs, 0, sizeof(env->regs));
    memset(env->segs, 0, sizeof(env->segs));
    memset(env->cr, 0, sizeof(env->cr));

    // Descriptor-table and task registers.
    memset(&env->ldt, 0, sizeof(env->ldt));
    memset(&env->gdt, 0, sizeof(env->gdt));
    memset(&env->tr, 0, sizeof(env->tr));
    memset(&env->idt, 0, sizeof(env->idt));

    // Instruction pointer and flags.
    env->eip = 0;
    cpu_load_eflags(env, 0, -1);
    env->cc_op = CC_OP_EFLAGS;

    // x87 FPU state.
    env->fpstt = 0; /* top of stack index */
    env->fpus = 0;
    env->fpuc = 0;
    memset(env->fptags, 0, sizeof(env->fptags)); /* 0 = valid, 1 = empty */

    // SSE control/status and XMM/MMX scratch state.
    env->mxcsr = 0;
    memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
    memset(&env->xmm_t0, 0, sizeof(env->xmm_t0));
    memset(&env->mmx_t0, 0, sizeof(env->mmx_t0));

    // Upper halves of the wider vector registers and the opmask registers.
    memset(env->ymmh_regs, 0, sizeof(env->ymmh_regs));

    memset(env->opmask_regs, 0, sizeof(env->opmask_regs));
    memset(env->zmmh_regs, 0, sizeof(env->zmmh_regs));

    /* sysenter registers */
    env->sysenter_cs = 0;
    env->sysenter_esp = 0;
    env->sysenter_eip = 0;
    env->efer = 0;
    env->star = 0;

    env->vm_hsave = 0;

    // Time-stamp counter fields.
    env->tsc = 0;
    env->tsc_adjust = 0;
    env->tsc_deadline = 0;

    env->mcg_status = 0;
    env->msr_ia32_misc_enable = 0;
    env->msr_ia32_feature_control = 0;

    // Performance-counter control fields and counters.
    env->msr_fixed_ctr_ctrl = 0;
    env->msr_global_ctrl = 0;
    env->msr_global_status = 0;
    env->msr_global_ovf_ctrl = 0;
    memset(env->msr_fixed_counters, 0, sizeof(env->msr_fixed_counters));
    memset(env->msr_gp_counters, 0, sizeof(env->msr_gp_counters));
    memset(env->msr_gp_evtsel, 0, sizeof(env->msr_gp_evtsel));

#ifdef TARGET_X86_64
    // State that only exists when building for the 64-bit target.
    memset(env->hi16_zmm_regs, 0, sizeof(env->hi16_zmm_regs));
    env->lstar = 0;
    env->cstar = 0;
    env->fmask = 0;
    env->kernelgsbase = 0;
#endif

    // TODO: reset other registers in CPUX86State qemu/target-i386/cpu.h

    // properly initialize internal setup for each mode
    // (any mode other than 16/32/64-bit gets no extra setup)
    switch (uc->mode) {
    default:
        break;
    case UC_MODE_16:
        env->hflags = 0;
        env->cr[0] = 0;
        // undo the damage done by the memset of env->segs above
        // for R_CS, not quite the same as x86_cpu_reset
        cpu_x86_load_seg_cache(env, R_CS, 0, 0, 0xffff,
                               DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
                                   DESC_R_MASK | DESC_A_MASK);
        // remainder yields same state as x86_cpu_reset
        load_seg_16_helper(env, R_DS, 0);
        load_seg_16_helper(env, R_ES, 0);
        load_seg_16_helper(env, R_SS, 0);
        load_seg_16_helper(env, R_FS, 0);
        load_seg_16_helper(env, R_GS, 0);

        break;
    case UC_MODE_32:
        // hflags are OR-ed in here, not assigned as in the 16-bit case.
        env->hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_OSFXSR_MASK;
        cpu_x86_update_cr0(env, CR0_PE_MASK); // protected mode
        break;
    case UC_MODE_64:
        env->hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK |
                       HF_LMA_MASK | HF_OSFXSR_MASK;
        env->hflags &= ~(HF_ADDSEG_MASK);
        env->efer |= MSR_EFER_LMA | MSR_EFER_LME; // extended mode activated
        cpu_x86_update_cr0(env, CR0_PE_MASK);     // protected mode
        /* If we are operating in 64bit mode then add the Long Mode flag
         * to the CPUID feature flag
         */
        env->features[FEAT_8000_0001_EDX] |= CPUID_EXT2_LM;
        break;
    }
}

Working code:

Emulate x86_64 code
>>> Tracing basic block at 0x1000000, block size = 0x3
>>> Tracing instruction at 0x1000000, instruction size = 0x3
>>> RIP is 0x1000000
>>> Emulation done. Below is the CPU context
>>> RAX = 0x3
>>> RBX = 0xd87b45277f133ddb
>>> RCX = 0x0
>>> RDX = 0x0
>>> RSI = 0x4c24e753a17ea358
>>> RDI = 0xe509a57d2571ce96
>>> R8 = 0xea5b108cc2b9ab1f
>>> R9 = 0x19ec097c8eb618c1
>>> R10 = 0xec45774f00c5f682
>>> R11 = 0xe17e9dbec8c074aa
>>> R12 = 0x80f86a8dc0f6d457
>>> R13 = 0x48288ca5671c5492
>>> R14 = 0x595f72f6e4017f6e
>>> R15 = 0x1efd97aea331cccc
===================================

POC

#define X86_CODE64                                                             \
    "\x0F\x01\xD0"

// memory address where emulation starts
#define ADDRESS 0x1000000

static void test_x86_64(void)
{
    uc_engine *uc;
    uc_err err;
    uc_hook trace1, trace2, trace3, trace4;

    int64_t rax = 0x71f3029efd49d41d;
    int64_t rbx = 0xd87b45277f133ddb;
    int64_t rcx = 0x0;
    int64_t rdx = 0x919317b4a733f01;
    int64_t rsi = 0x4c24e753a17ea358;
    int64_t rdi = 0xe509a57d2571ce96;
    int64_t r8 = 0xea5b108cc2b9ab1f;
    int64_t r9 = 0x19ec097c8eb618c1;
    int64_t r10 = 0xec45774f00c5f682;
    int64_t r11 = 0xe17e9dbec8c074aa;
    int64_t r12 = 0x80f86a8dc0f6d457;
    int64_t r13 = 0x48288ca5671c5492;
    int64_t r14 = 0x595f72f6e4017f6e;
    int64_t r15 = 0x1efd97aea331cccc;

    int64_t rsp = ADDRESS + 0x200000;

    printf("Emulate x86_64 code\n");

    // Initialize emulator in X86-64bit mode
    err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc);
    uc_ctl_set_cpu_model(uc, UC_CPU_X86_EPYC);
    if (err) {
        printf("Failed on uc_open() with error returned: %u\n", err);
        return;
    }

    // map 2MB memory for this emulation
    uc_mem_map(uc, ADDRESS, 2 * 1024 * 1024, UC_PROT_ALL);

    // write machine code to be emulated to memory
    if (uc_mem_write(uc, ADDRESS, X86_CODE64, sizeof(X86_CODE64) - 1)) {
        printf("Failed to write emulation code to memory, quit!\n");
        return;
    }

    // initialize machine registers
    uc_reg_write(uc, UC_X86_REG_RSP, &rsp);

    /*  CR4 cr4{};
    cr4.x64.Bitmap.VME = true;
    cr4.x64.Bitmap.PVI = true;
    cr4.x64.Bitmap.TSD = false;
    cr4.x64.Bitmap.DE = false;
    cr4.x64.Bitmap.PSE = false;
    cr4.x64.Bitmap.PAE = false;
    cr4.x64.Bitmap.MCE = true;
    cr4.x64.Bitmap.PGE = false;
    cr4.x64.Bitmap.PCE = true;
    cr4.x64.Bitmap.OSFXSR = true;
    cr4.x64.Bitmap.OSXMMEXCPT = true;
    cr4.x64.Bitmap.UMIP = true;
    cr4.x64.Bitmap.LA57 = false;
    cr4.x64.Bitmap.VMXE = false;
    cr4.x64.Bitmap.SMXE = true;
    cr4.x64.Bitmap.FSGSBASE = true;
    cr4.x64.Bitmap.PCIDE = false;
    cr4.x64.Bitmap.OSXSAVE = true;
    cr4.x64.Bitmap.KL = true;
    cr4.x64.Bitmap.SMEP = false;
    cr4.x64.Bitmap.SMAP = false;
    cr4.x64.Bitmap.PKE = false;
    cr4.x64.Bitmap.CET = true;
    cr4.x64.Bitmap.PKS = false;
    cr4.x64.Bitmap.UINTR = false;*/
    DWORD_PTR Cr4 = 0x85cf43;
    uc_reg_write(uc, UC_X86_REG_CR4, &Cr4);

    uc_reg_write(uc, UC_X86_REG_RAX, &rax);
    uc_reg_write(uc, UC_X86_REG_RBX, &rbx);
    uc_reg_write(uc, UC_X86_REG_RCX, &rcx);
    uc_reg_write(uc, UC_X86_REG_RDX, &rdx);
    uc_reg_write(uc, UC_X86_REG_RSI, &rsi);
    uc_reg_write(uc, UC_X86_REG_RDI, &rdi);
    uc_reg_write(uc, UC_X86_REG_R8, &r8);
    uc_reg_write(uc, UC_X86_REG_R9, &r9);
    uc_reg_write(uc, UC_X86_REG_R10, &r10);
    uc_reg_write(uc, UC_X86_REG_R11, &r11);
    uc_reg_write(uc, UC_X86_REG_R12, &r12);
    uc_reg_write(uc, UC_X86_REG_R13, &r13);
    uc_reg_write(uc, UC_X86_REG_R14, &r14);
    uc_reg_write(uc, UC_X86_REG_R15, &r15);

    // tracing all basic blocks with customized callback
    uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0);

    // tracing all instructions in the range [ADDRESS, ADDRESS+20]
    uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code64, NULL, ADDRESS,
                ADDRESS + 20);

    // tracing all memory WRITE access (with @begin > @end)
    uc_hook_add(uc, &trace3, UC_HOOK_MEM_WRITE, hook_mem64, NULL, 1, 0);

    // tracing all memory READ access (with @begin > @end)
    uc_hook_add(uc, &trace4, UC_HOOK_MEM_READ, hook_mem64, NULL, 1, 0);

    // emulate machine code in infinite time (last param = 0), or when
    // finishing all the code.
    err = uc_emu_start(uc, ADDRESS, ADDRESS + sizeof(X86_CODE64) - 1, 0, 0);
    if (err) {
        printf("Failed on uc_emu_start() with error returned %u: %s\n", err,
               uc_strerror(err));
    }

    // now print out some registers
    printf(">>> Emulation done. Below is the CPU context\n");

    uc_reg_read(uc, UC_X86_REG_RAX, &rax);
    uc_reg_read(uc, UC_X86_REG_RBX, &rbx);
    uc_reg_read(uc, UC_X86_REG_RCX, &rcx);
    uc_reg_read(uc, UC_X86_REG_RDX, &rdx);
    uc_reg_read(uc, UC_X86_REG_RSI, &rsi);
    uc_reg_read(uc, UC_X86_REG_RDI, &rdi);
    uc_reg_read(uc, UC_X86_REG_R8, &r8);
    uc_reg_read(uc, UC_X86_REG_R9, &r9);
    uc_reg_read(uc, UC_X86_REG_R10, &r10);
    uc_reg_read(uc, UC_X86_REG_R11, &r11);
    uc_reg_read(uc, UC_X86_REG_R12, &r12);
    uc_reg_read(uc, UC_X86_REG_R13, &r13);
    uc_reg_read(uc, UC_X86_REG_R14, &r14);
    uc_reg_read(uc, UC_X86_REG_R15, &r15);

    printf(">>> RAX = 0x%" PRIx64 "\n", rax);
    printf(">>> RBX = 0x%" PRIx64 "\n", rbx);
    printf(">>> RCX = 0x%" PRIx64 "\n", rcx);
    printf(">>> RDX = 0x%" PRIx64 "\n", rdx);
    printf(">>> RSI = 0x%" PRIx64 "\n", rsi);
    printf(">>> RDI = 0x%" PRIx64 "\n", rdi);
    printf(">>> R8 = 0x%" PRIx64 "\n", r8);
    printf(">>> R9 = 0x%" PRIx64 "\n", r9);
    printf(">>> R10 = 0x%" PRIx64 "\n", r10);
    printf(">>> R11 = 0x%" PRIx64 "\n", r11);
    printf(">>> R12 = 0x%" PRIx64 "\n", r12);
    printf(">>> R13 = 0x%" PRIx64 "\n", r13);
    printf(">>> R14 = 0x%" PRIx64 "\n", r14);
    printf(">>> R15 = 0x%" PRIx64 "\n", r15);

    uc_close(uc);
}