microsoft / llvm-mctoll

llvm-mctoll
Other
816 stars 125 forks source link

[x86] pointer type support #201

Closed tianboh closed 4 months ago

tianboh commented 4 months ago

Hi there,

I am trying to disassemble a binary compiled from a toy C file.

#include <stdlib.h>

/*
 * Allocates storage for one int with malloc, stores the sum *a + *b in it,
 * and returns the pointer to that storage.
 * Note: the malloc result is written through without a NULL check.
 */
int* ptradd(int* a, int * b){
        int* c = malloc(sizeof(int));
        *c = *a + *b;
        return c;
}

/*
 * Driver for the reproduction: passes the addresses of two local ints to
 * ptradd. The returned pointer is stored in c but never read or freed.
 */
int main(){
        int x = 0;
        int y = 1;
        int* c = ptradd(&x, &y);
        return 0;
}

Then I compiled it as shown below:

root@9912084c44ab:/workspace/swap# gcc -v toy.c
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:hsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 9.4.0-1ubuntu1~20.04.2' --with-bugurl=file:///usr/share/doc/gcc-9/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,gm2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-9 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-9-9QDOt0/gcc-9-9.4.0/debian/tmp-nvptx/usr,hsa --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.2)
COLLECT_GCC_OPTIONS='-v' '-mtune=generic' '-march=x86-64'
 /usr/lib/gcc/x86_64-linux-gnu/9/cc1 -quiet -v -imultiarch x86_64-linux-gnu toy.c -quiet -dumpbase toy.c -mtune=generic -march=x86-64 -auxbase toy -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccOUyVzf.s
GNU C17 (Ubuntu 9.4.0-1ubuntu1~20.04.2) version 9.4.0 (x86_64-linux-gnu)
        compiled by GNU C version 9.4.0, GMP version 6.2.0, MPFR version 4.0.2, MPC version 1.1.0, isl version isl-0.22.1-GMP

GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/9/include-fixed"
ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/lib/gcc/x86_64-linux-gnu/9/include
 /usr/local/include
 /usr/include/x86_64-linux-gnu
 /usr/include
End of search list.
GNU C17 (Ubuntu 9.4.0-1ubuntu1~20.04.2) version 9.4.0 (x86_64-linux-gnu)
        compiled by GNU C version 9.4.0, GMP version 6.2.0, MPFR version 4.0.2, MPC version 1.1.0, isl version isl-0.22.1-GMP

GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
Compiler executable checksum: 01da938ff5dc2163489aa33cb3b747a7
COLLECT_GCC_OPTIONS='-v' '-mtune=generic' '-march=x86-64'
 as -v --64 -o /tmp/cc0bluCe.o /tmp/ccOUyVzf.s
GNU assembler version 2.34 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.34
COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/gcc/x86_64-linux-gnu/
LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/9/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/9/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-v' '-mtune=generic' '-march=x86-64'
 /usr/lib/gcc/x86_64-linux-gnu/9/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/9/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper -plugin-opt=-fresolution=/tmp/ccabXmJb.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/9/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/9/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/9/../../.. /tmp/cc0bluCe.o -lgcc --push-state --as-needed -lgcc_s --pop-state -lc -lgcc --push-state --as-needed -lgcc_s --pop-state /usr/lib/gcc/x86_64-linux-gnu/9/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crtn.o
COLLECT_GCC_OPTIONS='-v' '-mtune=generic' '-march=x86-64'

Once it compiled successfully and generated the a.out file, I used llvm-mctoll to disassemble it. I used the following command, but it failed:

llvm-mctoll -I /usr/include/stdlib.h a.out

Error log

Unknown prototype for function : __stack_chk_fail
Use -I </full/path/to/file>, where /full/path/to/file declares its prototype

I checked the ELF file using llvm-objdump, but could not find the function __stack_chk_fail.

Am I missing anything? Or maybe mctoll does not support pointer parameters yet? I read x86FuncPrototypeDiscovery.cpp and found that ArgTyVec treats a 64-bit physical register as int64 (instead of checking whether it may be a pointer).

tianboh commented 4 months ago

Well, I guess the default gcc compiler may be too old. When I use clang 14.0 to generate the a.out file and then run mctoll, it successfully generates an LLVM IR file.

However, I am still curious about how mctoll handles pointer types.

tianboh commented 4 months ago

I figured it out. I tried the following toy example:

#include <stdlib.h>

/*
 * Allocates storage for one double with malloc, stores the sum *a + *b in
 * it, and returns the pointer to that storage.
 * Note: the malloc result is written through without a NULL check.
 */
double* dptradd(double* a, double* b){
    double* c = malloc(sizeof(double));
    *c = *a + *b;
    return c;
}

/*
 * Driver for the example: passes the addresses of two local doubles to
 * dptradd. The returned pointer is stored in c but never read or freed.
 */
int main(){
    double a = 1.0;
    double b = 2.0;
    double* c = dptradd(&a, &b);
    return 0;
}

And the generated .ll file is shown below:

; ModuleID = 'a.out'
source_filename = "a.out"

@rodata_15 = private unnamed_addr constant [24 x i8] c"\01\00\02\00\00\00\00\00\00\00\00\00\00\00\00@\00\00\00\00\00\00\F0?", align 8, !ROData_SecInfo !0

declare dso_local ptr @malloc(i64)

define dso_local i64 @dptradd(i64 %arg1, i64 %arg2) {
entry:
  %stktop_8 = alloca i8, i32 40, align 1
  %tos = ptrtoint ptr %stktop_8 to i64
  %0 = add i64 %tos, 16
  %RBP_N.24 = inttoptr i64 %0 to ptr
  %1 = add i64 %tos, 24
  %RBP_N.16 = inttoptr i64 %1 to ptr
  %2 = add i64 %tos, 32
  %RBP_N.8 = inttoptr i64 %2 to ptr
  %3 = add i64 %tos, 0
  %RSP_P.0 = inttoptr i64 %3 to ptr
  store i64 3735928559, ptr %RSP_P.0, align 8
  %RBP = ptrtoint ptr %RSP_P.0 to i64
  store i64 %arg1, ptr %RBP_N.8, align 1
  store i64 %arg2, ptr %RBP_N.16, align 1
  %4 = zext i32 8 to i64
  %5 = call ptr @malloc(i64 %4)
  %RAX = ptrtoint ptr %5 to i64
  store i64 %RAX, ptr %RBP_N.24, align 1
  %memload = load i64, ptr %RBP_N.8, align 1
  %6 = inttoptr i64 %memload to ptr
  %memload1 = load double, ptr %6, align 1
  %memload2 = load i64, ptr %RBP_N.16, align 1
  %7 = inttoptr i64 %memload2 to ptr
  %memload3 = load double, ptr %7, align 1
  %XMM0 = fadd double %memload1, %memload3
  %memload4 = load i64, ptr %RBP_N.24, align 1
  %8 = inttoptr i64 %memload4 to ptr
  store double %XMM0, ptr %8, align 1
  %memload5 = load i64, ptr %RBP_N.24, align 1
  ret i64 %memload5
}

define dso_local i32 @main() {
entry:
  %stktop_8 = alloca i8, i32 40, align 1
  %tos = ptrtoint ptr %stktop_8 to i64
  %0 = add i64 %tos, 16
  %RBP_N.24 = inttoptr i64 %0 to ptr
  %1 = add i64 %tos, 24
  %RBP_N.16 = inttoptr i64 %1 to ptr
  %2 = add i64 %tos, 36
  %RBP_N.4 = inttoptr i64 %2 to ptr
  %3 = add i64 %tos, 0
  %RSP_P.0 = inttoptr i64 %3 to ptr
  store i64 3735928559, ptr %RSP_P.0, align 8
  %RBP = ptrtoint ptr %RSP_P.0 to i64
  %memload = load double, ptr getelementptr inbounds ([24 x i8], ptr @rodata_15, i32 0, i32 8), align 1, !ROData_Content !1
  %memload1 = load double, ptr getelementptr inbounds ([24 x i8], ptr @rodata_15, i32 0, i32 16), align 1, !ROData_Content !2
  store i32 0, ptr %RBP_N.4, align 1
  store double %memload1, ptr %RBP_N.16, align 1
  store double %memload, ptr %RBP_N.24, align 1
  %RDI = ptrtoint ptr %RBP_N.16 to i64
  %RSI = ptrtoint ptr %RBP_N.24 to i64
  %RAX = call i64 @dptradd(i64 %RDI, i64 %RSI)
  store i64 %RAX, ptr %stktop_8, align 1
  ret i32 0
}

!0 = !{i64 4202496}
!1 = !{ptr getelementptr inbounds ([24 x i8], ptr @rodata_15, i32 0, i32 8)}
!2 = !{ptr getelementptr inbounds ([24 x i8], ptr @rodata_15, i32 0, i32 16)}

A pointer is just an address, so it does not matter whether it is represented as i64 or as another 64-bit type. The operation that really matters is the fadd, which is handled properly in the subsequent instruction-raising step.