Terraspace / UASM

UASM - Macro Assembler
http://www.terraspace.co.uk/uasm.html
Other
222 stars 49 forks source link

Wrong macho64 output with struct names #184

Closed jdp1024 closed 3 months ago

jdp1024 commented 1 year ago

My environment:

Darwin localhost 21.6.0 Darwin Kernel Version 21.6.0: Mon Aug 22 20:17:10 PDT 2022; root:xnu-8020.140.49~2/RELEASE_X86_64 x86_64
@(#)PROGRAM:ld  PROJECT:ld64-820.1
BUILD 18:42:34 Sep 11 2022
configured to support archs: armv6 armv7 armv7s arm64 arm64e arm64_32 i386 x86_64 x86_64h armv6m armv7k armv7m armv7em
LTO support using: LLVM version 14.0.0, (clang-1400.0.29.202) (static support for 29, runtime is 29)
TAPI support using: Apple TAPI version 14.0.0 (tapi-1400.0.11)
Apple clang version 14.0.0 (clang-1400.0.29.202)
Target: x86_64-apple-darwin21.6.0
Thread model: posix
InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin

I used uasm 2.56.2 release package.

The assembly file, t.asm.

.code

Foo struct                      ; two dword fields, so SIZEOF Foo is 8
    f1  dd ?
    f2  dd ?
Foo ends

_r2 proc                        ; returns the constant 42 in rax; never references Foo
    mov rax, 42
    ret
_r2 endp

_rx proc                        ; called from m.c as rx(); returns SIZEOF Foo in rax
    mov rax, (SIZEOF Foo)       ; the only place the struct name Foo is used
    ret
_rx endp

end

The struct Foo is used in _rx.

Then the C file, m.c.

#include <stdio.h>
#include <stdlib.h>

/* Implemented in t.asm as _rx (the linker looks the C name rx up as "_rx").
   Declared with an empty parameter list, i.e. without a prototype. */
int rx();

/* Prints the integer returned by rx() followed by a newline. */
int main() {
    printf("%d\n", rx());
}

Then compile them:

$ uasm -nologo -macho64 t.asm
t.asm: 19 lines, 2 passes, 1294 ms, 0 warnings, 0 errors

and

cc -o m m.c t.o
Undefined symbols for architecture x86_64:
  "_rx", referenced from:
      _main in m-d22313.o
ld: symbol(s) not found for architecture x86_64
clang: error: linker command failed with exit code 1 (use -v to see invocation)

ld complained that _rx was not found. I checked it with nm:

$ nm t.o
0000000000000000 s Foo
0000000000000000 T _r2
0000000000000008 T _rx

It is there. Now the fun part:

I removed the _r2 function from t.asm. Now it looks like:

.code

Foo struct                      ; two dword fields, so SIZEOF Foo is 8
    f1  dd ?
    f2  dd ?
Foo ends

_rx proc                        ; now the only procedure in the file; called from m.c as rx()
    mov rax, (SIZEOF Foo)       ; the struct name Foo is still used here
    ret
_rx endp

end

then compiled both files:

$ uasm -nologo -macho64 t.asm
t.asm: 13 lines, 2 passes, 1402 ms, 0 warnings, 0 errors
$  cc -o m m.c t.o
$

The link was successful, but it crashed:

$ ./m
[1]    46331 segmentation fault  ./m

I disassembled the output m with objdump:

$ objdump --disassemble ./m

./m:    file format mach-o 64-bit x86-64

Disassembly of section __TEXT,__text:

0000000100003f40 <_main>:
100003f40: 55                           pushq   %rbp
100003f41: 48 89 e5                     movq    %rsp, %rbp
100003f44: b0 00                        movb    $0, %al
100003f46: e8 35 00 00 00               callq   0x100003f80 <dyld_stub_binder+0x100003f80>
100003f4b: 89 c6                        movl    %eax, %esi
100003f4d: 48 8d 3d 4e 00 00 00         leaq    78(%rip), %rdi          ## 0x100003fa2 <dyld_stub_binder+0x100003fa2>
100003f54: b0 00                        movb    $0, %al
100003f56: e8 25 00 00 00               callq   0x100003f80 <dyld_stub_binder+0x100003f80>
100003f5b: 31 c0                        xorl    %eax, %eax
100003f5d: 5d                           popq    %rbp
100003f5e: c3                           retq
100003f5f: 90                           nop
100003f60: 90                           nop
100003f61: 90                           nop
100003f62: 90                           nop
100003f63: 90                           nop
100003f64: 90                           nop
100003f65: 90                           nop
100003f66: 90                           nop
100003f67: 90                           nop
100003f68: 90                           nop
100003f69: 90                           nop
100003f6a: 90                           nop
100003f6b: 90                           nop
100003f6c: 90                           nop
100003f6d: 90                           nop
100003f6e: 90                           nop
100003f6f: 90                           nop
100003f70: 90                           nop
100003f71: 90                           nop
100003f72: 90                           nop
100003f73: 90                           nop
100003f74: 90                           nop
100003f75: 90                           nop
100003f76: 90                           nop
100003f77: 90                           nop
100003f78: 90                           nop
100003f79: 90                           nop
100003f7a: 90                           nop
100003f7b: 90                           nop
100003f7c: 90                           nop
100003f7d: 90                           nop
100003f7e: 90                           nop
100003f7f: 90                           nop

Disassembly of section __TEXT,__stubs:

0000000100003f80 <__stubs>:
100003f80: ff 25 7a 40 00 00            jmpq    *16506(%rip)            ## 0x100008000 <dyld_stub_binder+0x100008000>

Disassembly of section __TEXT,__stub_helper:

0000000100003f88 <__stub_helper>:
100003f88: 4c 8d 1d 79 40 00 00         leaq    16505(%rip), %r11       ## 0x100008008 <__dyld_private>
100003f8f: 41 53                        pushq   %r11
100003f91: ff 25 69 00 00 00            jmpq    *105(%rip)              ## 0x100004000 <dyld_stub_binder+0x100004000>
100003f97: 90                           nop
100003f98: 68 00 00 00 00               pushq   $0
100003f9d: e9 e6 ff ff ff               jmp 0x100003f88 <__stub_helper>

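As you can see, there are TWO callq 0x100003f80 calls: the first one should be the call to _rx, yet it goes to the same address as the second one, which is the printf stub in __stubs.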

nm solved the mystery:

$ nm ./m
0000000100008008 d __dyld_private
0000000100000000 T __mh_execute_header
0000000100003f40 T _main
                 U _printf
0000000100003f80 T _rx
                 U dyld_stub_binder

_rx is there, but it is EMPTY: its address is the start of __stubs, so none of its code ended up in the executable. Now for fun part number 2: I changed t.asm a little bit to remove the use of struct Foo:

.code

Foo struct                      ; still declared, but no longer referenced below
    f1  dd ?
    f2  dd ?
Foo ends

_rx proc                        ; called from m.c as rx()
    mov rax, 42; (SIZEOF Foo) replaced by a literal so the struct name is unused
    ret
_rx endp

end

then compiled both files again and ran m:

$ ./m
42

I spent many hours working out why this happened before I arrived at this minimal test case. My guess is that ld uses the definition of struct Foo when linking against _rx. I am no expert on the macho-64 format; the workaround I have found is to not output the type to the resulting .o file.

/*
 * Append pstr to the end of the module's string list (mm->strings) and
 * assign its index and string-table offset.
 *
 * Returns 1 when the entry was linked into the list, or 0 when it was
 * rejected; a rejected entry is left untouched and is not linked anywhere.
 */
static int macho_add_string(struct strentry *pstr, struct macho_module *mm)
{
    struct strentry *tail;
    int tailIdx;
    int nextOffset;

    /* Don't output static type: non-public SYM_TYPE symbols are skipped. */
    if (pstr->sym->state == SYM_TYPE && !pstr->sym->ispublic)
        return 0;

    /* Empty list: the new entry becomes the head with index 1, offset 1. */
    if (mm->strings == NULL)
    {
        pstr->idx = 1;
        pstr->offset = 1;
        mm->strings = pstr;
        return 1;
    }

    /*
     * Walk to the last entry. Offsets start at 1 and every existing string
     * occupies its length plus one terminator byte, so the running sum is
     * the offset at which the new string will start.
     */
    tail = mm->strings;
    tailIdx = 1;
    nextOffset = 1 + strlen(tail->pstr) + 1;
    while (tail->next != NULL)
    {
        tail = tail->next;
        nextOffset += strlen(tail->pstr) + 1;
        tailIdx = tail->idx;
    }

    pstr->idx = tailIdx + 1;
    pstr->offset = nextOffset;
    tail->next = pstr;
    return 1;
}

Then, in macho_build_string_tbl, leave the counters untouched when macho_add_string returns 0, like this:

int macho_build_string_tbl(struct symtab_command *pSymCmd, struct macho_module *mm)
{
    int tblSize = 0;
    int i = 0;
    struct asym *sym = NULL;
    struct strentry *pstr = NULL;
    int totalSymCount = 0;

    /* Normal local symbols */
    while (sym = SymEnum(sym, &i))
    {
        if (strcmp(sym->name, "$xdatasym") == 0) continue;
        if (sym->state != SYM_MACRO && sym->state != SYM_SEG && sym->state != SYM_TMACRO && sym->predefined == 0 && sym->state != SYM_GRP && sym->isequate == 0)
        { 
            if (sym->state != SYM_EXTERNAL && !sym->ispublic && sym->used)
            {
                pstr = malloc(sizeof(struct strentry));
                memset(pstr, 0, sizeof(struct strentry));
                pstr->pstr = sym->name;
                pstr->sym = sym;
                if (macho_add_string(pstr, mm))
                {
                    mm->symCount++;
                    totalSymCount++;
                    tblSize += strlen(sym->name) + 1;
                }
            }
        }
    }
    mm->extSymIdx = totalSymCount;

    /* External public symbols */
    while (sym = SymEnum(sym, &i))
    {
        if (sym->state != SYM_MACRO && sym->state != SYM_SEG && sym->state != SYM_TMACRO && sym->predefined == 0 && sym->state != SYM_GRP && sym->isequate == 0)
        {
            if (sym->ispublic)
            {
                pstr = malloc(sizeof(struct strentry));
                memset(pstr, 0, sizeof(struct strentry));
                pstr->pstr = sym->name;
                pstr->sym = sym;
                if (macho_add_string(pstr, mm))
                {
                    mm->extSymCount++;
                    totalSymCount++;
                    tblSize += strlen(sym->name) + 1;
                }
            }
        }
    }
    mm->undefSymIdx = totalSymCount;

    /* Undefined symbols */
    while (sym = SymEnum(sym, &i))
    {
        if (sym->state != SYM_MACRO && sym->state != SYM_SEG && sym->state != SYM_TMACRO && sym->predefined == 0 && sym->state != SYM_GRP && sym->isequate == 0)
        {
            if (sym->state == SYM_EXTERNAL)
            {
                pstr = malloc(sizeof(struct strentry));
                memset(pstr, 0, sizeof(struct strentry));
                pstr->pstr = sym->name;
                pstr->sym = sym;
                if (macho_add_string(pstr, mm))
                {
                    mm->undefSymCount++;
                    totalSymCount++;
                    tblSize += strlen(sym->name) + 1;
                }
            }
        }
    }
    return(tblSize);
}

Now everything works with the original t.asm and m.c.

$ uasm -nologo -macho64 t.asm
$ cc -o m m.c t.o
$ ./m
8

nm's output looks fine:

$ nm m
0000000100008008 d __dyld_private
0000000100000000 T __mh_execute_header
0000000100003f00 T _main
                 U _printf
0000000100003f40 T _r2
0000000100003f48 T _rx
                 U dyld_stub_binder

The disassembled output is good:

objdump --disassemble ./m

./m:    file format mach-o 64-bit x86-64

Disassembly of section __TEXT,__text:

0000000100003f00 <_main>:
100003f00: 55                           pushq   %rbp
100003f01: 48 89 e5                     movq    %rsp, %rbp
100003f04: b0 00                        movb    $0, %al
100003f06: e8 3d 00 00 00               callq   0x100003f48 <_rx>
100003f0b: 89 c6                        movl    %eax, %esi
100003f0d: 48 8d 3d 8e 00 00 00         leaq    142(%rip), %rdi         ## 0x100003fa2 <dyld_stub_binder+0x100003fa2>
100003f14: b0 00                        movb    $0, %al
100003f16: e8 65 00 00 00               callq   0x100003f80 <dyld_stub_binder+0x100003f80>
100003f1b: 31 c0                        xorl    %eax, %eax
100003f1d: 5d                           popq    %rbp
100003f1e: c3                           retq

... a lot of nops omitted ...

0000000100003f40 <_r2>:
100003f40: 48 c7 c0 2a 00 00 00         movq    $42, %rax
100003f47: c3                           retq

0000000100003f48 <_rx>:
100003f48: 48 c7 c0 08 00 00 00         movq    $8, %rax
100003f4f: c3                           retq
john-terraspace commented 3 months ago

macho64 will no longer be supported or developed.