fosslinux / live-bootstrap

Use of a Linux initramfs to fully automate the bootstrapping process
282 stars 26 forks source link

tcc 0.9.27 crashes building lwext4 in chroot/bwrap #281

Closed eduardosm closed 1 year ago

eduardosm commented 1 year ago

I was testing some changes that would optionally enable building kernels (fiwix, linux) and related tools (lwext4, kexec) in chroot/bwrap builds, when tcc crashed while building lwext4.

Output

```
+> cd build
+> untar --file ../src/lwext4-1.0.0-lb1.tar
The extraction of ../src/lwext4-1.0.0-lb1.tar was successful
+> cd lwext4-1.0.0-lb1
+> mkdir build_generic
+> mkdir build_generic/include
+> mkdir build_generic/include/generated
+> catm build_generic/include/generated/ext4_config.h ../../config/ext4_config.h
+> cd blockdev/linux
+> tcc -m32 -march=i386 -std=c89 -I../../include -I../../build_generic/include -I../../../tcc/tcc-0.9.27/include -DVERSION="1.0" -c file_dev.c
file_dev.c:71: warning: implicit declaration of function 'setbuf'
+> cd ../../src
+> alias cc=tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_balloc.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_bcache.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_bitmap.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_block_group.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_blockdev.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_crc32.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_debug.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_dir.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_dir_idx.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_extent.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_fs.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_hash.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_ialloc.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_inode.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_journal.c
ext4_journal.c:1845: warning: assignment makes integer from pointer without a cast
ext4_journal.c:1879: warning: assignment makes integer from pointer without a cast
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_mbr.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_mkfs.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_super.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_trans.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -c ext4_xattr.c
+> catm make_fiwix_initrd.c ../../../files/make_fiwix_initrd.c
+> tcc -m32 -march=i386 -std=c89 -I../include -I../build_generic/include -I../../tcc/tcc-0.9.27/include -DVERSION="1.0" -c make_fiwix_initrd.c
+> tcc -m32 -o /usr/bin/make_fiwix_initrd ext4.o ext4_balloc.o ext4_bcache.o ext4_bitmap.o ext4_block_group.o ext4_blockdev.o ext4_crc32.o ext4_debug.o ext4_dir.o ext4_dir_idx.o ext4_extent.o ext4_fs.o ext4_hash.o ext4_ialloc.o ext4_inode.o ext4_journal.o ext4_mbr.o ext4_mkfs.o ext4_super.o ext4_trans.o ext4_xattr.o ../blockdev/linux/file_dev.o make_fiwix_initrd.o
/usr/bin/tcc abnormal termination, signal number = 11
Subprocess error 11
ABORTING HARD
Subprocess error 1
ABORTING HARD
Subprocess error 1
ABORTING HARD
Subprocess error
ABORTING HARD
```

The core dump as-is was not very useful, so I added the -g flag in sysa/tcc-0.9.27/tcc-0.9.27.kaem to build tcc 0.9.27 with debug info.

After that, I got the following:

Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x08064f8a in fill_local_got_entries () at tccelf.c:1362
1362        for_each_elem(s1->got->reloc, 0, rel, ElfW_Rel) {
(gdb) bt
#0  0x08064f8a in fill_local_got_entries () at tccelf.c:1362
#1  0x080672c4 in elf_output_file () at tccelf.c:2223
#2  0x0806732b in tcc_output_file () at tccelf.c:2243
#3  0x0807852f in main () at tcc.c:354

and

(gdb) disassemble 
Dump of assembler code for function fill_local_got_entries:
   0x08064f6e <+0>:     push   %ebp
   0x08064f6f <+1>:     mov    %esp,%ebp
   0x08064f71 <+3>:     sub    $0x14,%esp
   0x08064f77 <+9>:     mov    0x8(%ebp),%eax
   0x08064f7a <+12>:    add    $0x33c,%eax
   0x08064f80 <+18>:    mov    (%eax),%ecx
   0x08064f82 <+20>:    add    $0x3c,%ecx
   0x08064f85 <+23>:    mov    (%ecx),%eax
   0x08064f87 <+25>:    add    $0x4,%eax
=> 0x08064f8a <+28>:    mov    (%eax),%ecx
   0x08064f8c <+30>:    mov    %ecx,-0x4(%ebp)
   0x08064f8f <+33>:    mov    0x8(%ebp),%eax
   0x08064f92 <+36>:    add    $0x33c,%eax
   0x08064f98 <+42>:    mov    (%eax),%ecx
   0x08064f9a <+44>:    add    $0x3c,%ecx
   0x08064f9d <+47>:    mov    (%ecx),%eax
   0x08064f9f <+49>:    add    $0x4,%eax
   0x08064fa2 <+52>:    mov    0x8(%ebp),%ecx
   0x08064fa5 <+55>:    add    $0x33c,%ecx
   0x08064fab <+61>:    mov    (%ecx),%edx
   0x08064fad <+63>:    add    $0x3c,%edx
   0x08064fb0 <+66>:    mov    (%edx),%ecx
...

This is the function that crashes:

/* See put_got_entry for a description.  This is the second stage
   where GOT references to local defined symbols are rewritten.

   Walks every entry of the GOT's relocation section (s1->got->reloc).
   For each R_RELATIVE entry the symbol index is dropped from r_info and
   the symbol's value is stored either in r_addend (when SHT_RELX is
   SHT_RELA) or directly in the GOT data at the symbol's got_offset.

   NOTE(review): for_each_elem dereferences s1->got->reloc (it reads
   ->data and ->data_offset) and nothing here checks it for NULL first,
   so this function faults when the GOT has no relocation section.  */
static void fill_local_got_entries(TCCState *s1)
{
    ElfW_Rel *rel;
    for_each_elem(s1->got->reloc, 0, rel, ElfW_Rel) {
    if (ELFW(R_TYPE)(rel->r_info) == R_RELATIVE) {
        int sym_index = ELFW(R_SYM) (rel->r_info);
        ElfW(Sym) *sym = &((ElfW(Sym) *) symtab_section->data)[sym_index];
        struct sym_attr *attr = get_sym_attr(s1, sym_index, 0);
        unsigned offset = attr->got_offset;
        /* Sanity check: the GOT offset recorded for the symbol must equal
           the relocation's offset relative to the start of the GOT.  */
        if (offset != rel->r_offset - s1->got->sh_addr)
          tcc_error_noabort("huh");
        /* Keep the R_RELATIVE type but replace the symbol index with 0.  */
        rel->r_info = ELFW(R_INFO)(0, R_RELATIVE);
#if SHT_RELX == SHT_RELA
        rel->r_addend = sym->st_value;
#else
        /* All our REL architectures also happen to be 32bit LE.  */
        write32le(s1->got->data + offset, sym->st_value);
#endif
    }
    }
}

and the for_each_elem macro:

/* Browse each elem of type <type> in section <sec> starting at elem <startoff>
   using variable <elem>.

   <startoff> is counted in elements of <type> (it is added to a <type> *),
   and the loop ends once <elem> reaches sec->data + sec->data_offset, where
   data_offset is a byte count.  <sec> is dereferenced for both ->data and
   ->data_offset and appears several times in the expansion, so it must be a
   non-NULL pointer.  */
#define for_each_elem(sec, startoff, elem, type) \
    for (elem = (type *) sec->data + startoff; \
         elem < (type *) (sec->data + sec->data_offset); elem++)

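It looks like the crash happens because s1->got->reloc is a NULL pointer: the faulting instruction at <+28> reads the field at offset 0x4 (data) through the pointer that was just loaded from offset 0x3c of s1->got, and 0x3c is where reloc sits in the 32-bit layout of Section.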

s1->got and s1->got->reloc are both pointers to a Section:

/* section definition

   Describes one ELF section: its data buffer, its ELF header fields and
   its links to related sections (link, reloc, hash, prev).  */
typedef struct Section {
    unsigned long data_offset; /* current data offset */
    unsigned char *data;       /* section data */
    unsigned long data_allocated; /* used for realloc() handling */
    int sh_name;             /* elf section name (only used during output) */
    int sh_num;              /* elf section number */
    int sh_type;             /* elf section type */
    int sh_flags;            /* elf section flags */
    int sh_info;             /* elf section info */
    int sh_addralign;        /* elf section alignment */
    int sh_entsize;          /* elf entry size */
    unsigned long sh_size;   /* section size (only used during output) */
    addr_t sh_addr;          /* address at which the section is relocated */
    unsigned long sh_offset; /* file offset */
    int nb_hashed_syms;      /* used to resize the hash table */
    struct Section *link;    /* link to another section */
    /* "if any": a section without relocations has no companion section
       here, so users must be prepared for this pointer to be NULL.  */
    struct Section *reloc;   /* corresponding section for relocation, if any */
    struct Section *hash;    /* hash table for symbols */
    struct Section *prev;    /* previous section on section stack */
    /* NOTE(review): one-element trailing array; presumably the struct is
       over-allocated so the full name fits -- confirm at the allocation site.  */
    char name[1];           /* section name */
} Section;

That's all I figured out for now, I will look more into it later.

fosslinux commented 1 year ago

I can reproduce this.

This is very odd. I have a lot of trouble justifying why this should even happen in the first place - how does builder-hex0 even work?

eduardosm commented 1 year ago

My first guess is that builder-hex0 does not have virtual memory, so the segfault is not trapped and tcc simply carries on. Although at that point tcc has entered the realm of undefined behavior, it is apparently able to produce a working executable.

I included a fix/workaround for this as part of https://github.com/fosslinux/live-bootstrap/pull/282, which consists of checking for a null pointer, although I am not sure whether it is legitimate for that pointer to be null.