plum-umd / the-838e-compiler

Compiler for CMSC 838E
2 stars 0 forks source link

Libraries #4

Closed dvanhorn closed 3 years ago

dvanhorn commented 3 years ago

Add a compile-library construct that consumes modules that consist only of function definitions and a simple provide clause, e.g.:

#lang racket
;; Example library module: nothing but function definitions plus a simple
;; provide clause. Only `length` and `reverse` are exported; `reverse/acc`
;; is not listed in the provide clause and so stays internal to the module.
(provide length reverse)

;; length : list -> integer
;; Counts the elements of xs by recurring on the tail and adding one per pair.
(define (length xs)
  (if (empty? xs)
      0
      (add1 (length (cdr xs)))))

;; reverse : list -> list
;; Returns the elements of xs in the opposite order, delegating to the
;; accumulator-passing helper with an empty accumulator.
(define (reverse xs)
  (reverse/acc xs '()))

;; reverse/acc : list list -> list
;; Moves the elements of xs one at a time onto the front of the accumulator a;
;; once xs is empty, a holds the reversed list.
;; (The recursive call previously named the misspelled, unbound identifier
;; `revserse/acc`.)
(define (reverse/acc xs a)
  (if (empty? xs)
      a
      (reverse/acc (cdr xs) (cons (car xs) a))))

The compiler should emit an object file that has global labels corresponding to the module's provisions.

Use this to construct a "standard library" that is linked into all programs.

rybla commented 3 years ago

What do you mean by "object file"? And if this object file only has labels, does that mean that libraries need to be recompiled every time they are linked?

dvanhorn commented 3 years ago

I mean a .o file. It shouldn't need to be recompiled, just linked with code generated by the compiler.

dvanhorn commented 3 years ago

Here's what I mean in more detail. Consider this program:

#lang racket
;; Whole-program form accepted by the compiler today: a `begin` holding the
;; function definitions, followed by the expression to evaluate.
(begin
  ;; length : list -> integer
  ;; Counts the elements of xs: 0 for the empty list, otherwise one more than
  ;; the length of the tail.
  (define (length xs)
    (if (empty? xs)
        0
        (add1 (length (cdr xs)))))

  ;; Program body: build the three-element list (1 2 3) and take its length.
  (length (cons 1 (cons 2 (cons 3 '())))))

This currently compiles to something like this:

        global _entry
        default rel
        section .text
        extern _peek_byte
        extern _read_byte
        extern _write_byte
        extern _raise_error
; _entry: compiled body of (length (cons 1 (cons 2 (cons 3 '())))).
; Builds three 16-byte pairs through rbx, passes the outermost pair to
; _length on the stack, and returns with the result in rax.
; NOTE(review): rdi is presumably the heap pointer handed in by the runtime,
; and 16/32/48 look like the literals 1/2/3 shifted left by 4 bits - confirm
; against the runtime's value encoding.
_entry:
        ; rbx = value received in rdi; used below as the allocation pointer
        mov rbx, rdi
        ; evaluate the literals 1, 2, 3 (encoded as 16, 32, 48) and stack them
        mov rax, 16
        push rax
        mov rax, 32
        push rax
        mov rax, 48
        push rax
        ; 152 is the innermost tail, i.e. the encoding of '() in this listing
        mov rax, 152
        ; (cons 3 '()): second cons argument at offset 0, first at offset 8
        mov [rbx + 0], rax
        pop rax
        mov [rbx + 8], rax
        ; result = pair address tagged with 2; advance rbx past the 16-byte pair
        mov rax, rbx
        or rax, 2
        add rbx, 16
        ; (cons 2 <previous pair>): same layout, tail is the pair just built
        mov [rbx + 0], rax
        pop rax
        mov [rbx + 8], rax
        mov rax, rbx
        or rax, 2
        add rbx, 16
        ; (cons 1 <previous pair>)
        mov [rbx + 0], rax
        pop rax
        mov [rbx + 8], rax
        mov rax, rbx
        or rax, 2
        add rbx, 16
        ; call length with the list as its single stack argument; caller pops it
        push rax
        call _length
        add rsp, 8
        ; return: rax holds the result of _length, rdx gets the current rbx
        ; (presumably so the runtime can see the advanced allocation pointer)
        mov rdx, rbx
        ret
; _length: compiled form of
;   (define (length xs) (if (empty? xs) 0 (add1 (length (cdr xs)))))
; In:  [rsp + 8] = xs (the single argument, just above the return address)
; Out: rax = result
; Jumps to _raise_error if xs is not tagged as a pair or if the recursive
; result fails the low-4-bits check before add1.
_length:
        ; (empty? xs): compare the argument with 152, the encoding of '()
        mov rax, [rsp + 8]
        cmp rax, 152
        ; mov leaves the flags alone, so je still tests the cmp above:
        ; rax = 24 when xs is empty, 56 otherwise (presumably #t / #f)
        mov rax, 24
        je _g2400
        mov rax, 56
_g2400:
        ; if the test produced 56, take the alternative branch at _if2398
        cmp rax, 56
        je _if2398
        ; consequent: the literal 0
        mov rax, 0
        jmp _if2399
_if2398:
        ; (cdr xs): the low 3 bits of xs must equal the pair tag 2
        mov rax, [rsp + 8]
        mov r9, rax
        and r9, 7
        cmp r9, 2
        jne _raise_error
        ; clear the tag to get the pair's address; the tail lives at offset 0
        xor rax, 2
        mov rax, [rax + 0]
        ; recursive call (length (cdr xs)); argument on the stack, caller pops
        push rax
        call _length
        add rsp, 8
        ; add1: the low 4 bits of the result must be zero (integer check),
        ; then add 16, which is 1 in the shifted integer encoding
        mov r9, rax
        and r9, 15
        cmp r9, 0
        jne _raise_error
        add rax, 16
_if2399:
        ret

This library feature should make it possible to compile this "library":

#lang racket
;; Library module: function definitions only, with `length` exported through
;; the provide clause.
(provide length)

;; length : list -> integer
;; Counts the elements of xs: 0 for the empty list, otherwise one more than
;; the length of the tail.
(define (length xs)
  (if (empty? xs)
      0
      (add1 (length (cdr xs)))))

which should generate the following code:

        global _length
        default rel
        section .text
        extern _raise_error
; _length: library entry point, exported via the `global _length` directive.
; Compiled form of
;   (define (length xs) (if (empty? xs) 0 (add1 (length (cdr xs)))))
; In:  [rsp + 8] = xs (the single argument, just above the return address)
; Out: rax = result
; Jumps to the external _raise_error if xs is not tagged as a pair or if the
; recursive result fails the low-4-bits check before add1.
_length:
        ; (empty? xs): compare the argument with 152
        ; (appears to be the encoding of '() - confirm against the runtime)
        mov rax, [rsp + 8]
        cmp rax, 152
        ; mov leaves the flags alone, so je still tests the cmp above:
        ; rax = 24 when xs is empty, 56 otherwise (presumably #t / #f)
        mov rax, 24
        je _g2400
        mov rax, 56
_g2400:
        ; if the test produced 56, take the alternative branch at _if2398
        cmp rax, 56
        je _if2398
        ; consequent: the literal 0
        mov rax, 0
        jmp _if2399
_if2398:
        ; (cdr xs): the low 3 bits of xs must equal the pair tag 2
        mov rax, [rsp + 8]
        mov r9, rax
        and r9, 7
        cmp r9, 2
        jne _raise_error
        ; clear the tag to get the pair's address; the tail lives at offset 0
        xor rax, 2
        mov rax, [rax + 0]
        ; recursive call (length (cdr xs)); argument on the stack, caller pops
        push rax
        call _length
        add rsp, 8
        ; add1: the low 4 bits of the result must be zero (integer check),
        ; then add 16 (presumably 1 in a 4-bit-shifted integer encoding)
        mov r9, rax
        and r9, 15
        cmp r9, 0
        jne _raise_error
        add rax, 16
_if2399:
        ret

Now this can be assembled independently and linked into the runtime:

> nasm -f macho64 length.s
> ld -r main.o char.o io.o length.o -o runtime.o

And programs ought to be able to call length as though it were defined in the program:

#lang racket
;; Client program: calls `length` without defining it; the definition is
;; expected to come from the library linked into the runtime.
(length (cons 1 (cons 2 (cons 3 '()))))

which should generate something like:

        global _entry
        default rel
        section .text
        extern _peek_byte
        extern _read_byte
        extern _write_byte
        extern _raise_error
        extern _length
; _entry: compiled body of (length (cons 1 (cons 2 (cons 3 '())))).
; Builds three 16-byte pairs through rbx, passes the outermost pair on the
; stack to _length (declared `extern` here, so it is resolved at link time),
; and returns with the result in rax.
; NOTE(review): rdi is presumably the heap pointer handed in by the runtime,
; and 16/32/48 look like the literals 1/2/3 shifted left by 4 bits - confirm
; against the runtime's value encoding.
_entry:
        ; rbx = value received in rdi; used below as the allocation pointer
        mov rbx, rdi
        ; evaluate the literals 1, 2, 3 (encoded as 16, 32, 48) and stack them
        mov rax, 16
        push rax
        mov rax, 32
        push rax
        mov rax, 48
        push rax
        ; 152 is the innermost tail, i.e. the encoding of '() in this listing
        mov rax, 152
        ; (cons 3 '()): second cons argument at offset 0, first at offset 8
        mov [rbx + 0], rax
        pop rax
        mov [rbx + 8], rax
        ; result = pair address tagged with 2; advance rbx past the 16-byte pair
        mov rax, rbx
        or rax, 2
        add rbx, 16
        ; (cons 2 <previous pair>): same layout, tail is the pair just built
        mov [rbx + 0], rax
        pop rax
        mov [rbx + 8], rax
        mov rax, rbx
        or rax, 2
        add rbx, 16
        ; (cons 1 <previous pair>)
        mov [rbx + 0], rax
        pop rax
        mov [rbx + 8], rax
        mov rax, rbx
        or rax, 2
        add rbx, 16
        ; call the library's length with the list as its single stack argument;
        ; the caller pops the argument afterwards
        push rax
        call _length
        add rsp, 8
        ; return: rax holds the result of _length, rdx gets the current rbx
        ; (presumably so the runtime can see the advanced allocation pointer)
        mov rdx, rbx
        ret

Linking this with the runtime to make an executable should then do what you expect:

>  nasm -f macho64 use-len.s
>  gcc runtime.o use-len.o -o use-len
>  ./use-len
3
dvanhorn commented 3 years ago

Closed by #35