rust-osdev / x86_64

Library to program x86_64 hardware.
Apache License 2.0
786 stars 130 forks source link

optimize from_page_table_indices #456

Closed Freax13 closed 7 months ago

Freax13 commented 7 months ago

The new code behaves exactly the same as the old code, but LLVM can optimize it much better. In particular, the bounds checks previously implied by `set_bits` and `VirtAddr::new` can now be elided. This also increases the chances of these functions being inlined.

The generated assembly for the old implementation looks like this:

 push    rax
 movzx   edi, di
 cmp     edi, 512
 jae     .LBB48_8
 movzx   esi, si
 cmp     esi, 512
 jae     .LBB48_8
 movzx   edx, dx
 cmp     edx, 512
 jae     .LBB48_8
 movzx   eax, cx
 cmp     eax, 512
 jae     .LBB48_8
 mov     rcx, rdi
 shl     rcx, 39
 shl     rsi, 30
 or      rsi, rcx
 shl     rdx, 21
 shl     rax, 12
 or      rax, rdx
 or      rax, rsi
 shr     edi, 8
 je      .LBB48_7
 cmp     edi, 1
 jne     .LBB48_9
 shl     rax, 16
 sar     rax, 16
 shl     rax, 16
 sar     rax, 16
 and     rax, -4096
 pop     rcx
 lea     rdi, [rip + .L__unnamed_46]
 lea     rdx, [rip + .L__unnamed_15]
 mov     esi, 33
 call    qword ptr [rip + _ZN4core9panicking5panic17h87fd92496103e3b8E@GOTPCREL]
 mov     qword ptr [rsp], rax
 lea     rdi, [rip + .L__unnamed_9]
 lea     rcx, [rip + .L__unnamed_10]
 lea     r8, [rip + .L__unnamed_11]
 mov     rdx, rsp
 mov     esi, 74
 call    qword ptr [rip + _ZN4core6result13unwrap_failed17hc28f4ee1a6255957E@GOTPCREL]

The generated assembly for the new implementation looks like this:

 shl     rdi, 55
 movzx   esi, si
 shl     rsi, 46
 or      rsi, rdi
 movzx   edx, dx
 shl     rdx, 37
 movzx   eax, cx
 shl     rax, 28
 or      rax, rdx
 or      rax, rsi
 sar     rax, 16