achanda / ipnetwork

A library to work with CIDRs in Rust
Apache License 2.0
121 stars 38 forks source link

ipv6: rewrite core ipv6 methods to operate on u128s #187

Closed Alextopher closed 5 months ago

Alextopher commented 5 months ago

I was doing some assembly analysis and saw that the methods on Ipv6Network were not optimizing nearly as well as the equivalent Ipv4Network methods. This PR rewrites the core methods on Ipv6Network to use u128 similarly to how Ipv4Network uses u32.

While performing this change I addressed some clippy lints and made a few other common-sense additions.

Results on .contains()

Before:

.section .text.ipnetwork::ipv6::Ipv6Network::contains,"ax",@progbits
    .globl  ipnetwork::ipv6::Ipv6Network::contains
    .p2align    4, 0x90
    .type   ipnetwork::ipv6::Ipv6Network::contains,@function
ipnetwork::ipv6::Ipv6Network::contains:
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 226
        pub fn contains(&self, ip: Ipv6Addr) -> bool {
    .cfi_startproc
    push rbp
    .cfi_def_cfa_offset 16
    push r15
    .cfi_def_cfa_offset 24
    push r14
    .cfi_def_cfa_offset 32
    push rbx
    .cfi_def_cfa_offset 40
    .cfi_offset rbx, -40
    .cfi_offset r14, -32
    .cfi_offset r15, -24
    .cfi_offset rbp, -16
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/net/ip_addr.rs : 1373
        let [a, b, c, d, e, f, g, h] = unsafe { transmute::<_, [u16; 8]>(self.octets) };
    movzx edx, word ptr [rsi]
    movzx r11d, byte ptr [rdi + 16]
    xor eax, eax
    mov r8d, r11d
    sub r8b, 16
    mov r9d, 16
    mov r10d, r11d
    cmovae r10d, r9d
    mov ecx, -65536
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 206
        let mask = !(0xffff >> set_bits) as u16;
    sarx ebx, ecx, r10d
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 208
        chunk[1] = mask as u8;
    movzx ebp, bh
    vmovd xmm0, ebp
    vpinsrb xmm0, xmm0, ebx, 1
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/num/mod.rs : 493
        uint_impl! {
    movzx r8d, r8b
    cmovb r8d, eax
    cmp r8b, 16
    cmovae r8d, r9d
    mov r10d, r11d
    sub r10b, 32
    movzx r10d, r10b
    cmovb r10d, eax
    cmp r10b, 16
    cmovae r10d, r9d
    mov ebx, r11d
    sub bl, 48
    movzx ebx, bl
    cmovb ebx, eax
    cmp bl, 16
    cmovae ebx, r9d
    mov ebp, r11d
    sub bpl, 64
    movzx ebp, bpl
    cmovb ebp, eax
    cmp bpl, 16
    cmovae ebp, r9d
    mov r14d, r11d
    sub r14b, 80
    movzx r14d, r14b
    cmovb r14d, eax
    cmp r14b, 16
    cmovae r14d, r9d
    mov r15d, r11d
    sub r15b, 96
    movzx r15d, r15b
    cmovb r15d, eax
    cmp r15b, 16
    cmovae r15d, r9d
    sub r11b, 112
    movzx r11d, r11b
    cmovb r11d, eax
    cmp r11b, 16
    cmovae r11d, r9d
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/net/ip_addr.rs : 1373
        let [a, b, c, d, e, f, g, h] = unsafe { transmute::<_, [u16; 8]>(self.octets) };
    vmovd r9d, xmm0
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 234
        .all(|(mask, (a, b))| a & mask == b & mask)
    xor dx, word ptr [rdi]
    test dx, r9w
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/iter/traits/iterator.rs : 2462
        accum = f(accum, x)?;
    jne .LBB39_14
    sarx eax, ecx, r8d
    mov edx, eax
    shr edx, 8
    vpinsrb xmm0, xmm0, edx, 2
    vpinsrb xmm0, xmm0, eax, 3
    sarx eax, ecx, r10d
    mov edx, eax
    shr edx, 8
    vpinsrb xmm0, xmm0, edx, 4
    vpinsrb xmm5, xmm0, eax, 5
    sarx eax, ecx, ebx
    mov edx, eax
    shr edx, 8
    vpinsrb xmm0, xmm5, edx, 6
    vpinsrb xmm4, xmm0, eax, 7
    sarx eax, ecx, ebp
    mov edx, eax
    shr edx, 8
    vpinsrb xmm0, xmm4, edx, 8
    vpinsrb xmm3, xmm0, eax, 9
    sarx eax, ecx, r14d
    mov edx, eax
    shr edx, 8
    vpinsrb xmm0, xmm3, edx, 10
    vpinsrb xmm2, xmm0, eax, 11
    sarx eax, ecx, r15d
    mov edx, eax
    shr edx, 8
    vpinsrb xmm0, xmm2, edx, 12
    vpinsrb xmm1, xmm0, eax, 13
    sarx eax, ecx, r11d
    mov ecx, eax
    shr ecx, 8
    vpinsrb xmm0, xmm1, ecx, 14
    vpinsrb xmm0, xmm0, eax, 15
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/net/ip_addr.rs : 1373
        let [a, b, c, d, e, f, g, h] = unsafe { transmute::<_, [u16; 8]>(self.octets) };
    vpextrw eax, xmm0, 1
    movzx ecx, word ptr [rsi + 2]
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 234
        .all(|(mask, (a, b))| a & mask == b & mask)
    xor cx, word ptr [rdi + 2]
    test ecx, eax
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/iter/traits/iterator.rs : 2462
        accum = f(accum, x)?;
    jne .LBB39_2
    movzx eax, word ptr [rdi + 4]
    movzx ecx, word ptr [rsi + 4]
    vpextrw edx, xmm5, 2
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 234
        .all(|(mask, (a, b))| a & mask == b & mask)
    xor ecx, eax
    test cx, dx
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/iter/traits/iterator.rs : 2462
        accum = f(accum, x)?;
    jne .LBB39_2
    movzx eax, word ptr [rdi + 6]
    movzx ecx, word ptr [rsi + 6]
    vpextrw edx, xmm4, 3
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 234
        .all(|(mask, (a, b))| a & mask == b & mask)
    xor ecx, eax
    test cx, dx
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/iter/traits/iterator.rs : 2462
        accum = f(accum, x)?;
    jne .LBB39_2
    movzx eax, word ptr [rdi + 8]
    movzx ecx, word ptr [rsi + 8]
    vpextrw edx, xmm3, 4
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 234
        .all(|(mask, (a, b))| a & mask == b & mask)
    xor ecx, eax
    test cx, dx
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/iter/traits/iterator.rs : 2462
        accum = f(accum, x)?;
    jne .LBB39_2
    movzx eax, word ptr [rdi + 10]
    movzx ecx, word ptr [rsi + 10]
    vpextrw edx, xmm2, 5
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 234
        .all(|(mask, (a, b))| a & mask == b & mask)
    xor ecx, eax
    test cx, dx
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/iter/traits/iterator.rs : 2462
        accum = f(accum, x)?;
    jne .LBB39_2
    movzx eax, word ptr [rdi + 12]
    movzx ecx, word ptr [rsi + 12]
    vpextrw edx, xmm1, 6
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 234
        .all(|(mask, (a, b))| a & mask == b & mask)
    xor ecx, eax
    test cx, dx
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/iter/traits/iterator.rs : 2462
        accum = f(accum, x)?;
    je .LBB39_13
.LBB39_2:
    xor eax, eax
.LBB39_14:
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 235
        }
    pop rbx
    .cfi_def_cfa_offset 32
    pop r14
    .cfi_def_cfa_offset 24
    pop r15
    .cfi_def_cfa_offset 16
    pop rbp
    .cfi_def_cfa_offset 8
    ret
.LBB39_13:
    .cfi_def_cfa_offset 40
    movzx eax, word ptr [rdi + 14]
    movzx ecx, word ptr [rsi + 14]
    vpextrw edx, xmm0, 7
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 234
        .all(|(mask, (a, b))| a & mask == b & mask)
    xor ecx, eax
    test cx, dx
    sete al
    jmp .LBB39_14

After:

.section .text.ipnetwork::ipv6::Ipv6Network::contains,"ax",@progbits
    .globl  ipnetwork::ipv6::Ipv6Network::contains
    .p2align    4, 0x90
    .type   ipnetwork::ipv6::Ipv6Network::contains,@function
ipnetwork::ipv6::Ipv6Network::contains:
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 242
        pub fn contains(&self, ip: Ipv6Addr) -> bool {
    .cfi_startproc
    movzx eax, byte ptr [rdi + 16]
    xor ecx, ecx
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 190
        let mask = u128::MAX << (IPV6_BITS - self.prefix);
    xor edx, edx
    sub dl, al
    mov rax, -1
    shlx r8, rax, rdx
    test dl, 64
    cmove rcx, r8
    cmovne rax, r8
        // /rustc/25ef9e3d85d934b27d9dada2f9dd52b1dc63bb04/library/core/src/num/mod.rs : 1205
        uint_impl! {
    bswap rax
    bswap rcx
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 208
        let network = u128::from(self.addr) & mask;
    mov rdx, qword ptr [rdi]
    mov rdi, qword ptr [rdi + 8]
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 248
        (ip & mask) == net
    xor rdx, qword ptr [rsi]
    and rdx, rax
    xor rdi, qword ptr [rsi + 8]
    and rdi, rcx
    or rdi, rdx
    sete al
        // /home/mahonec/p/github.com/Alextopher/ipnetwork/src/ipv6.rs : 249
        }
    ret
achanda commented 5 months ago

Thanks! This is great.