minio / blake2b-simd

Fast hashing using pure Go implementation of BLAKE2b with SIMD instructions
Apache License 2.0
253 stars 31 forks source link

calls make instead of an array #16

Closed ericlagergren closed 8 years ago

ericlagergren commented 8 years ago

Incredibly trivial, but there are some calls to `make` that should/could just be arrays.

From d0b23577dcb640eda10ac6cc4124cf7e4435700e Mon Sep 17 00:00:00 2001
From: Eric Lagergren <ericscottlagergren@gmail.com>
Date: Thu, 7 Jul 2016 15:47:52 -0700
Subject: [PATCH] use arrays instead of make for simple values.

---
 benchmarks_test.go    |  6 +++---
 compressAvx2_amd64.go | 11 ++++++-----
 compressAvx_amd64.go  | 11 ++++++-----
 compressSse_amd64.go  | 11 ++++++-----
 compress_generic.go   |  9 +++++----
 5 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/benchmarks_test.go b/benchmarks_test.go
index bb1f7e4..f8d4c15 100644
--- a/benchmarks_test.go
+++ b/benchmarks_test.go
@@ -27,11 +27,11 @@ import (

 func benchmarkHash(b *testing.B, hash func() hash.Hash) {
    b.SetBytes(1024 * 1024)
-   data := make([]byte, 1024)
+   var data [1024]byte
    for i := 0; i < b.N; i++ {
        h := hash()
        for j := 0; j < 1024; j++ {
-           h.Write(data)
+           h.Write(data[:])
        }
        h.Sum(nil)
    }
@@ -59,7 +59,7 @@ func BenchmarkComparisonBlake2B(b *testing.B) {

 // Benchmark blake2b implementation.
 var bench = New512()
-var buf = make([]byte, 128*1024)
+var buf [128 * 1024]byte

 func benchmarkSize(b *testing.B, size int) {
    b.SetBytes(int64(size))
diff --git a/compressAvx2_amd64.go b/compressAvx2_amd64.go
index 1b3ebae..ec53599 100644
--- a/compressAvx2_amd64.go
+++ b/compressAvx2_amd64.go
@@ -23,11 +23,12 @@ package blake2b
 func compressAVX2Loop(p []uint8, in, iv, t, f, shffle, out []uint64)

 func compressAVX2(d *digest, p []uint8) {
+   var (
+       in     [8]uint64
+       out    [8]uint64
+       shffle [8]uint64
+   )

-   in := make([]uint64, 8, 8)
-   out := make([]uint64, 8, 8)
-
-   shffle := make([]uint64, 8, 8)
    // vector for PSHUFB instruction
    shffle[0] = 0x0201000706050403
    shffle[1] = 0x0a09080f0e0d0c0b
@@ -40,7 +41,7 @@ func compressAVX2(d *digest, p []uint8) {

    in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]

-   compressAVX2Loop(p, in, iv[:], d.t[:], d.f[:], shffle, out)
+   compressAVX2Loop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])

    d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
 }
diff --git a/compressAvx_amd64.go b/compressAvx_amd64.go
index 7bed76c..cfa12c0 100644
--- a/compressAvx_amd64.go
+++ b/compressAvx_amd64.go
@@ -23,18 +23,19 @@ package blake2b
 func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64)

 func compressAVX(d *digest, p []uint8) {
+   var (
+       in     [8]uint64
+       out    [8]uint64
+       shffle [2]uint64
+   )

-   in := make([]uint64, 8, 8)
-   out := make([]uint64, 8, 8)
-
-   shffle := make([]uint64, 2, 2)
    // vector for PSHUFB instruction
    shffle[0] = 0x0201000706050403
    shffle[1] = 0x0a09080f0e0d0c0b

    in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]

-   blockAVXLoop(p, in, iv[:], d.t[:], d.f[:], shffle, out)
+   blockAVXLoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])

    d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
 }
diff --git a/compressSse_amd64.go b/compressSse_amd64.go
index 7032f46..d539a7a 100644
--- a/compressSse_amd64.go
+++ b/compressSse_amd64.go
@@ -23,18 +23,19 @@ package blake2b
 func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64)

 func compressSSE(d *digest, p []uint8) {
+   var (
+       in     [8]uint64
+       out    [8]uint64
+       shffle [2]uint64
+   )

-   in := make([]uint64, 8, 8)
-   out := make([]uint64, 8, 8)
-
-   shffle := make([]uint64, 2, 2)
    // vector for PSHUFB instruction
    shffle[0] = 0x0201000706050403
    shffle[1] = 0x0a09080f0e0d0c0b

    in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]

-   blockSSELoop(p, in, iv[:], d.t[:], d.f[:], shffle, out)
+   blockSSELoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])

    d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
 }
diff --git a/compress_generic.go b/compress_generic.go
index 62d81aa..e9e16e8 100644
--- a/compress_generic.go
+++ b/compress_generic.go
@@ -26,12 +26,13 @@ func compressGeneric(d *digest, p []uint8) {
        v13 := iv[5] ^ d.t[1]
        v14 := iv[6] ^ d.f[0]
        v15 := iv[7] ^ d.f[1]
-       var m [16]uint64

        j := 0
-       for i := 0; i < 16; i++ {
-           m[i] = uint64(p[j]) | uint64(p[j+1])<<8 | uint64(p[j+2])<<16 | uint64(p[j+3])<<24 |
-               uint64(p[j+4])<<32 | uint64(p[j+5])<<40 | uint64(p[j+6])<<48 | uint64(p[j+7])<<56
+       var m [16]uint64
+       for i := range m {
+           m[i] = uint64(p[j]) | uint64(p[j+1])<<8 | uint64(p[j+2])<<16 |
+               uint64(p[j+3])<<24 | uint64(p[j+4])<<32 | uint64(p[j+5])<<40 |
+               uint64(p[j+6])<<48 | uint64(p[j+7])<<56
            j += 8
        }

-- 
2.9.0
harshavardhana commented 8 years ago

Thanks for letting us know. Please send a PR that can be merged, and can you also post the output of `go test -run=NONE -bench .`?

ericlagergren commented 8 years ago

I did, but my laptop can only run the ssse branch, not either of the avx ones. The differences weren't huge. Still, there's no reason not to statically allocate them.

harshavardhana commented 8 years ago

> I did, but my laptop can only run the ssse branch, not either of the avx. The differences weren't huge. Still, no reason to not statically allocate them.

Thanks, can you make a proper PR?

ericlagergren commented 8 years ago

Sure, will do later tonight when I get home.