isa-l_crypto slower than openssl/sha for hashing sha256

tsv1991 commented 1 year ago

I created a file with 1GB size. fallocate -l 1G bench.dat

Then I wrote three test programs. First, Python with hashlib:

time python3 ./bench.py
49bc20df15e412a64472421e13fe86ff1c5165e18b2afccf160d4dc19fe68a14

real    0m2.699s
user    0m2.313s
sys 0m0.379s

C with openssl/sha.h:

time ./sha256 ../bench.dat
File('../bench.dat'): 49bc20df15e412a64472421e13fe86ff1c5165e18b2afccf160d4dc19fe68a14

real    0m2.476s
user    0m2.310s
sys 0m0.153s

And C with sha256_mb.h (isa-l_crypto):

time ./a.out ../bench.dat
49bc20df15e412a64472421e13fe86ff1c5165e18b2afccf160d4dc19fe68a14

real    0m3.357s
user    0m3.171s
sys 0m0.175s

I was very surprised to see that hashing with isa-l_crypto took longer. What could be the problem?

I was looking at this code https://github.com/01org/isa-l_crypto/blob/master/sha256_mb/sha256_mb_test.c and https://github.com/intel/isa-l_crypto/issues/3

#include "sha256_mb.h"
#include <stdlib.h>
#include <stdio.h>
/* Size in bytes of the read buffer passed to each submit call (64 MiB). */
#define BUFF_SIZE (1024*1024*64)

/* Number of hash contexts in the pool; main() only ever uses ctxpool[0]. */
#define NUM_JOBS 1

int main() {
    SHA256_HASH_CTX_MGR *mgr = NULL;
    SHA256_HASH_CTX ctxpool[NUM_JOBS];

    posix_memalign((void *) &mgr, 16, sizeof(SHA256_HASH_CTX_MGR));
    sha256_ctx_mgr_init(mgr);
    hash_ctx_init(&ctxpool[0]);

    FILE *f = fopen("path_to_input_file", "r");
    if (f == NULL) {
        printf("can not open file");
        return 0;
    }
    void *buff = malloc(BUFF_SIZE);

    size_t count = 0;
    size_t read_in;

    sha256_ctx_mgr_submit(mgr, &ctxpool[0], buff, 0, HASH_FIRST);
    sha256_ctx_mgr_flush(mgr);
    do {
        read_in = fread(buff, 1, BUFF_SIZE, f);
        sha256_ctx_mgr_submit(mgr, &ctxpool[0], buff, (uint32_t) read_in, HASH_UPDATE);
        sha256_ctx_mgr_flush(mgr);
        count += read_in;
    } while (read_in > 0);

    sha256_ctx_mgr_submit(mgr, &ctxpool[0], buff, 0, HASH_LAST);
    sha256_ctx_mgr_flush(mgr);
    printf("file size:%zu\n", count);

    int j;
    for (j = 0; j < SHA256_DIGEST_NWORDS; j++) {
        printf("%08x ", ctxpool[0].job.result_digest[j]);
    }
    return 0;
}

I also tried this example and got the same hashing time:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <isa-l_crypto.h>
#include <endian.h>
#include <unistd.h>
#include <sys/time.h>

/*
 * Generic names used by the code below, bound here to the SHA-256
 * multi-buffer types, block size and manager functions.
 */
#define HASH_CTX_MGR    SHA256_HASH_CTX_MGR
#define HASH_CTX        SHA256_HASH_CTX
#define BLOCK_SIZE      SHA256_BLOCK_SIZE
#define ctx_mgr_init    sha256_ctx_mgr_init
#define ctx_mgr_submit  sha256_ctx_mgr_submit
#define ctx_mgr_flush   sha256_ctx_mgr_flush

/* Element count of a true array; gives a wrong answer if passed a pointer. */
#define ARRAY_SIZE(a)   (sizeof(a) / sizeof(*(a)))

/*
 * Print the digest stored in job->job.result_digest as one line of hex.
 *
 * job   - hash context whose result digest is printed.
 * htobe - when true, each word is passed through htobe32()/htobe64()
 *         before printing; when false, the word is printed as stored.
 *
 * Words are printed with 8 hex digits when the digest element is 4 bytes
 * wide and 16 hex digits when it is 8 bytes wide; any other width asserts.
 */
static void print_hash(HASH_CTX *job, bool htobe) {
    const size_t word_size = sizeof job->job.result_digest[0];

    /*
     * Index the digest array directly instead of through an int pointer, so
     * the element stride always matches the real word width, and cast each
     * value to exactly the type its printf conversion expects.
     */
    for (size_t i = 0; i < ARRAY_SIZE(job->job.result_digest); i++) {
        if (word_size == 4) {
            uint32_t word = (uint32_t) job->job.result_digest[i];
            if (htobe) {
                word = htobe32(word);
            }
            printf("%08x", (unsigned int) word);
        } else if (word_size == 8) {
            uint64_t word = (uint64_t) job->job.result_digest[i];
            if (htobe) {
                word = htobe64(word);
            }
            printf("%016llx", (unsigned long long) word);
        } else {
            assert(0);
        }
    }

    putchar('\n');
}

/*
 * Submit one buffer for ctx and, if the manager did not hand a finished
 * context straight back while ctx is still processing, flush the manager.
 *
 * Returns whatever ctx_mgr_submit() returned when that is non-NULL (or when
 * ctx is not processing). Otherwise returns the flushed context only when
 * flags contains HASH_LAST, and NULL in every other case.
 */
static HASH_CTX *
submit_flush(HASH_CTX_MGR *mgr, HASH_CTX *ctx, const void *buffer, uint32_t len, HASH_CTX_FLAG flags) {
    HASH_CTX *submitted = ctx_mgr_submit(mgr, ctx, buffer, len, flags);

    /* Nothing to flush: the submit already produced a context, or ctx is idle. */
    if (submitted != NULL || !hash_ctx_processing(ctx)) {
        return submitted;
    }

    HASH_CTX *flushed = ctx_mgr_flush(mgr);
    if (flushed == NULL) {
        return NULL;
    }

    /* Only one context is ever in flight, so the flush must return it intact. */
    assert(flushed == ctx);
    assert(flushed->error == HASH_CTX_ERROR_NONE);

    /* Intermediate chunks report NULL; only the final chunk yields the context. */
    return (flags & HASH_LAST) ? flushed : NULL;
}

static void calc_hash(const char *filename) {
    HASH_CTX_MGR *mgr = NULL;

    int e = posix_memalign((void **) &mgr, 16, sizeof *mgr);
    assert(e == 0);
    assert(mgr != NULL);
    ctx_mgr_init(mgr);

    HASH_CTX ctx;
    hash_ctx_init(&ctx);

    FILE *fp = fopen(filename, "r");
    //char buffer[BLOCK_SIZE * 2];
    char buffer[4 * 1024 * 1024];
    int i = 0;
    size_t nRead;
    HASH_CTX_FLAG flag;
    while (!feof(fp)) {
        //size_t x = ((unsigned long) random()) % sizeof buffer;
        nRead = fread(&buffer, 1, sizeof buffer, fp);
        e = ferror(fp);
        assert(e == 0);
        flag = i != 0 ? HASH_UPDATE : HASH_FIRST;
        submit_flush(mgr, &ctx, buffer, nRead, flag);
        i = 1;
    }
    (void) fclose(fp);

    HASH_CTX *ctx2 = submit_flush(mgr, &ctx, NULL, 0, HASH_LAST);
    assert(ctx2 != NULL);
    assert(ctx2 == &ctx);
    assert(ctx2->error == HASH_CTX_ERROR_NONE);

    print_hash(&ctx, false);

    free(mgr);
}

/*
 * Seed random() from the process id and the current time of day
 * (seconds and microseconds).
 */
static void rand_init(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);

    /*
     * Mix in unsigned arithmetic: left-shifting a signed pid by 16 can
     * overflow int, which is undefined behaviour. The low 32 bits of the
     * seed are the same as before whenever no overflow occurred.
     */
    unsigned int seed = ((unsigned int) getpid() << 16)
                        ^ (unsigned int) tv.tv_sec
                        ^ (unsigned int) tv.tv_usec;
    srandom(seed);
}

/*
 * Entry point. With no file arguments, print the ISA-L crypto library
 * version. Otherwise seed the random generator and hash each file named
 * on the command line in order. Always returns 0.
 */
int main(int argc, const char *argv[]) {
    if (argc > 1) {
        rand_init();

        /* argv[0] is the program name; files start at index 1. */
        int arg = 1;
        while (arg < argc) {
            calc_hash(argv[arg]);
            arg++;
        }
        return 0;
    }

    printf("ISA-L crypto version: %u.%u.%u\n",
           ISAL_CRYPTO_MAJOR_VERSION, ISAL_CRYPTO_MINOR_VERSION, ISAL_CRYPTO_PATCH_VERSION);
    return 0;
}

gbtucker commented 1 year ago

@tsv1991, you are not using the best part of multi-buffer hashing: hashing multiple independent jobs at one time. Disregarding file I/O time, you might be able to do 10-12 hashes in the time single-buffer hashing can do one. By just doing submit() followed immediately by flush() you lose any advantage of doing more than one job at a time. You do have to have independent hash jobs around, as there is no way to split up a single hash block into independently calculated chunks.

tsv1991 commented 1 year ago

@gbtucker should the hash calculated using multi-buffer hashing and the classic approach be the same? I couldn't get a hash to be the same.

intel / isa-l_crypto

isa-l_crypto slower than openssl/sha for hashing sha256 #111