zeam-vm / pelemay

Pelemay is a native compiler for Elixir, which generates SIMD instructions. There are plans to generate GPU code as well.
Apache License 2.0
186 stars 13 forks source link

Create String concatenation <> SIMD function #102

Open zacky1972 opened 4 years ago

zacky1972 commented 4 years ago

Create the following functions:

#include <erl_nif.h>

/* Requested buffer-level concatenation: takes the left and right binaries by
 * value and a pointer to the output binary `object`.
 * NOTE(review): the issue does not define the meaning of the int return
 * value - presumably a success/failure flag; confirm before relying on it. */
int string_concat_buffer(ErlNifBinary left, ErlNifBinary right, ErlNifBinary *object);
/* Requested term-level concatenation: takes the left and right terms and
 * returns a term - presumably the concatenated binary, mirroring Elixir's
 * `<>` operator; error behaviour for non-binary terms is not specified. */
ERL_NIF_TERM string_concat(ErlNifEnv *env, ERL_NIF_TERM left, ERL_NIF_TERM right);

Also, measure and compare the execution time.

The branch is string_concat.

zacky1972 commented 4 years ago

I implemented it, but it is much slower than Kernel.<>...

/*
 * NIF: concatenate two binaries with memcpy.
 *
 * argv[0] and argv[1] must both be binaries. The result is a newly allocated
 * binary containing the bytes of argv[0] followed by the bytes of argv[1].
 *
 * Returns a badarg exception when:
 *   - argc is not 2,
 *   - either argument is not a binary,
 *   - the combined size does not fit in size_t,
 *   - the output binary cannot be allocated.
 */
static
ERL_NIF_TERM concat_1(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
    if(__builtin_expect(argc != 2, false)) {
        return enif_make_badarg(env);
    }

    ErlNifBinary left_binary;
    if(__builtin_expect(!enif_inspect_binary(env, argv[0], &left_binary), false)) {
        return enif_make_badarg(env);
    }

    ErlNifBinary right_binary;
    if(__builtin_expect(!enif_inspect_binary(env, argv[1], &right_binary), false)) {
        return enif_make_badarg(env);
    }

    /* Unsigned addition wraps on overflow; a wrapped sum is smaller than
     * either operand. Without this check the allocation below would be too
     * small and the copies would write past its end. */
    const size_t total_size = left_binary.size + right_binary.size;
    if(__builtin_expect(total_size < left_binary.size, false)) {
        return enif_make_badarg(env);
    }

    ErlNifBinary object_binary;
    if(__builtin_expect(!enif_alloc_binary(total_size, &object_binary), false)) {
        return enif_make_badarg(env);
    }

    /* Skip zero-length copies so memcpy is never called with a data pointer
     * that might not be valid for an empty binary. */
    if(left_binary.size > 0) {
        memcpy(object_binary.data, left_binary.data, left_binary.size);
    }
    if(right_binary.size > 0) {
        memcpy(object_binary.data + left_binary.size, right_binary.data, right_binary.size);
    }

    return enif_make_binary(env, &object_binary);
}

/*
 * NIF: concatenate two binaries with explicit byte-copy loops that are
 * annotated for compiler vectorization.
 *
 * argv[0] and argv[1] must both be binaries. The result is a newly allocated
 * binary containing the bytes of argv[0] followed by the bytes of argv[1].
 *
 * Returns a badarg exception when:
 *   - argc is not 2,
 *   - either argument is not a binary,
 *   - the combined size does not fit in size_t,
 *   - the output binary cannot be allocated.
 *
 * NOTE(review): loop_vectorize_width is not defined in this block; it is
 * presumably a macro supplied elsewhere in the file - confirm.
 */
static
ERL_NIF_TERM concat_2(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
    if(__builtin_expect(argc != 2, false)) {
        return enif_make_badarg(env);
    }

    ErlNifBinary left_binary;
    if(__builtin_expect(!enif_inspect_binary(env, argv[0], &left_binary), false)) {
        return enif_make_badarg(env);
    }

    ErlNifBinary right_binary;
    if(__builtin_expect(!enif_inspect_binary(env, argv[1], &right_binary), false)) {
        return enif_make_badarg(env);
    }

    /* Unsigned addition wraps on overflow; a wrapped sum is smaller than
     * either operand. Without this check the allocation below would be too
     * small and the loops would write past its end. */
    const size_t total_size = left_binary.size + right_binary.size;
    if(__builtin_expect(total_size < left_binary.size, false)) {
        return enif_make_badarg(env);
    }

    ErlNifBinary object_binary;
    if(__builtin_expect(!enif_alloc_binary(total_size, &object_binary), false)) {
        return enif_make_badarg(env);
    }

    unsigned char *ptr = object_binary.data;

    /* The counters are size_t to match the binary size fields; a narrower
     * unsigned counter would wrap before reaching a size above UINT_MAX and
     * the loop would never terminate. */
#pragma clang loop vectorize_width(loop_vectorize_width)
    for(size_t i = 0; i < left_binary.size; i++) {
        *ptr++ = left_binary.data[i];
    }
    /* The same vectorization hint is applied to the second copy so both
     * halves are treated alike. */
#pragma clang loop vectorize_width(loop_vectorize_width)
    for(size_t i = 0; i < right_binary.size; i++) {
        *ptr++ = right_binary.data[i];
    }

    return enif_make_binary(env, &object_binary);
}
## StringConcatBench
benchmark  iterations   average time 
Kernel.<>  1000000000   0.01 µs/op
concat_1     10000000   0.24 µs/op
concat_2     10000000   0.31 µs/op