larryxiao / libflatarray

Boost Software License 1.0
1 stars 0 forks source link

Study: related project, and proposals #5

Open larryxiao opened 9 years ago

larryxiao commented 9 years ago
larryxiao commented 9 years ago

Vc - Overview - Code@Compeng

larryxiao commented 9 years ago

SIMD Types: The Vector Type & Operations

// from the proposal
// Contrasts unary minus on a builtin unsigned short with unary minus on the
// corresponding Vc vector type.
unsigned short a = 40000;
// Integral promotion converts `a` to int before the negation is applied, so
// (with an int wider than 16 bits) b holds the negative int value -40000.
auto b = -a; // decltype(b) == int

ushort_v v = 40000;
// The vector type does not promote: the result keeps the unsigned short
// element type. Presumably each lane wraps like an unsigned short would.
auto w = -v; // decltype(w) == ushort_v

// b is a negative int, while w[0] is an element of an unsigned short vector,
// so the two values cannot compare equal.
assert(b == w[0]); // this fails
#include <stdio.h>

int main(int argc, const char *argv[])
{
    // The command line is not used by this experiment.
    (void)argc;
    (void)argv;

    const unsigned short value = 40000;
    // -value is evaluated as an int (integral promotion); the initialization
    // then converts that int back to unsigned short.
    const unsigned short negated = -value;

    // Output observed by the author:
    // a 0x00009c40 40000, b 0x000063c0 25536
    printf("a 0x%08x %d, b 0x%08x %d\n",
           value, value,
           negated, negated);
    return 0;
}
larryxiao commented 9 years ago

SIMD Types: The Mask Type & Write-Masking

// Mandelbrot example from the SIMD-types proposal, illustrating masks and
// write-masking. Each pass of the inner loop handles float_v::Size
// horizontally adjacent pixels at once.
// imageWidth, imageHeight, x0, y0, scale and colorizeNextPixels are defined
// outside this snippet.
typedef SimdArray<int, float_v::Size> IV;
for (int y = 0; y < imageHeight; ++y) {
    const float_v c_imag = y0 + y * scale;
    // x starts as IV::IndexesFromZero() and advances by one vector width; the
    // loop keeps running while at least one lane is still left of imageWidth.
    for (IV x = IV::IndexesFromZero(); any_of(x < imageWidth);
            x += float_v::Size) {
        const std::complex<float_v> c(x0 + x * scale, c_imag);
        std::complex<float_v> z = c;
        IV n = 0;
        // `inside` is a mask of the lanes whose norm(z) is still below 4.
        auto inside = norm(z) < 4.f;
        while (any_of(inside && n < 255)) {
            z = z * z + c;
            // Write-masked increment: intended to bump n only in the lanes
            // selected by `inside` (proposal syntax).
            where(inside) | n += 1;
            inside = norm(z) < 4.f;
        }
        IV colorValue = 255 - n;
        colorizeNextPixels(colorValue);
    }
}
// Vc-based Mandelbrot renderer. Walks the image one scan line at a time and
// computes float_v::Size horizontally adjacent pixels per inner iteration,
// storing the resulting color values directly into the scan line.
//
// image  - target image; its height(), width() and scanLine(y) are used.
// x0, y0 - offsets added to the scaled pixel column / row index.
// scale  - factor applied to the pixel column / row index.
// maxIt  - upper bound on the iteration count per pixel; also determines the
//          color scaling (0xff / maxIt).
//
// S, P and restart() are defined outside this listing.
//
// NOTE(review): the angle-bracket template arguments of this listing were
// lost. The cast targets below were restored from the declared types of the
// variables they initialize. `Mandel::` in the signature and `MyComplex` in
// the typedef presumably carried template arguments as well; they are left
// untouched here - confirm against the original Vc example.
template<> void Mandel::mandelMe(QImage &image, float x0,
        float y0, float scale, int maxIt)
{
    typedef MyComplex Z;
    const unsigned int height = image.height();
    const unsigned int width = image.width();
    // Cast to float so the division is not an integer division.
    const float_v colorScale = 0xff / static_cast<float>(maxIt);
    for (unsigned int y = 0; y < height; ++y) {
        unsigned int *VC_RESTRICT line = reinterpret_cast<unsigned int *>(image.scanLine(y));
        const float_v c_imag = y0 + y * scale;
        // Mask of the lanes whose column index is still inside the image.
        uint_m toStore;
        for (uint_v x = uint_v::IndexesFromZero(); !(toStore = x < width).isEmpty();
                x += float_v::Size) {
            const float_v c_real = x0 + static_cast<float_v>(x) * scale;
            Z z(c_real, c_imag);
            float_v n = float_v::Zero();
            float_m inside = z.norm() < S;
            // Iterate until no lane is both below the threshold S and below
            // the iteration limit.
            while (!(inside && n < maxIt).isEmpty()) {
                z = P(z, c_real, c_imag);
                // Masked increment: only the lanes selected by `inside` count up.
                ++n(inside);
                inside = z.norm() < S;
            }
            // Multiplying a value in 0..255 by 0x10101 copies it into the
            // three low bytes of the 32-bit pixel.
            uint_v colorValue = static_cast<uint_v>((maxIt - n) * colorScale) * 0x10101;
            if (toStore.isFull()) {
                colorValue.store(line, Vc::Unaligned);
                line += uint_v::Size;
            } else {
                // Partial vector at the right edge: store only the lanes that
                // are inside the image.
                colorValue.store(line, toStore, Vc::Unaligned);
                break; // we don't need to check again whether x[0] + float_v::Size < width to break out of the loop
            }
        }
        // Stop rendering the remaining lines when restart() returns true.
        if (restart()) {
            break;
        }
    }
}