Open Panhaolin2001 opened 8 hours ago
#include "VecCore/VecCore" using namespace vecCore; template<typename T> void mandelbrot(T xmin, T xmax, size_t nx, T ymin, T ymax, size_t ny, size_t max_iter, unsigned char *image) { T dx = (xmax - xmin) / T(nx); T dy = (ymax - ymin) / T(ny); for (size_t i = 0; i < nx; ++i) { for (size_t j = 0; j < ny; ++j) { size_t k = 0; T x = xmin + T(i) * dx, cr = x, zr = x; T y = ymin + T(j) * dy, ci = y, zi = y; do { x = zr*zr - zi*zi + cr; y = T(2.0) * zr*zi + ci; zr = x; zi = y; } while (++k < max_iter && (zr*zr + zi*zi < T(4.0))); image[ny*i + j] = k; } } } template<typename T> void mandelbrot_v(Scalar<T> xmin, Scalar<T> xmax, size_t nx, Scalar<T> ymin, Scalar<T> ymax, size_t ny, Scalar<Index<T>> max_iter, unsigned char *image) { T iota; for (size_t i = 0; i < VectorSize<T>(); ++i) Set<T>(iota, i, i); T dx = T(xmax - xmin) / T((Scalar<T>)nx); T dy = T(ymax - ymin) / T((Scalar<T>)ny), dyv = iota * dy; for (size_t i = 0; i < nx; ++i) { for (size_t j = 0; j < ny; j += VectorSize<T>()) { Scalar<Index<T>> k(0); T x = xmin + T((Scalar<T>)i) * dx, cr = x, zr = x; T y = ymin + T((Scalar<T>)j) * dy + dyv, ci = y, zi = y; Index<T> kv(0); Mask<T> m(true); do { x = zr*zr - zi*zi + cr; y = T((Scalar<T>)2.0) * zr*zi + ci; MaskedAssign<T>(zr, m, x); MaskedAssign<T>(zi, m, y); MaskedAssign<Index<T>>(kv, m, ++k); m = zr*zr + zi*zi < T((Scalar<T>)4.0); } while (k < max_iter && !MaskEmpty(m)); for (size_t k = 0; k < VectorSize<T>(); ++k) image[ny*i + j + k] = (unsigned char) Get(kv, k); } } } int main(){ double xmin = -2.1, xmax = 1.1; double ymin = -1.35, ymax = 1.35; size_t nx = 1024, ny = 864, max_iter = 500; unsigned char *image_scalar = new unsigned char[nx*ny]; unsigned char *image_simd = new unsigned char[nx*ny]; mandelbrot_v<backend::SIMDNative::Double_v>(xmin, xmax, nx, ymin, ymax, ny, max_iter, image_simd); mandelbrot<double>(xmin, xmax, nx, ymin, ymax, ny, max_iter, image_scalar); for(int i=0 ; i<nx*ny; ++i){ assert(image_simd[i] == image_scalar[i]); } delete[] image_scalar; delete[] image_simd; }
My compiler version is clang 16.0.6, and the compile options are -march=native -O2 -std=c++2a
-march=native -O2 -std=c++2a