Closed lgarithm closed 6 years ago
#include <array>
#include <cstdio>

/// Minimal 3-component vector: a thin aggregate around std::array<T, 3>.
template <typename T>
struct vec3 {
    std::array<T, 3> _val;
};

/// Dot product of two 3-vectors.
///
/// The products are accumulated left to right in a value of type T,
/// and the result is converted to double on return.
template <typename T>
inline double dot3d(const vec3<T> &p, const vec3<T> &q)
{
    T acc = 0;
    for (std::size_t axis = 0; axis < p._val.size(); ++axis) {
        acc += p._val[axis] * q._val[axis];
    }
    return acc;
}

/// Micro-benchmark: adds the same dot product into a running double
/// total one billion times, then prints the total with "%f".
void test_1()
{
    using scalar = double;

    vec3<scalar> lhs = {1, 2, 3};
    vec3<scalar> rhs = {4, 5, 6};

    const int iterations = 1000000000;
    double total = 0;
    // Each step adds onto the previous total, so the additions are
    // performed strictly in sequence.
    for (int step = 0; step < iterations; ++step) {
        total += dot3d(lhs, rhs);
    }

    printf("%f\n", total);
}

int main()
{
    test_1();
    return 0;
}
clang++ can speed this up 25x with -ffast-math
objdump -d shows the optimized code used addpd instead of addsd
objdump -d
clang++ can speed this up 25x with -ffast-math