Except for few modifications the code for assembling a vector looks like:
T w1[3] = {0};
T w0[1] = {0};
for (int ip = 0; ip < Np; ip++)
{
// vectorized across
for (int ic = 0; ic < Nc; ++ic)
{
w1[0] += w[4 + ic] * D0[ip][ic];
w1[1] += w[4 + ic] * D1[ip][ic];
w1[2] += w[4 + ic] * D2[ip][ic];
}
for (int ic = 0; ic < 4; ++ic)
w0[0] += w[ic] * Phi[ip][ic];
// Not vectorized
T fw[3] = {0};
fw[0] = (G[0][0] * w1[0] + G[0][1] * w1[1] + G[0][2] * w1[2]) * weights[ip];
fw[1] = (G[1][0] * w1[0] + G[1][1] * w1[1] + G[1][2] * w1[2]) * weights[ip];
fw[2] = (G[2][0] * w1[0] + G[2][1] * w1[1] + G[2][2] * w1[2]) * weights[ip];
for (int i = 0; i < Nc; ++i)
A[i] += fw[0 * Np + ip] * D0[ip][i] + fw[1 * Np + ip] * D1[ip][i] + fw[2 * Np + ip] * D2[ip][i];
}
The application of pull-back and geometric information is not vectorized.
We should consider storing more data at quadrature points to enable vectorization across quadrature points.
Except for few modifications the code for assembling a vector looks like:
The application of pull-back and geometric information is not vectorized. We should consider storing more data at quadrature points to enable vectorization across quadrature points.