Performance considerations

adtzlr commented 2 years ago

If the hyperelastic materials would be based on C = F.T @ F and C would be stored as a vector in Voigt-notation, this gives a speed-up of 2 in stress evaluation and of 6 in elasticity evaluation! However, the code gets much more complicated.

adtzlr commented 2 years ago

Reference Example:

import casadi as ca

F = ca.SX.sym("F", 3, 3)

def W(C6):
    C = tensor(C6, 2)
    I1 = ca.trace(C)
    J = ca.sqrt(ca.det(C))
    return 0.5 * (J ** (-2 / 3) * I1 - 3) + 20 * (J - 1) ** 2 / 2

def voigt(C):
    return ca.vertcat(
        C[0, 0], C[1, 1], C[2, 2], 2 * C[0, 1], 2 * C[1, 2], 2 * C[0, 2]
    )

def tensor(S, s):
    return ca.horzcat(
        ca.vertcat(S[0], S[3] / s, S[5] / s),
        ca.vertcat(S[3] / s, S[1], S[4] / s),
        ca.vertcat(S[5] / s, S[4] / s, S[2]),
    )

def P(F):
    C6 = ca.SX.sym("C", 6, 1)
    S6 = 2 * ca.gradient(W(C6), C6)
    C = F.T @ F
    S = ca.Function("S", [C6], [S6])(voigt(C))
    return F @ tensor(S, 1)

stress = ca.Function("P", [F], [P(F)])
elasticity = ca.Function("A", [F], [ca.jacobian(P(F), F)])

import numpy as np

n = 100000
f = np.tile(np.eye(3) + np.random.rand(3, 3) / 10, (1, n))

Stress = stress.map(n, "thread", 16)
Elasticity = elasticity.map(n, "thread", 16)

P = Stress(f)
A = Elasticity(f)

import matadi as mat

nh = mat.MaterialHyperelastic(mat.models.neo_hooke, C10=0.5, bulk=20)

F2 = f.reshape(3, 3, 1, -1, order="F")
P2 = nh.gradient([F2])[0]
A2 = nh.hessian([F2])[0]

import felupe as fem

nh2 = fem.NeoHooke(mu=1, bulk=20)
P3 = nh2.gradient([F2])[0]
A3 = nh2.hessian([F2])[0]

assert np.allclose(P, P2.reshape(3, -1, order="F"))
assert np.allclose(A, A2.reshape(9, -1, order="F"))

# %timeit Elasticity(f)
# %timeit nh.hessian([F2])[0]s

adtzlr commented 2 years ago

%timeit Elasticity(f)
224 ms ± 6.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

%timeit nh.hessian([F2])[0]
1.28 s ± 56.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

(!!!)

adtzlr commented 2 years ago

An enhanced version is now even faster:

import casadi as ca
from functools import partial

F = ca.SX.sym("F", 3, 3)

def neo_hooke(C, mu=2.0):
    I1 = ca.trace(C)
    J = ca.sqrt(ca.det(C))
    return mu / 2 * (J ** (-2 / 3) * I1 - 3) + 20 * (J - 1) ** 2 / 2

def asvector(C):
    return ca.vertcat(
        C[0, 0], C[1, 1], C[2, 2], C[0, 1], C[1, 2], C[0, 2]
    )

def astensor(S):
    return ca.vertcat(
        ca.horzcat(S[0], S[3], S[5]),
        ca.horzcat(S[3], S[1], S[4]),
        ca.horzcat(S[5], S[4], S[2]),
    )

def psi(F, fun, **kwargs):
    C6 = ca.SX.sym("C", 6, 1)
    w = partial(fun, **kwargs)(astensor(C6))
    C = F.T @ F
    return ca.Function("W", [C6], [w])(asvector(C))

W = psi(F, neo_hooke, mu=1.0)
d2WdFdF, dWdF = ca.hessian(W, F)

energy = ca.Function("W", [F], [W])
stress = ca.Function("P", [F], [dWdF])
elasticity = ca.Function("A", [F], [d2WdFdF])

import numpy as np

n = 100000
f = np.tile(np.eye(3) + np.random.rand(3, 3) / 10, (1, n))

Stress = stress.map(n, "thread", 16)
Elasticity = elasticity.map(n, "thread", 16)

P = Stress(f)
A = Elasticity(f)

import matadi as mat

nh = mat.MaterialHyperelastic(mat.models.neo_hooke, C10=0.5, bulk=20)

F2 = f.reshape(3, 3, 1, -1, order="F")
P2 = nh.gradient([F2])[0]
A2 = nh.hessian([F2])[0]

import felupe as fem

nh2 = fem.NeoHooke(mu=1, bulk=20)
P3 = nh2.gradient([F2])[0]
A3 = nh2.hessian([F2])[0]

assert np.allclose(P, P2.reshape(3, -1, order="F"))
assert np.allclose(A, A2.reshape(9, -1, order="F"))

# %timeit Elasticity(f)
# %timeit nh.hessian([F2])[0]

adtzlr commented 2 years ago

It seems most of the time is spent in converting a casadi DM to a numpy array. A solution would be to convert small chunks in parallel, e.g. using joblib.

adtzlr / matadi

Performance considerations #120