dynamicslab / pysindy

A package for the sparse identification of nonlinear dynamical systems from data
https://pysindy.readthedocs.io/en/latest/
Other
1.42k stars 310 forks source link

How to improve fitting quality #405

Closed penguinaugustus closed 1 year ago

penguinaugustus commented 1 year ago

Hi, I am trying to use SINDy to fit real data, but it seems that in some cases the result is not that ideal: image Is there any way to improve the fitting quality?

I use a polynomial library up to order 4 and the MIOSR optimizer. Here the m values come from real data and the p values are simulated by dp/dt = m - p; p(0) = m(0). I pass p as a known variable that does not need a differential equation fitted for it. Generally, I want an ODE of the form dm/dt = f(m, p).

feature_names = ['m', 'p']
# Tensor the m-library (index 0) with the p-library (index 1) so the
# candidate library contains cross terms m^i * p^j.  In the original
# snippet this list was built but never passed to GeneralizedLibrary,
# so it silently had no effect.
tensor_array = [[1, 1]]
optimizer = ps.MIOSR(target_sparsity=4)
lib = ps.GeneralizedLibrary(
    libraries=[ps.PolynomialLibrary(degree=4), ps.PolynomialLibrary(degree=4)],
    tensor_array=tensor_array,
    inputs_per_library=np.array([[0, 0], [1, 1]]),
)

model = ps.SINDy(feature_names=feature_names, optimizer=optimizer, feature_library=lib)
# m is the fitted state; p enters as a known control input u.
model.fit(x=m, t=t, u=p)

model.print(precision=8)

Here is the minimal working example:

# Measured mRNA activity m(t): 100 samples of the real data to be fitted.
m = np.array([8.06507614, 8.24492219, 8.29733249, 8.27391431, 8.49718669, 8.68815935,
                     8.39065528, 8.35376189, 8.92140227, 9.2476673, 9.48261095, 9.79866641,
                     9.82507046, 9.7660012, 9.94113146, 10.27896051, 10.64427175, 10.97535268,
                     11.22499895, 11.75912975, 12.11423074, 11.96757811, 12.13828192, 12.62223643,
                     12.91182968, 12.94383923, 12.97469115, 13.3939334, 13.7154822, 14.60043218,
                     14.57654516, 14.7120959, 14.90908813, 15.12920214, 15.40044752, 15.66303872,
                     15.65100997, 15.64090564, 15.87576737, 15.83727529, 15.82679622, 15.83681249,
                     15.82346455, 15.72258733, 15.56106163, 16.03674107, 15.82245033, 15.3826691,
                     15.57294763, 16.00757809, 15.91011784, 15.68829094, 15.37313029, 15.36610578,
                     14.87910211, 14.1870185, 14.06456457, 13.78165902, 14.02614256, 13.99943065,
                     13.85595167, 13.86556443, 13.52033978, 13.46259513, 13.39942545, 13.12365366,
                     13.02415026, 12.96492241, 12.63318619, 12.88949443, 12.9291201, 12.52867013,
                     12.12117579, 12.13088467, 12.08772977, 12.19449426, 12.44300319, 12.50561571,
                     12.62035792, 12.74581865, 12.85686396, 12.93750886, 12.86765351, 12.77832373,
                     12.83034304, 12.74137705, 12.99730086, 13.42971092, 13.83743797, 14.22769074,
                     14.19513478, 14.31002351, 14.54601256, 14.59310633, 14.62945314, 14.8176819,
                     15.05254995, 15.18112703, 15.27713995, 15.89731939])
# Known variable p(t): simulated from dp/dt = m - p with p(0) = m(0);
# passed to SINDy as the control input u (no equation is fitted for it).
p = np.array([8.06507614, 8.06597069, 8.06811711, 8.0703545, 8.07280855, 8.07895965,
           8.08329598, 8.08581584, 8.09130106, 8.10119173, 8.11379178, 8.12885368,
           8.14602556, 8.1623298, 8.17866478, 8.19787272, 8.22024448, 8.24631367,
           8.27420942, 8.30616692, 8.34293214, 8.37939055, 8.4155627, 8.45509929,
           8.49811784, 8.54224107, 8.58559589, 8.63175877, 8.67956188, 8.73521254,
           8.79319521, 8.85112407, 8.91044054, 8.97090231, 9.03351677, 9.09801507,
           9.16364965, 9.22787314, 9.29260202, 9.35786051, 9.42209449, 9.48605617,
           9.54848862, 9.61130243, 9.67056113, 9.73093899, 9.79306256, 9.85036282,
           9.90568594, 9.96436887, 10.02460564, 10.08191488, 10.13569705, 10.18802305,
           10.23774149, 10.28031509, 10.31851218, 10.35380941, 10.38857735, 10.42519005,
           10.45975002, 10.49386741, 10.52597799, 10.55510637, 10.58449266, 10.61052871,
           10.63487625, 10.65918176, 10.6799882, 10.70052555, 10.72245792, 10.74308543,
           10.75812182, 10.77170939, 10.78501573, 10.79809474, 10.81329003, 10.82978952,
           10.84693826, 10.8652033, 10.88443588, 10.90444099, 10.92456141, 10.9431893,
           10.96167618, 10.97980966, 10.99796622, 11.02002278, 11.0459949, 11.07566367,
           11.10720537, 11.13791687, 11.17085469, 11.20461728, 11.23845613, 11.27293371,
           11.30929193, 11.34745341, 11.38545782, 11.42719932])
# 100 evenly spaced sample times spanning roughly (0.0117, 1.0].
t = np.linspace(0.011712133745535036, 0.9999999680023852, 100)

### create fitting problem and fit
feature_names = ['m', 'p']
# Tensor the m-library (index 0) with the p-library (index 1) so the
# candidate library contains cross terms m^i * p^j.  In the original
# snippet this list was built but never passed to GeneralizedLibrary,
# so it silently had no effect.
tensor_array = [[1, 1]]
optimizer = ps.MIOSR(target_sparsity=4)
lib = ps.GeneralizedLibrary(
    libraries=[ps.PolynomialLibrary(degree=4), ps.PolynomialLibrary(degree=4)],
    tensor_array=tensor_array,
    inputs_per_library=np.array([[0, 0], [1, 1]]),
)

model = ps.SINDy(feature_names=feature_names, optimizer=optimizer, feature_library=lib)
# m is the fitted state; p enters as a known control input u.
model.fit(x=m, t=t, u=p)

model.print(precision=8)

### unimportant: the code below just plots the outcome
def convert_equation(equation):
    """Turn a pysindy-printed equation such as
    "(m)' = 0.5 1 + -0.3 m + 0.1 m^2" into an eval-able Python
    expression in the variables m and p.
    """
    # Drop the left-hand side "(m)' =".
    equation = re.sub(r"\(m\)'\s*=", '', equation)
    # pysindy prints powers with '^'; Python needs '**'.
    equation = re.sub(r'\^', '**', equation)
    # Insert '*' between a numeric coefficient and a variable name.
    # Require at least one whitespace character so scientific notation
    # such as 1.2e-05 is not mangled into "1.2* e-05" (the original
    # used \s* and broke on such coefficients).
    equation = re.sub(r'(?<=\d)\s+([a-zA-Z])', r'* \1', equation)
    # Drop the constant feature name "1" (its coefficient stands alone).
    equation = re.sub(r'\s+1\s+', ' ', equation)
    # Normalise "+ -c" into "- c".
    equation = re.sub(r'\s+\+\s+-', ' - ', equation)
    return equation.strip()

# Capture model.print()'s stdout so the printed equation can be parsed.
# contextlib.redirect_stdout restores sys.stdout even if print() raises,
# unlike the original manual sys.stdout swap.
from contextlib import redirect_stdout

new_output = io.StringIO()
with redirect_stdout(new_output):
    model.print(precision=8)
captured_output = new_output.getvalue()
new_output.close()
converted_equation = convert_equation(captured_output.strip())

def model(y, t):
    """ODE right-hand side for odeint: y = [m, p].

    dm/dt is given by the SINDy-fitted expression in the module-level
    string `converted_equation` (in the variables m and p); dp/dt = m - p
    is the known dynamics of p.

    NOTE(review): this def rebinds the name `model`, shadowing the fitted
    ps.SINDy object defined earlier in the script -- consider renaming.
    """
    m, p = y
    # Compile the fitted expression once and cache it on the function
    # object; the original re-ran eval() on every odeint callback.
    # eval on a generated string is acceptable only because the string
    # comes from our own model.print() output, not untrusted input.
    rhs = getattr(model, "_rhs", None)
    if rhs is None:
        rhs = model._rhs = eval("lambda m, p: " + converted_equation)
    dmdt = rhs(m, p)
    dpdt = m - p

    return [dmdt, dpdt]

# Integrate the identified system forward from the data's first sample.
m0 = m[0]
p0 = p[0]
initial_conditions = [m0, p0]

solution = odeint(model, initial_conditions, t)
# Columns of the odeint result are the two state trajectories.
m2, p2 = solution.T

# Left axis: measured vs. reconstructed m; right axis: p on its own scale.
plt.figure()
ax1 = plt.gca()
ax1.scatter(t, m, label='interpolated m values', c='blue')
ax1.plot(t, m2, label="fitted m(t)", color="y")
ax1.legend(loc="upper left")
ax1.set_xlabel("time (min)")
ax1.set_ylabel("mRNA transcriptional activity per cell min ^(-1)")
ax1.grid(True)

ax2 = ax1.twinx()
ax2.plot(t, p, label='Solved p values')
ax2.plot(t, p2, label="fitted p(t)", color="r")
ax2.legend(loc="lower right")
ax2.set_ylabel("Another unit for p")

plt.show()

Thank you so much for your time and looking forward to your reply!