HARPgroup / HSPsquared

Hydrologic Simulation Program Python (HSPsquared)
GNU Affero General Public License v3.0
1 stars 0 forks source link

Equation Solver: integer keyed tokenized #38

Closed rburghol closed 1 year ago

rburghol commented 1 year ago

Prototype Approaches and Trade-offs:

Performance: Execute Equations Stored as Sequence of 1 operator and 1-2 operands

Parse Equations into Sequential Operators/Operands

# we parse the equation during readuci/pre-processing and break it into njit'able pieces
exprStack[:] = []
results = BNF().parseString(allops["/OBJECTS/RCHRES_0001/discharge_mgd/equation"], parseAll=True)

ps = []
ep = exprStack
pre_evaluate_stack(ep[:], ps)

Data Model

Early String Keyed Variation Was Slow

First draft iterator of operators had string keys (i.e. state["/RCHRES/SPCLS/Qin..."], see #42 ):

Code 1: Simple Demo with integer keyed operators.

import numpy as np
import time
from numba.typed import Dict
from numpy import zeros
from numba import int8, float32, njit, types    # import the types

def is_float_digit(n: str) -> bool:
     try:
         float(n)
         return True
     except ValueError:
         return False

@njit
def exec_eqn_nall(op_tokens, state_ix):
    op_class = op_tokens[0] # we actually will use this in the calling function, which will decide what 
                      # next level function to use 
    op = op_tokens[2]
    val1 = state_ix[op_tokens[3]]
    val2 = state_ix[op_tokens[4]]
    result = 0
    if op == 1:
        result = val1 - val2
    elif op == 2:
        result = val1 + val2
    elif op == 3:
        result = val1 * val2 
    elif op == 4:
        result = val1 / val2 
    return result 

# function to lop through and execute once for each time step
@njit
def iterate_nall(op_tokens, state_ix, steps):
    checksum = 0.0
    for step in range(steps):
        state_ix[1] = exec_eqn_nall(op_tokens[1], state_ix)
        checksum += state_ix[1]
        #val = exec_eqn_nall(op_tokens[1], state_ix)
        #state_ix[1] = val
        #checksum += val
    return checksum

op_tokens = Dict.empty(key_type=types.int64, value_type=types.i8[:])
# todo: replace the population of op_tokes with the class handler code
op_tokens[1] = np.asarray([1, 1, 1, 2, 3], dtype="i8")
op_tokens[2] = np.asarray([1, 1, 2, 2, 3], dtype="i8")

state_ix = Dict.empty(key_type=types.int64, value_type=types.float64)
state_ix[2] = 1.0 # value of the variable constant from the equation for discharge_mgd 
state_ix[3] = 0.15 # the value of the variable "consumption" 

exec_eqn_nall(op_tokens[1], state_ix)

steps = 40 * 24 * 365
start = time.time()
num = iterate_nall(op_tokens, state_ix, steps)
end = time.time()
print(end - start, "seconds")

Code 2: Parse equations, load state_ix, and execute loop of integer keyed, tokenized equations

Code below loads all necessary elements and tests complex equation with nested tokens (for equation steps):

import numpy as np
import time
from numba.typed import Dict
from numpy import zeros
from numba import int8, float32, njit, types    # import the types

# we parse the equation during readuci/pre-processing and break it into njit'able pieces
# this forms the basis of our object parser code to run at import_uci step 
exprStack[:] = []
#eqn = "(5 + 27.2) * 16.3"
#eqn = "(121.4 + 65.3 * (5 + 27.2)) * 16.3 + 97.2"
eqn = "(75.9 / (5 + 27.2)) * 16.3 + 100.4 / 7.3 - (134 + 23)"
eqn = "(75.9 / (5 + 27.2)) * 16.3 + (100.4 / 7.3 - (134 + 23))^ 2.0"
eqn_eval = "(75.9 / (5 + 27.2)) * 16.3 + pow(100.4 / 7.3 - (134 + 23), 2.0)"
results = BNF().parseString(eqn, parseAll=True)

ps = []
ep = exprStack
pre_evaluate_stack(ep[:], ps)

op_tokens = Dict.empty(key_type=types.int64, value_type=types.i8[:])
state_ix = Dict.empty(key_type=types.int64, value_type=types.float64)

rps = 0
k = append_state(state_ix, 0.0) # initialize this equation operator in the state_ix Dict, returns unique key
tops = [1, 1, len(ps)] # set up the first 2 op class type and state_ix
for i in range(len(ps)):
    if ps[i][0] == '-': op = 1
    if ps[i][0] == '+': op = 2
    if ps[i][0] == '*': op = 3
    if ps[i][0] == '/': op = 4
    if ps[i][0] == '^': op = 5
    if ps[i][1] == None: o1 = -1 
    else: o1 = ps[i][1]
    if ps[i][2] == None: o2 = -1 
    else: o2 = ps[i][2]
    tops.append(op)
    tops.append(o1)
    tops.append(o2)

def append_state(state_ix, var_value):
    if (len(state_ix) == 0):
      val_ix = 1
    else:
        val_ix = max(state_ix.keys()) + 1 # next ix value
    state_ix[val_ix] = var_value
    return val_ix

# now stash the string vars as new state vars
for j in range(len(tops)):
    if isinstance(tops[j], str):
        # must add this to the state array as a constant
        s_ix = append_state(state_ix, float(tops[j]))
        tops[j] = s_ix

op_tokens[k] = np.asarray(tops, dtype="i8")

# test 
exec_eqn_nall_m(op_tokens[k], state_ix)

@njit
def exec_eqn_nall_m(op_token, state_ix):
    op_class = op_token[0] # we actually will use this in the calling function, which will decide what 
                      # next level function to use 
    result = 0
    num_ops = op_token[2]
    s = np.array([0.0])
    s_ix = -1 # pointer to the top of the stack
    s_len = 1
    #print(num_ops, " operations")
    for i in range(num_ops): 
        op = op_token[3 + 3*i]
        t1 = op_token[3 + 3*i + 1]
        t2 = op_token[3 + 3*i + 2]
        # if val1 or val2 are < 0 this means they are to come from the stack
        # if token is negative, means we need to use a stack value
        #print("s", s)
        if t1 < 0: 
            val1 = s[s_ix]
            s_ix -= 1
        else:
            val1 = state_ix[t1]
        if t2 < 0: 
            val2 = s[s_ix]
            s_ix -= 1
        else:
            val2 = state_ix[t2]
        #print(s_ix, op, val1, val2)
        if op == 1:
            #print(val1, " - ", val2)
            result = val1 - val2
        elif op == 2:
            #print(val1, " + ", val2)
            result = val1 + val2
        elif op == 3:
            #print(val1, " * ", val2)
            result = val1 * val2 
        elif op == 4:
            #print(val1, " / ", val2)
            result = val1 / val2 
        elif op == 5:
            #print(val1, " ^ ", val2)
            result = pow(val1, val2) 
        s_ix += 1
        if s_ix >= s_len: 
            s = np.append(s, 0)
            s_len += 1
        s[s_ix] = result
    result = s[s_ix]
    return result 

exec_eqn_nall_m(op_tokens[k], state_ix)

steps = 40 * 365 * 24

@njit 
def eeee(op_tokens, state_ix, dict_ix, steps):
    checksum = 0.0
    for step in range(steps):
        for i in op_tokens.keys():
            s_ix = op_tokens[i][1] # index of state for this component
            if op_tokens[i][0] == 1:
                state_ix[s_ix] = exec_eqn_nall_m(op_tokens[i], state_ix)
            elif op_tokens[i][0] == 2:
                state_ix[s_ix] = exec_tbl_eval(op_tokens[i], state_ix, dict_ix)
            checksum += state_ix[i]
    return checksum

dict_ix = Dict.empty(key_type=types.int64, value_type=types.float32[:,:])

start = time.time()
num = eeee(op_tokens, state_ix, dict_ix, steps)
end = time.time()
print(end - start, "seconds")
rburghol commented 1 year ago
@njit
def exec_eqn_num(op, ops, state):
    # these 2 retrievals amount to approx. 35% of exec time.
    val1 = state[ops['arg1']]
    val2 = state[ops['arg2']]
    #return 
    #print("val1 = ", val1, "val2 = ", val2, "op = ", ops[0])
    if op == 1:
        result = val1 - val2
    elif op == 2:
        result = val1 + val2
    elif op == 3:
        result = val1 * val2 
    return result 

@njit
def iterate_num(atpop, atpops, state, steps):
    checksum = 0.0
    for step in range(steps):
        checksum += exec_eqn_num(atpop, atpops, state)
    return checksum

# manually set an opnum = 1 (-) for testing 
allops['/OBJECTS/RCHRES_0001/discharge_mgd/_ops/_op0/opnum'] = "1"
atpop = int(allops['/OBJECTS/RCHRES_0001/discharge_mgd/_ops/_op0/opnum'])
start = time.time()
num = iterate_num(atpop, atpops, state, steps)
end = time.time()
print(end - start, "seconds")
rburghol commented 1 year ago

Try nested tokens (for equation steps):

import numpy as np
import time
from numba.typed import Dict
from numpy import zeros
from numba import int8, float32, njit, types    # import the types

def deconstruct_equation(eqn):
# we parse the equation during readuci/pre-processing and break it into njit'able pieces
# this forms the basis of our object parser code to run at import_uci step 
exprStack[:] = []
#eqn = "(5 + 27.2) * 16.3"
#eqn = "(121.4 + 65.3 * (5 + 27.2)) * 16.3 + 97.2"
eqn = "(75.9 / (5 + 27.2)) * 16.3 + 100.4 / 7.3 - (134 + 23)"
eqn = "(75.9 / (5 + 27.2)) * 16.3 + (100.4 / 7.3 - (134 + 23))^ 2.0"
eqn_eval = "(75.9 / (5 + 27.2)) * 16.3 + pow(100.4 / 7.3 - (134 + 23), 2.0)"
results = BNF().parseString(eqn, parseAll=True)

ps = []
ep = exprStack
pre_evaluate_stack(ep[:], ps)

op_tokens = Dict.empty(key_type=types.int64, value_type=types.i8[:])
state_ix = Dict.empty(key_type=types.int64, value_type=types.float64)

rps = 0
k = append_state(state_ix, 0.0) # initialize this equation operator in the state_ix Dict, returns unique key
tops = [1, 1, len(ps)] # set up the first 2 op class type and state_ix
for i in range(len(ps)):
    if ps[i][0] == '-': op = 1
    if ps[i][0] == '+': op = 2
    if ps[i][0] == '*': op = 3
    if ps[i][0] == '/': op = 4
    if ps[i][0] == '^': op = 5
    if ps[i][1] == None: o1 = -1 
    else: o1 = ps[i][1]
    if ps[i][2] == None: o2 = -1 
    else: o2 = ps[i][2]
    tops.append(op)
    tops.append(o1)
    tops.append(o2)

def append_state(state_ix, var_value):
    if (len(state_ix) == 0):
      val_ix = 1
    else:
        val_ix = max(state_ix.keys()) + 1 # next ix value
    state_ix[val_ix] = var_value
    return val_ix

# now stash the string vars as new state vars
for j in range(len(tops)):
    if isinstance(tops[j], str):
        # must add this to the state array as a constant
        s_ix = append_state(state_ix, float(tops[j]))
        tops[j] = s_ix

op_tokens[k] = np.asarray(tops, dtype="i8")

# test 
exec_eqn_nall_m(op_tokens[k], state_ix)

@njit
def exec_eqn_nall_m(op_token, state_ix):
    op_class = op_token[0] # we actually will use this in the calling function, which will decide what 
                      # next level function to use 
    result = 0
    num_ops = op_token[2]
    s = np.array([0.0])
    s_ix = -1 # pointer to the top of the stack
    s_len = 1
    #print(num_ops, " operations")
    for i in range(num_ops): 
        op = op_token[3 + 3*i]
        t1 = op_token[3 + 3*i + 1]
        t2 = op_token[3 + 3*i + 2]
        # if val1 or val2 are < 0 this means they are to come from the stack
        # if token is negative, means we need to use a stack value
        #print("s", s)
        if t1 < 0: 
            val1 = s[s_ix]
            s_ix -= 1
        else:
            val1 = state_ix[t1]
        if t2 < 0: 
            val2 = s[s_ix]
            s_ix -= 1
        else:
            val2 = state_ix[t2]
        #print(s_ix, op, val1, val2)
        if op == 1:
            #print(val1, " - ", val2)
            result = val1 - val2
        elif op == 2:
            #print(val1, " + ", val2)
            result = val1 + val2
        elif op == 3:
            #print(val1, " * ", val2)
            result = val1 * val2 
        elif op == 4:
            #print(val1, " / ", val2)
            result = val1 / val2 
        elif op == 5:
            #print(val1, " ^ ", val2)
            result = pow(val1, val2) 
        s_ix += 1
        if s_ix >= s_len: 
            s = np.append(s, 0)
            s_len += 1
        s[s_ix] = result
    result = s[s_ix]
    return result 

exec_eqn_nall_m(op_tokens[k], state_ix)

steps = 40 * 365 * 24

@njit 
def exec_op_tokens(op_tokens, state_ix, dict_ix, steps):
    checksum = 0.0
    for step in range(steps):
        for i in op_tokens.keys():
            s_ix = op_tokens[i][1] # index of state for this component
            if op_tokens[i][0] == 1:
                state_ix[s_ix] = exec_eqn_nall_m(op_tokens[i], state_ix)
            elif op_tokens[i][0] == 2:
                state_ix[s_ix] = exec_tbl_eval(op_tokens[i], state_ix, dict_ix)
            checksum += state_ix[i]
    return checksum

dict_ix = Dict.empty(key_type=types.int64, value_type=types.float32[:,:])

start = time.time()
num = eeee(op_tokens, state_ix, dict_ix, steps)
end = time.time()
print(end - start, "seconds")
rburghol commented 1 year ago

Running this as a single equation with 7 operands and without any data retrieval:


@njit
def iterate_eqn(op_tokens, state_ix, dict_ix, steps):
    for i in range(steps):
        for i in op_tokens.keys():
            s_ix = op_tokens[i][1] # index of state for this component
            one_value = state_ix[op_tokens[i][2]]
            another_value = state_ix[op_tokens[i][3]]
            result = (one_value / (5 + 27.2)) * 16.3 + pow(another_value/ 7.3 - (134 + 23), 2.0)
            state_ix[s_ix ] = result
    return result

dummy_tokens = Dict.empty(key_type=types.int64, value_type=types.i8[:])
dummy_tokens[1] = np.asarray([1, 1, 3, 2], dtype="i8")
start = time.time()
num = iterate_eqn(dummy_tokens, state_ix, dict_ix, steps)
end = time.time()
print(end - start, "seconds")
# 0.02541184425354004 seconds