HARPgroup / HSPsquared

Hydrologic Simulation Program Python (HSPsquared)
GNU Affero General Public License v3.0

Tokenized array object and dict of array objects dataMatrix solver #41

Closed rburghol closed 1 year ago

rburghol commented 1 year ago

Add an interpolated lookup. This is definitely a high-overhead operation: runtime increases substantially, and one lookup takes about 8x the execution time of a single equation op.
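
For context, a rough interpreted-Python micro-benchmark (my sketch, not from the issue, and not equivalent to timing the compiled @njit pipeline) gives a feel for why a table lookup costs more than a single arithmetic op:

```python
import time
import numpy as np

# key/value columns taken from the example FTABLE-style table used later in this issue
xs = np.array([0.0, 195200.0, 204252.8, 213736.0])
ys = np.array([0.0, 8890.6, 9301.5, 9712.5])
a, b, c = 5.0, 3.0, 2.0
n = 100000

t0 = time.time()
for _ in range(n):
    v = a * b + c                       # stand-in for a single equation op
t_eq = time.time() - t0

t0 = time.time()
for _ in range(n):
    v = np.interp(200000.0, xs, ys)     # stand-in for one interpolated lookup
t_lu = time.time() - t0

print("lookup / equation time ratio ~", t_lu / t_eq)
# the ratio will differ from the ~8x seen in the compiled solver, but the lookup
# side is consistently the more expensive operation
```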

#### Data Model

#### Parser

```python
import ast

import numpy as np
from numba import njit, types
from numba.typed import Dict

# is_float_digit, find_state_path, get_state_ix and set_state are assumed to be
# available from the project's state-handling helpers

# initialize state vars
op_tokens = Dict.empty(key_type=types.int64, value_type=types.i8[:])
state_paths = Dict.empty(key_type=types.unicode_type, value_type=types.int64)
state_ix = Dict.empty(key_type=types.int64, value_type=types.float64)
dict_ix = Dict.empty(key_type=types.int64, value_type=types.float32[:,:])

# this is what it would look like reading in the JSON string
mjs = '[["0", "Qin"],["5.0", "flowby_cond3"],["1000000.0", "flowby_cond3"]]'
# should replace variable names with full variable paths, to later use to find path vars
meq = ast.literal_eval(mjs)
mixs = meq  # will hold the integer state indices (note: aliases meq, entries are replaced in place)

# set path info
parent_path = "/STATE/INTAKE_001"
mpath = "/STATE/INTAKE_001/flowby"

# iterate through each row and col, see if it is a string or a number
ri = 0
for mrow in meq:
    ci = 0
    for mcol in mrow:
        mval = meq[ri][ci]
        if not is_float_digit(mval):
            vt = "var"
            mv_path = find_state_path(state_paths, parent_path, mval)
            mix = get_state_ix(state_ix, state_paths, mv_path)
            if isinstance(mix, bool):
                # this does not handle the possibility that the requested variable is not yet defined.
                # really, this should fail if so, but we will force the issue here for testing
                mix = set_state(state_ix, state_paths, mv_path, 0.0)
        else:
            vt = "num"
            mv_path = mpath + "/states/" + str(ri) + "_" + str(ci)
            mix = set_state(state_ix, state_paths, mv_path, float(mval))
        print("Type", vt, "path:", mv_path, "val", mval, "mix", mix)
        mixs[ri][ci] = mix
        ci += 1
    ri += 1
```
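
The helpers used above (is_float_digit, find_state_path, get_state_ix, set_state) are assumed to come from the project's state-handling code. To run the snippet in isolation, minimal hypothetical stand-ins (not the HSPsquared implementations) could be defined before it, e.g.:

```python
def is_float_digit(s):
    # True if the string parses as a float, i.e. it is a numeric literal in the matrix
    try:
        float(s)
        return True
    except ValueError:
        return False

def find_state_path(state_paths, parent_path, var_name):
    # resolve a bare variable name to a full path under the parent container
    return parent_path + "/" + var_name

def get_state_ix(state_ix, state_paths, path):
    # return the integer index for a registered path, or False if not yet registered
    if path in state_paths:
        return state_paths[path]
    return False

def set_state(state_ix, state_paths, path, value):
    # register the path (if new), store the value, and return the integer index
    if path not in state_paths:
        state_paths[path] = len(state_paths) + 1
    ix = state_paths[path]
    state_ix[ix] = value
    return ix
```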

#### **Code 1:** High speed execution of tokenized config.

Now add simple lookup support:

```python
@njit
def specl_lookup(data_table, keyval, lutype, valcol):
    if lutype == 2:  # stair-step
        idx = (data_table[:, 0][0:][(data_table[:, 0][0:] - keyval) <= 0]).argmax()
        luval = data_table[:, valcol][0:][idx]
    elif lutype == 1:  # interpolate
        luval = np.interp(keyval, data_table[:, 0][0:], data_table[:, valcol][0:])
    # show value at this point
    return luval


@njit
def exec_tbl_eval(op, state_ix, dict_ix):
    ix = op[1]
    dix = op[2]
    mx_type = op[3]  # not used yet, what type of table? in past this was always 1-d or 2-d
    key1_ix = op[4]
    print("ix, dict_ix, mx_type, key1_ix", ix, dix, mx_type, key1_ix)
    lutype = op[5]
    valcol = op[8]
    data_table = dict_ix[dix]
    keyval = state_ix[key1_ix]
    #print("Key, ltype, val", keyval, lutype, valcol)
    result = specl_lookup(data_table, keyval, lutype, valcol)
    return result
```
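
As a quick sanity check of the two lookup types, here is a usage sketch with made-up values (not from the original issue), assuming the key column (column 0) is sorted ascending as in an FTABLE, with column 2 as the value column:

```python
toy = np.asarray([[0.0, 0.0, 0.0],
                  [10.0, 1.0, 100.0],
                  [20.0, 2.0, 200.0]], dtype="float32")

print(specl_lookup(toy, 15.0, 2, 2))  # stair-step: stays on the 10.0 row -> 100.0
print(specl_lookup(toy, 15.0, 1, 2))  # interpolate: key 15.0 is halfway between rows -> 150.0
```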

Load source from openmi-om/py/XdataMatrix.class.py:

```python
# this is state ix var 3
# doing a lookup table (like FTABLE) where we are given an arbitrary key column in a table
# and an arbitrary value column, and we want the output to be interpolated in the value
# column according to the key column
# op is of the form: op_type, state_ix, dict_ix, mx_type (not used yet), key1_ix, key1_lu_type,
#                    key2_ix, key2_lu_type, val_col
op_tokens[3] = np.asarray([2, 3, 4, 0, 5, 2, 0, 0, 2], dtype="i8")  # need to think long and hard about these required tokens
# notes:
# - dict_ix key is NOT always the same as the state_ix key,
#   since we may allow a "matrix accessor" to perform a lookup on another table
# - val_col is only used if this is a matrix_accessor and the lookup is a single column
state_ix[3] = 0.0  # the state var for this matrix accessor (since it is an ftable style lookup)
state_ix[4] = 0.0
# create a pseudo state variable for the storage in the ftable
state_ix[5] = 200000.0
dict_ix = Dict.empty(key_type=types.int64, value_type=types.float32[:,:])
dict_ix[4] = XdataMatrix.parseMatrix(
    "[ [ 0.0, 170.0, 0], [195200.0, 240.0, 8890.6], [204252.8, 241.0, 9301.5], [213736.0, 242.0, 9712.5] ]"
)
```
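
To keep the token layout readable, a small hypothetical helper (not part of the codebase) can unpack a table-eval token array into named fields following the layout noted above:

```python
def describe_tbl_op(op):
    # field names follow the op layout comment above
    fields = ["op_type", "state_ix", "dict_ix", "mx_type", "key1_ix",
              "key1_lu_type", "key2_ix", "key2_lu_type", "val_col"]
    return dict(zip(fields, [int(v) for v in op]))

print(describe_tbl_op(op_tokens[3]))
# {'op_type': 2, 'state_ix': 3, 'dict_ix': 4, 'mx_type': 0, 'key1_ix': 5,
#  'key1_lu_type': 2, 'key2_ix': 0, 'key2_lu_type': 0, 'val_col': 2}
```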

```python
specl_lookup(dict_ix[4], 200000, 2, 2)
exec_tbl_eval(op_tokens[3], state_ix, dict_ix)
```
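
With the table above, both calls take the stair-step path (lutype 2): the largest key not exceeding 200000.0 is 195200.0, so the lookup should return the 8890.6 entry from column 2 (modulo float32 rounding). An interpolated lookup (lutype 1) on the same key would instead give roughly 9108.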

```python
@njit
def iterate_all_nall(op_tokens, state_ix, dict_ix, steps):
    checksum = 0.0
    for step in range(steps):
        for i in op_tokens.keys():
            if op_tokens[i][0] == 1:
                # op type 1: tokenized equation; exec_eqn_nall is assumed from earlier in this thread
                state_ix[i] = exec_eqn_nall(op_tokens[i], state_ix)
            elif op_tokens[i][0] == 2:
                # op type 2: table/matrix lookup
                state_ix[i] = exec_tbl_eval(op_tokens[i], state_ix, dict_ix)
            checksum += state_ix[i]
    return checksum
```

```python
import time

start = time.time()
num = iterate_all_nall(op_tokens, state_ix, dict_ix, steps)  # steps (number of timesteps) is assumed to be set earlier
end = time.time()
print(end - start, "seconds")
```
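
One caveat (my note, not from the issue): the first call to an @njit function includes compilation time, so a timing like the one above also measures numba's JIT compile unless the function has been called once beforehand. A minimal warm-up sketch:

```python
# warm-up call so numba compiles iterate_all_nall (and the ops it dispatches to)
iterate_all_nall(op_tokens, state_ix, dict_ix, 1)

start = time.time()
num = iterate_all_nall(op_tokens, state_ix, dict_ix, steps)
end = time.time()
print(end - start, "seconds (steady-state, excluding compilation)")
```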