Issue opened by rburghol, 1 year ago.
Inside object with single timestep
import numpy as np
from numba import types
from numpy import zeros, any, full, nan, array, int64
from numba.typed import Dict
steps = 5# adapted from HDRY:steps = int(ui['steps'])
ts = Dict.empty(key_type=types.unicode_type, value_type=types.float64[:])
# set up some base data
ts['/RESULTS/RCHRES_001/SPECL/Qin'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/Qlocal'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/Qout'] = zeros(steps)
# now populate some values into the Qlocal variable
ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][3] = 2.0
ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][1] = 7.0
# set up local vars for a sample object
local_vars = ('/RESULTS/RCHRES_001/SPECL/Qin', '/RESULTS/RCHRES_001/SPECL/Qlocal')
local_state = {k: ts[k] for k in local_vars}
# do this calc in direct evaluation
object_value = ts['/RESULTS/RCHRES_001/SPECL/Qout']
for step in range(steps):
object_value[step] = eval("local_state['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] + local_state['/RESULTS/RCHRES_001/SPECL/Qin'][step]")
# print out value dictionary
object_value
# array([0., 7., 0., 2., 0.])
# note that changes to local_state DO propagate back to the ts in this example
# is this always the case?
object_value
# array([0., 7., 0., 2., 0.])
ts['/RESULTS/RCHRES_001/SPECL/Qout']
# array([0., 7., 0., 2., 0.])
How do we turn this into numba-compatible, fast executable code?
numba
@njit
compiler does NOT support an array of objects in no python mode, ie. @njit(nopython=True)
import numpy as np
from math import sin, cos
from numba import types
from numpy import zeros, any, full, nan, array, int64
from numba.typed import Dict
steps = 5# adapted from HDRY:steps = int(ui['steps'])
ts = Dict.empty(key_type=types.unicode_type, value_type=types.float64[:])
# set up some base data
ts['/RESULTS/RCHRES_001/SPECL/Qin'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/Qlocal'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/Qout'] = zeros(steps)
# now populate some values into the Qlocal variable
ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][3] = 2.0
ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][1] = 7.0
opcodes = Dict.empty(key_type=types.unicode_type, value_type=types.string)
opcodes ["/RESULTS/RCHRES_001/SPECL/Qlocal"] = "sin(step)"
opcodes ["/RESULTS/RCHRES_001/SPECL/Qout"] = "ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] + ts['/RESULTS/RCHRES_001/SPECL/Qin'][step]"
for step in range(steps):
for opkey in opcodes.keys():
print(opkey)
ts[opkey][step] = eval(opcodes[opkey])
ts["/RESULTS/RCHRES_001/SPECL/Qout"]
# array([ 0. , 0.84147098, 0.90929743, 0.14112001, -0.7568025 ])
Can we just add @njit? No — eval() is fundamentally incompatible with numba.
import time
import numpy as np
from math import sin, cos
from numba import types
from numpy import zeros, any, full, nan, array, int64
from numba.typed import Dict
def iterate_specl(ts, opcodes, steps):
    """Baseline: evaluate each opcode expression string for every timestep.

    ts      -- dict-like mapping of timeseries path -> indexable float array
    opcodes -- mapping of a ts key -> Python expression string; expressions
               may reference the locals `ts` and `step` by name, so those
               names must not be changed here
    steps   -- number of timesteps to run

    WARNING: eval() of stored strings is slow and unsafe on untrusted
    input; this version exists only as the reference benchmark.
    """
    for step in range(steps):
        for opkey in opcodes.keys():
            ts[opkey][step] = eval(opcodes[opkey])
    return
ts
#
steps = 365 * 24 * 50# 50 year hourly simulation
ts = Dict.empty(key_type=types.unicode_type, value_type=types.float64[:])
# set up some base data
ts['/RESULTS/RCHRES_001/SPECL/Qin'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/Qlocal'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/Qout'] = zeros(steps)
# now populate some values into the Qlocal variable
opcodes = Dict.empty(key_type=types.unicode_type, value_type=types.string)
# note: opcodes are keyed by the ts key that they will set.
# this doesn't have to be this way, but we would need
# to indicate the key to set somewhere in the opcode array
# or another array passed in to the function.
opcodes ["/RESULTS/RCHRES_001/SPECL/Qlocal"] = "sin(step)"
opcodes ["/RESULTS/RCHRES_001/SPECL/Qin"] = "abs(cos(5 * step))"
opcodes ["/RESULTS/RCHRES_001/SPECL/Qout"] = "ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] + ts['/RESULTS/RCHRES_001/SPECL/Qin'][step]"
start = time.time()
iterate_specl(ts, opcodes, steps)
end = time.time()
print(end - start)
import pandas
def iterate_specl(ts, opcodes, steps):
    """Variant of the eval() baseline that uses pandas.eval() instead.

    pandas.eval resolves names (including the locals `ts` and `step`) from
    the calling frame, so opcode expression strings look identical to the
    plain-eval version; do not rename those locals.
    """
    for step in range(steps):
        for opkey in opcodes.keys():
            ts[opkey][step] = pandas.eval(opcodes[opkey])
    return
Pre-compiled operations: with @njit this takes 1.15 seconds; without @njit it takes 9.24 seconds.
import time
import numpy as np
from numba import njit, jit
from math import sin, cos
from numba import types
from numpy import zeros, any, full, nan, array, int64
from numba.typed import Dict
steps = 365 * 24 * 50# 50 year hourly simulation
ts = Dict.empty(key_type=types.unicode_type, value_type=types.float64[:])
# set up some base data
ts['/RESULTS/RCHRES_001/SPECL/Qin'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/Qlocal'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/Qout'] = zeros(steps)
@njit
def iterate_specl(ts, step):
    """Compiled per-step update: hard-coded ops on the ts typed Dict.

    ts   -- numba typed Dict mapping path string -> float64 array
    step -- index of the timestep to update
    """
    ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] = sin(step)
    ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] = abs(cos(5 * step))
    ts['/RESULTS/RCHRES_001/SPECL/Qout'][step] = ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] + ts['/RESULTS/RCHRES_001/SPECL/Qin'][step]
    return
start = time.time()
for step in range(steps):
iterate_specl(ts, step)
end = time.time()
print(end - start, "seconds")
# 1.1588082313537598 seconds
# comment out @njit and it takes 9.242465734481812 seconds
1.9670121669769287 seconds
1.544893741607666 seconds
ts['/RESULTS/RCHRES_001/SPECL/Volume'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/elev'] = zeros(steps)
@njit
def iterate_specl(ts, step):
    """Per-step update including a volume -> elevation interpolation lookup."""
    ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] = sin(step)
    ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] = abs(cos(5 * step))
    ts['/RESULTS/RCHRES_001/SPECL/Qout'][step] = ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] + ts['/RESULTS/RCHRES_001/SPECL/Qin'][step]
    # Storage table; columns appear to be [volume, elevation, other].
    # NOTE(review): rebuilt on every call — hoisting it out would avoid the
    # per-step allocation, but it is left in place to preserve the benchmark.
    OBJECTS_DATAMTRIX_0001 = array([[0.000000e+00, 1.700000e+02, 0.000000e+00],
                                    [1.952000e+05, 2.400000e+02, 8.890600e+03],
                                    [2.042528e+05, 2.410000e+02, 9.301500e+03],
                                    [2.137360e+05, 2.420000e+02, 9.712500e+03]])
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] * 213736.0
    ts['/RESULTS/RCHRES_001/SPECL/elev'][step] = np.interp(ts['/RESULTS/RCHRES_001/SPECL/Volume'][step], OBJECTS_DATAMTRIX_0001[:, 0][0:], OBJECTS_DATAMTRIX_0001[:, 1][0:])
start = time.time() for step in range(steps): iterate_specl(ts, step)
end = time.time() print(end - start, "seconds")
Can we suppress division-by-zero errors? (Otherwise they would be a huge headache.)
1.83119797706604 seconds
ts['/RESULTS/RCHRES_001/SPECL/test'] = zeros(steps)
ts['/RESULTS/RCHRES_001/SPECL/test1'] = zeros(steps)
@njit(error_model="numpy")
def iterate_specl(ts, step):
    """Per-step update compiled with the numpy error model.

    error_model="numpy" makes division by zero yield inf/nan instead of
    raising, which is what the /0.0 test line below exercises.
    """
    ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] = sin(step)
    ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] = abs(cos(5 * step))
    # uncomment this one to force a div by zero every step
    ts['/RESULTS/RCHRES_001/SPECL/test'][step] = ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] / 0.0
    # uncomment this one to NOT have a div by zero every step
    #ts['/RESULTS/RCHRES_001/SPECL/test'][step] = ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] / 10.0
    ts['/RESULTS/RCHRES_001/SPECL/test1'][step] = ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] / 1.0
    ts['/RESULTS/RCHRES_001/SPECL/Qout'][step] = ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] + ts['/RESULTS/RCHRES_001/SPECL/Qin'][step]
    # NOTE(review): rebuilt every call; left in place to preserve the benchmark.
    OBJECTS_DATAMTRIX_0001 = array([[0.000000e+00, 1.700000e+02, 0.000000e+00],
                                    [1.952000e+05, 2.400000e+02, 8.890600e+03],
                                    [2.042528e+05, 2.410000e+02, 9.301500e+03],
                                    [2.137360e+05, 2.420000e+02, 9.712500e+03]])
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] * 213736.0
    ts['/RESULTS/RCHRES_001/SPECL/elev'][step] = np.interp(ts['/RESULTS/RCHRES_001/SPECL/Volume'][step], OBJECTS_DATAMTRIX_0001[:, 0][0:], OBJECTS_DATAMTRIX_0001[:, 1][0:])
start = time.time()
for step in range(steps):
iterate_specl(ts, step)
end = time.time()
print(end - start, "seconds")
Does using compiled 2 (or 1) operator functions increase speed?
iterate_specl()
and iterate_specl_fn()
returned in 1.65–1.67 seconds, tested 5–10 times. iterate_specl() may have enjoyed an advantage of roughly 0.01 seconds, which is less than 1%. We also still need to handle Inf and NaN values, and clamping to "> X" or minimum values.
@njit(error_model="numpy")
def om_add(a, b):
    """Compiled binary add (numpy error model, matching the other om_* ops)."""
    return a + b
@njit(error_model="numpy")
def om_mult(a, b):
    """Compiled binary multiply (numpy error model)."""
    return a * b
@njit(error_model="numpy")
def om_div(a, b):
    """Compiled binary divide; numpy error model returns inf/nan on b == 0 instead of raising."""
    return a / b
@njit(error_model="numpy")
def iterate_specl_fn(ts, step):
    """Per-step update built from the compiled om_* operator functions.

    Same computation as iterate_specl, but each arithmetic op goes through
    a dedicated njit'd function (om_add / om_mult / om_div) to measure the
    cost of that indirection.
    """
    ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step] = sin(step)
    ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] = abs(cos(5 * step))
    # uncomment this one to force a div by zero every step
    ts['/RESULTS/RCHRES_001/SPECL/test'][step] = om_div(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 0.0)
    # uncomment this one to NOT have a div by zero every step
    #ts['/RESULTS/RCHRES_001/SPECL/test'][step] = om_div(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 10.0)
    ts['/RESULTS/RCHRES_001/SPECL/test1'][step] = om_div(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 1.0)
    ts['/RESULTS/RCHRES_001/SPECL/Qout'][step] = om_add(ts['/RESULTS/RCHRES_001/SPECL/Qlocal'][step], ts['/RESULTS/RCHRES_001/SPECL/Qin'][step])
    # NOTE(review): rebuilt every call; left in place to preserve the benchmark.
    OBJECTS_DATAMTRIX_0001 = array([[0.000000e+00, 1.700000e+02, 0.000000e+00],
                                    [1.952000e+05, 2.400000e+02, 8.890600e+03],
                                    [2.042528e+05, 2.410000e+02, 9.301500e+03],
                                    [2.137360e+05, 2.420000e+02, 9.712500e+03]])
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/elev'][step] = np.interp(ts['/RESULTS/RCHRES_001/SPECL/Volume'][step], OBJECTS_DATAMTRIX_0001[:, 0][0:], OBJECTS_DATAMTRIX_0001[:, 1][0:])
start = time.time()
for step in range(steps):
iterate_specl_fn(ts, step)
end = time.time()
print(end - start, "seconds")
ts/state
Dict
slots in place of variable names Dict
DO propagate to the copy send to the jitclass
object property matrix
Dict
to another Dict
do NOT propagate changes, since those items are treated as scalarsnumba
handle the requested data types? yes.jitclass
? But, as noted above, only fully Dict copies propagate changes, # use the above basic XdataMatrix test class code
state = Dict.empty(key_type=types.unicode_type, value_type=types.float64)
state['Qin'] = 179.0
test_table = np.asarray([ [ 0.0, 170.0, state['Qin']], [195200.0, 240.0, 8890.6], [204252.8, 241.0, 9301.5], [213736.0, 242.0, 9712.5] ], dtype= "float32")
ytest = XdataMatrix(2,0,test_table )
ytest.matrix = test_table
ytest.matrix[0,2]
> 179.0
# now change the source matrix value for Qin
state['Qin'] = 188
state['Qin']
> 188.0
# changes are NOT reflected in slot in test_table, nor in ytest.matrix
test_table[0,2]
> 179.0
ytest.matrix[0,2]
> 179.0
ts['/RESULTS/RCHRES_001/SPECL/Qin'] = zeros(steps)
@njit(error_model="numpy")
def iterate_specl_fn(ts, step):
    """Benchmark body: one Qin update plus 26 identical om_mult operations.

    The 26 duplicated lines are intentional — they simulate the straight-line
    cost of a model with many tokenized operations per timestep, so they are
    deliberately NOT collapsed into a loop.
    """
    ts['/RESULTS/RCHRES_001/SPECL/Qin'][step] = abs(sin(step)) * 50
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
    ts['/RESULTS/RCHRES_001/SPECL/Volume'][step] = om_mult(ts['/RESULTS/RCHRES_001/SPECL/Qin'][step], 213736.0)
start = time.time()
for step in range(steps):
iterate_specl_fn(ts, step)
end = time.time()
print(end - start, "seconds")
# first time
# 3.4200026988983154 seconds
# after compiling
# 2.6950390338897705 seconds
Equation evaluator:
# fourFn.py
#
# Demonstration of the pyparsing module, implementing a simple 4-function expression parser,
# with support for scientific notation, and symbols for e and pi.
# Extended to add exponentiation and simple built-in functions.
# Extended test cases, simplified pushFirst method.
# Removed unnecessary expr.suppress() call (thanks Nathaniel Peterson!), and added Group
# Changed fnumber to use a Regex, which is now the preferred method
# Reformatted to latest pypyparsing features, support multiple and variable args to functions
#
# Copyright 2003-2019 by Paul McGuire
#
from pyparsing import (
Literal,
Word,
Group,
Forward,
alphas,
alphanums,
Regex,
ParseException,
CaselessKeyword,
Suppress,
delimitedList,
)
import math
import operator
exprStack = []
def push_first(toks):
    """Parse action: push the first matched token onto the global exprStack."""
    exprStack.append(toks[0])
def push_unary_minus(toks):
    """Parse action: record each leading '-' sign as a 'unary -' marker on exprStack.

    Stops at the first non-minus token, so only genuine sign prefixes are pushed.
    """
    for t in toks:
        if t == "-":
            exprStack.append("unary -")
        else:
            break
bnf = None


def BNF():
    """Build (once) and return the pyparsing grammar for 4-function expressions.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    global bnf
    if not bnf:
        # CaselessKeyword only matches whole words, so identifiers that merely
        # start with 'e' or 'pi' (such as 'exp') are not captured here.
        e = CaselessKeyword("E")
        pi = CaselessKeyword("PI")
        # a Regex is the preferred way to match a float/scientific literal
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        ident = Word(alphas, alphanums + "_$")
        plus, minus, mult, div = map(Literal, "+-*/")
        lpar, rpar = map(Suppress, "()")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        expr = Forward()
        expr_list = delimitedList(Group(expr))

        def insert_fn_argcount_tuple(t):
            # replace the function identifier with a (name, number-of-args) tuple
            fn = t.pop(0)
            num_args = len(t[0])
            t.insert(0, (fn, num_args))

        fn_call = (ident + lpar - Group(expr_list) + rpar).setParseAction(
            insert_fn_argcount_tuple
        )
        atom = (
            addop[...]
            + (
                (fn_call | pi | e | fnumber | ident).setParseAction(push_first)
                | Group(lpar + expr + rpar)
            )
        ).setParseAction(push_unary_minus)
        # defining exponentiation as "atom [ ^ factor ]..." (not "atom [ ^ atom ]...")
        # gives right-to-left exponents: 2^3^2 = 2^(3^2), not (2^3)^2
        factor = Forward()
        factor <<= atom + (expop + factor).setParseAction(push_first)[...]
        term = factor + (multop + factor).setParseAction(push_first)[...]
        expr <<= term + (addop + term).setParseAction(push_first)[...]
        bnf = expr
    return bnf
# tolerance used by sgn() to treat near-zero values as zero
epsilon = 1e-12

# binary operators usable in expressions
opn = {
    "+": operator.add,
    "-": operator.sub,
    "*": operator.mul,
    "/": operator.truediv,
    "^": operator.pow,
}

# callables for direct evaluation of named functions
fn = {
    "sin": math.sin,
    "cos": math.cos,
    "tan": math.tan,
    "exp": math.exp,
    "abs": abs,
    "trunc": int,
    "round": round,
    "sgn": lambda a: -1 if a < -epsilon else 1 if a > epsilon else 0,
    "multiply": lambda a, b: a * b,
    "hypot": math.hypot,
    # functions with a variable number of arguments
    "all": lambda *a: all(a),
}

# string names of the same functions, used when pre-tokenizing
# (pre_evaluate_stack) instead of directly evaluating
fns = {
    "sin": "math.sin",
    "cos": "math.cos",
    "tan": "math.tan",
    "exp": "math.exp",
    "abs": "abs",
    "trunc": "int",
    "round": "round",
}
def evaluate_stack(s):
    """Recursively evaluate the postfix token stack *s* to a number.

    *s* is consumed from the end (pop). Tokens may be operator symbols,
    (name, argcount) tuples for function calls, 'unary -', the constants
    PI/E, numeric literals, or identifiers (which raise).
    """
    op, num_args = s.pop(), 0
    if isinstance(op, tuple):
        op, num_args = op
    if op == "unary -":
        return -evaluate_stack(s)
    if op in "+-*/^":
        op2 = evaluate_stack(s)
        op1 = evaluate_stack(s)
        return opn[op](op1, op2)
    elif op == "PI":
        return math.pi  # 3.1415926535
    elif op == "E":
        return math.e  # 2.718281828
    elif op in fn:
        # note: args are pushed onto the stack in reverse order
        args = reversed([evaluate_stack(s) for _ in range(num_args)])
        return fn[op](*args)
    elif op[0].isalpha():
        raise Exception("invalid identifier '%s'" % op)
    else:
        # try to evaluate as int first, then as float if int fails
        try:
            return int(op)
        except ValueError:
            return float(op)
def pre_evaluate_stack(s, ps):
    """Tokenize postfix stack *s* into [op, arg1, arg2] triplets appended to *ps*.

    Returns the literal/identifier consumed, or None when a triplet was
    emitted (a None argument in a triplet means "use the previous step's
    result").
    """
    op, num_args = s.pop(), 0
    if isinstance(op, tuple):
        op, num_args = op
    if op == "unary -":
        # NOTE(review): original code calls evaluate_stack (not
        # pre_evaluate_stack) here, folding unary minus eagerly — confirm
        # that is intended.
        ps.append([-evaluate_stack(s), 0, 0])
        return
    if op in "+-*/^":
        op2 = pre_evaluate_stack(s, ps)
        op1 = pre_evaluate_stack(s, ps)
        ps.append([op, op1, op2])
        return
    elif op == "PI":
        ps.append([math.pi, 0, 0])  # 3.1415926535
        return
    elif op == "E":
        ps.append([math.e, 0, 0])  # 2.718281828
        return
    elif op in fns:
        # note: args are pushed onto the stack in reverse order
        args = []
        for x in range(num_args):
            args.append(pre_evaluate_stack(s, ps))
        args.reverse()
        # BUG FIX: was args.insert(fns[op], 0) — arguments swapped;
        # list.insert takes (index, object), so the function name string
        # was being passed as the index (TypeError at runtime).
        args.insert(0, fns[op])
        ps.append(args)
        return
    elif op[0].isalpha():
        return op
    else:
        # try to evaluate as int, else return the raw string
        try:
            return int(op)
        except ValueError:
            return op
First draft iterator of operators parsed above:
24.795029163360596 seconds
from numba import njit
@njit
def exec_eqn_pnj(ops, state):
    """Execute one binary operation token against the state Dict.

    ops   -- Dict with 'op' (operator symbol) and 'arg1'/'arg2'
             (state-path keys naming the operands)
    state -- Dict mapping state paths to float64 values
    """
    val1 = state[ops['arg1']]
    val2 = state[ops['arg2']]
    # returning inside each branch saves roughly 45% of the computational
    # time versus assigning and falling through to a single return
    if ops['op'] == '-':
        return val1 - val2
    if ops['op'] == '+':
        return val1 + val2
    if ops['op'] == '*':
        return val1 * val2
    if ops['op'] == '/':
        # the equation parser emits '/' tokens too; previously unhandled
        return val1 / val2
    # unknown operator: previously fell through to `return result` with
    # `result` unbound; return 0.0 explicitly instead
    return 0.0
@njit
def exec_eqn_pnjopt(ops, state):
    """Single-return variant of exec_eqn_pnj (kept for the benchmark comparison)."""
    # these 2 retrievals amount to approx. 35% of exec time
    val1 = state[ops['arg1']]
    val2 = state[ops['arg2']]
    if ops['op'] == '-':
        result = val1 - val2
    elif ops['op'] == '+':
        result = val1 + val2
    elif ops['op'] == '*':
        result = val1 * val2
    elif ops['op'] == '/':
        # the equation parser emits '/' tokens too; previously unhandled
        result = val1 / val2
    else:
        # previously `result` was unbound for unknown operators
        result = 0.0
    return result
def is_float_digit(n: str) -> bool:
    """Return True if *n* parses as a float literal (e.g. '1.5', '-2e3')."""
    try:
        float(n)
        return True
    except ValueError:
        return False
import numpy as np
import time
from numba.typed import Dict
from numpy import zeros
from numba import int8, float32, njit, types # import the types
# allops is created jere as a Dict, but really it represents the
# hdf5 structure, which is allowed to mix string and numbers
# this was just created because I am testing and being sloppy
# but this particular structure will NOT make it into any @njit functions
# and thus can be a more flexible type
allops = Dict.empty(key_type=types.unicode_type, value_type=types.UnicodeCharSeq(128))
allops["/OBJECTS/RCHRES_0001/discharge_mgd/equation"] = "( (1.0 - consumption - unaccounted_losses) * wd_mgd + discharge_from_gw_mgd)"
# we parse the equation during readuci/pre-processing and break it into njit'able pieces
exprStack[:] = []
results = BNF().parseString(allops["/OBJECTS/RCHRES_0001/discharge_mgd/equation"], parseAll=True)
ps = []
ep = exprStack
pre_evaluate_stack(ep[:], ps)
# need to translate the constants in the equation to hdf5 variables.
# Ex:
# ps[0] = ['-', '1.0', 'consumption']
# 1.0 is a constant. The first constant in the discarge_mgd equation
# is 1.0, so we add it to the hdf5 as '_c1'
# since numeric values cannot be in the same Dict as strnigs in numba/njit
# these go directly into state as well as the hdf5
# then swap the path out for the value 1.0
# this can be done during parse UCI time since we have all python
#state = Dict.empty(key_type=types.unicode_type, value_type=types.float64)
state = Dict.empty(key_type=types.UnicodeCharSeq(128), value_type=types.float64)
parent_path = "/OBJECTS/RCHRES_0001"
object_path = "/OBJECTS/RCHRES_0001/discharge_mgd"
parent_state_path = "/STATE/RCHRES_0001"
state_path = "/STATE/RCHRES_0001/discharge_mgd"
num_co = 0
ops_path = object_path + "/_ops"
ops_state_path = state_path + "/_ops"
for i in range(0, len(ps) - 1):
op_name = "_op" + str(i)
op_path = ops_path + "/" + op_name
op_state_path = ops_state_path + "/" + op_name
for j in range(1,3):
arg_name = "arg" + str(j)
if ps[i][j] == None:
op_str = '_result' # we assume the result from previous step is to be used
ps[i][j] = '_result' # we assume the result from previous step is to be used
if is_float_digit(ps[i][j]):
print(ps[i][j], " is numeric")
num_co += 1
co_name = "_c" + str(num_co)
# where does the object info reside
co_path = op_path + "/" + co_name
# where does the numeric value for this reside
# this is the value that gets used in the actual njit evaluation functions
op_state_path = op_state_path + "/" + co_name
# stash the value here
state[op_state_path] = float(ps[i][j])
op_str = co_name
else:
# this is a variable, so we need to get its path
# spoofed for now
op_str = ps[i][j]
op_state_path = parent_state_path + "/" + ps[i][j]
print(op_path + "/" + arg_name + "/path")
allops[op_path + "/" + arg_name + "/path"] = op_state_path
allops[op_path + "/" + arg_name] = op_str
# now set
allops[op_path + "/op"] = ps[i][0]
#allops["/OBJECTS/RCHRES_0001/discharge_mgd/_ops"]
#allops["/OBJECTS/RCHRES_0001/discharge_mgd/_c1"]
# string functions available to us
#allops["/OBJECTS/RCHRES_0001/discharge_mgd/_ops"] = ps
#allops["/OBJECTS/RCHRES_0001/discharge_mgd/_ops"]
# use dict
import numpy as np
#tpvals = Dict.empty(key_type=types.unicode_type, value_type=types.float64)
tpvals = Dict.empty(key_type=types.UnicodeCharSeq(128), value_type=types.float64)
tpops = Dict.empty(key_type=types.unicode_type, value_type=types.UnicodeCharSeq(128))
tpvals['/STATE/RCHRES_0001/discharge_mgd/_c1'] = 1.0 # constants would be populated during equation parsing, if there were 3 constants in the equation they would be _c1, _c2, and _c3
tpvals['/STATE/RCHRES_0001/consumption'] = 0.15
tpvals['/STATE/RCHRES_0001/consumption'] = 0.15
tpops['op'] = '-'
tpops['arg1'] = '/STATE/RCHRES_0001/Qin/_c1'
tpops['arg2'] = '/STATE/RCHRES_0001/consumption'
exec_eqn_pnj(tpops, tpvals)
atpops = Dict.empty(key_type=types.unicode_type, value_type=types.UnicodeCharSeq(128))
atpops['op'] = allops['/OBJECTS/RCHRES_0001/discharge_mgd/_ops/_op0/op']
atpops['arg1'] = allops['/OBJECTS/RCHRES_0001/discharge_mgd/_ops/_op0/arg1/path']
atpops['arg2'] = allops['/OBJECTS/RCHRES_0001/discharge_mgd/_ops/_op0/arg2/path']
state['/STATE/RCHRES_0001/consumption'] = 0.15
exec_eqn_pnj(atpops, state)
import time
@njit
def iterate_pnj(atpops, state, steps):
    """Run one equation op for `steps` iterations; return a checksum so the
    compiler cannot optimize the calls away."""
    checksum = 0.0
    for step in range(steps):
        checksum += exec_eqn_pnjopt(atpops, state)
    return checksum
steps = 24 * 365 * 40
#steps = 24 * 365 * 1
start = time.time()
num = iterate_pnj(atpops, state, steps)
end = time.time()
print(end - start, "seconds")
16.19699454307556 seconds
Turn all state variable references into integer keyed Dict and performance jumps -- executing time comes down by 99% from base case
0.03500032424926758 seconds
Add an interpolated lookup, which is definitely a high overhead operation, runtime increases substantially, 1 lookup is about 8x the execution time of a single equation op.
0.2510030269622803 seconds
Testing large JSON import components (to debug performance lag when embedded in HSP2 versus testing)
facility = ModelObject('facility', river)
c=["flowby", "wd_mgd", "Qintake"]
for k in range(10000):
eqn = str(25*random.random()) + " * " + c[round((2*random.random()))]
newq = Equation('eq' + str(k), facility, eqn)
#eqn = 50.0*random.random()
#newq = ModelConstant('eq' + str(k), facility, eqn)
- Combined step_model() and step_one() into a single function in case calling a sub-function caused delays. No apparent improvement.
- Tested 1,000 Constants, 1000 iterations, exec time: `99 seconds`
- Tested 1,000 Equations, 1000 iterations,, exec time: `273 seconds`
- Commented `pre_step_model()` line in loop
- Tested 1,000 Constants, 1000 iterations, exec time: `16 seconds`
- Tested 1,000 Equations, 1000 iterations,, exec time: `100 seconds`
- Eliminated unused `elif` conditions in `pre_step_model()` function
- Tested 1,000 Constants, 1000 iterations, exec time: `24 seconds`
- Tested 1,000 Equations, 1000 iterations, exec time: `seconds`
### THE slowdown IS ACCESSING THINGS FROM THE OP_TOKENS dict
- simply accessing the op with `op = op_tokens[i][0]` causes a 100x increase in execution time
@njit
def pre_step_test(model_exec_list, op_tokens, state_ix, dict_ix, ts_ix, step):
    """Perf probe: measure the cost of the bare op_tokens[i] access alone.

    The `continue` immediately after the access deliberately skips the real
    work, so the remaining branch is dead code kept from the original
    timing experiment.
    """
    for i in model_exec_list:
        op = op_tokens[i]
        continue
        if op_tokens[i][0] == 12:
            pre_step_register(op_tokens[i], state_ix, dict_ix)
            continue
        #elif op_tokens[i][0] == 1:
        #    # register type data (like broadcast accumulators)
        #    continue
    return
@njit
def test_perf(model_exec_list, op_tokens, state_ix, dict_ix, ts_ix, step):
    """Perf probe: iterate the exec list doing no work at all (loop-overhead baseline)."""
    for i in model_exec_list:
        continue
    return
@njit
def iterate_perf(model_exec_list, op_tokens, state_ix, dict_ix, ts_ix, steps):
    """Drive both perf probes across all timesteps; returns the (constant) checksum."""
    checksum = 0.0
    for step in range(steps):
        pre_step_test(model_exec_list, op_tokens, state_ix, dict_ix, ts_ix, step)
        test_perf(model_exec_list, op_tokens, state_ix, dict_ix, ts_ix, step)
    return checksum
def time_perf(model_exec_list, op_tokens, state_ix, dict_ix, ts_ix, steps):
    """Time iterate_perf() and report components x timesteps throughput."""
    start = time.time()
    iterate_perf(model_exec_list, op_tokens, state_ix, dict_ix, ts_ix, steps)
    end = time.time()
    # report the `steps` argument actually used (the original printed the
    # global siminfo['steps'], which is not defined in this snippet)
    print(len(model_exec_list), "components iterated over", steps, "time steps took", end - start, "seconds")
Test effect of array type for tokens iteration:
from random import random
@njit
def iteration_test(it_ops, it_nums):
    """Benchmark raw container reads: fetch it_ops[i] for it_nums full passes.

    The fetched value is intentionally unused — the access itself is what
    is being timed; ctr just proves every iteration ran.
    """
    ctr = 0
    for n in range(it_nums):
        for i in range(len(it_ops)):
            op = it_ops[i]
            ctr = ctr + 1
    print("Completed ", ctr, " loops")
num_ops = 1000
test_array1 = int32(zeros((num_ops,64)))
test_array2 = Dict.empty(key_type=types.int64, value_type=types.i8[:])
state_ix2 = Dict.empty(key_type=types.int64, value_type=types.float64)
for i in range(num_ops):
vals = int(random() * 20)
# create an array of length vals
ops = np.random.randint(10, size=vals)
test_array1[i] = pad(ops ,(0,64))[0:64]
test_array2[i] = np.asarray(ops, dtype="i8")
state_ix2[i] = 20.0 * random()
state_ix1 = np.array(list(state_ix2.items()), dtype="float32")
start = time.time()
iteration_test(test_array1, 300000 )
end = time.time()
print(len(test_array1 ), "np.arrray components took" , end - start, "seconds")
# Completed 300000000 loops
# 1000 np.arrray components took 0.018999099731445312 seconds
start = time.time()
iteration_test(test_array2, 300000 )
end = time.time()
print(len(test_array2 ), "Dict components took" , end - start, "seconds")
# Completed 300000000 loops
# 1000 Dict components took 1.084001064300537 seconds
Test array type for state data read-write storage on iteration:
np.array
for storing the "ops" as this was shown to be more efficient above
from random import random
import numpy as np
import time
from numpy import pad
from numba.typed import Dict
from numpy import zeros
from numba import int8, int32, float32, njit, types # import the types
@njit
def iteration_test_dat(op_order, it_ops, state_ix, it_nums):
    """Benchmark read+write of state storage during op iteration.

    For each of it_nums passes, reads every op row and its state value,
    writes the value back (x 1.0, a no-op numerically), and accumulates
    totals so the work cannot be optimized away.
    """
    ctr = 0
    ttr = 0.0
    for n in range(it_nums):
        for i in op_order:
            op = it_ops[i]
            getsx = state_ix[i]
            state_ix[i] = getsx * 1.0
            ttr = ttr + getsx
            ctr = ctr + 1
    print("Completed ", ctr, " loops")
    print("Total value", ttr)
num_ops = 1000 op_order = int32(zeros(num_ops)) test_array1 = int32(zeros((num_ops,64))) test_array2 = Dict.empty(key_type=types.int64, value_type=types.i8[:]) state_ix0 = Dict.empty(key_type=types.int16, value_type=types.float64) state_ix2 = Dict.empty(key_type=types.int64, value_type=types.float64) state_ix4 = Dict.empty(key_type=types.int64, value_type=types.float32)
for i in range(num_ops): vals = int(random() * 20)
ops = np.random.randint(10, size=vals)
test_array1[i] = pad(ops ,(0,64))[0:64]
test_array2[i] = np.asarray(ops, dtype="i8")
state_ix0[i] = 20.0 * random()
state_ix2[i] = state_ix0[i]
state_ix4[i] = state_ix2[i]
op_order[i] = i
state_ix1 = np.array(list(state_ix2.values()), dtype="float32") state_ix3 = np.array(list(state_ix2.values()), dtype="float64")
iteration_test_dat(op_order, test_array1, state_ix1, 1)
start = time.time() iteration_test_dat(op_order, test_array1, state_ix1, 300000 ) end = time.time() print(len(test_array1 ), "np.arrray32 components took" , end - start, "seconds")
start = time.time() iteration_test_dat(op_order, test_array1, state_ix2, 300000 ) end = time.time() print(len(test_array2 ), "Dict64 components took" , end - start, "seconds")
start = time.time() iteration_test_dat(op_order, test_array1, state_ix3, 300000 ) end = time.time() print(len(test_array2 ), "np.arrray64 components took" , end - start, "seconds")
start = time.time() iteration_test_dat(op_order, test_array1, state_ix4, 300000 ) end = time.time() print(len(test_array2 ), "Dict32 components took" , end - start, "seconds")
start = time.time() iteration_test_dat(op_order, test_array1, state_ix0, 300000 ) end = time.time() print(len(test_array2 ), "Dict64 with 16bit index components took" , end - start, "seconds")
Overview
case
andif-then
statements, which allow decision trees to be rendered into conceptual groupings and are therefore more easily understood.JIT and Compilations references
Several objects appear to be solvable with
numba
compilable methods:ts
": https://github.com/HARPgroup/HSPsquared/issues/19import_uci
time with the whole sequence of operationsnumba
and@njit
compatible, so that functions compile rapidly.ts
data frame, then written to thehdf5
file at the end of the simulation. See: https://github.com/HARPgroup/HSPsquared/issues/19Solver Options
Performance Inspection
Types of operations
model_tokenizer_recursive()
,TBD
import numpy as np import time from numba.typed import Dict from numpy import zeros from numba import int8, float32, njit, types # import the types
storage_table = np.asarray([ [ 0.0, 170.0, 0], [195200.0, 240.0, 8890.6], [204252.8, 241.0, 9301.5], [213736.0, 242.0, 9712.5] ], dtype= "float32") steps = 365 24 50# 50 year hourly simulation ts = Dict.empty(key_type=types.unicode_type, value_type=types.float64[:])
set up a place to store the function output
ts['/RESULTS/RCHRES_001/SPECL/elev'] = zeros(steps)
@njit
def specl_lookup(data_table, keyval, lutype, valcol):
    """Look up column `valcol` of `data_table`, keyed on column 0.

    lutype 2 = stair-step (row with the largest key <= keyval);
    lutype 1 = linear interpolation.
    Assumes column 0 is sorted ascending — TODO confirm with callers.
    """
    if lutype == 2:  # stair-step
        idx = (data_table[:, 0][0:][(data_table[:, 0][0:] - keyval) <= 0]).argmax()
        # BUG FIX: originally read the global `storage_table` here,
        # ignoring the data_table parameter
        luval = data_table[:, valcol][0:][idx]
    elif lutype == 1:  # interpolate
        luval = np.interp(keyval, data_table[:, 0][0:], data_table[:, valcol][0:])
    else:
        luval = np.nan
    # BUG FIX: callers assign the result (value = specl_lookup(...)), but the
    # pasted original never returned it
    return luval
test with dedicated njit function that takes parameters ONLY
using stair step
start = time.time() for step in range(steps): value = specl_lookup(storage_table, 200000, 2, 2)
end = time.time() print(end - start, "seconds")
0.22744989395141602 seconds
test with dedicated njit function that takes parameters
AND set value in the ts array OUTSIDE of njit. Note: setting ts increases time by 20x
conclude that most time-consuming aspect is setting data in the ts Dict outside of jit
using stair step
start = time.time() for step in range(steps): ts['/RESULTS/RCHRES_001/SPECL/elev'][step] = specl_lookup(storage_table, 200000, 2, 2)
end = time.time() print(end - start, "seconds")
approx 10x increased execution time due to setting ts value
2.0565061569213867 seconds
@njit
def specl_lookup_write(ts, step, data_table, keyval, lutype, valcol):
    """Same lookup as specl_lookup, but also writes the result into ts inside njit.

    lutype 2 = stair-step; lutype 1 = linear interpolation.
    """
    if lutype == 2:  # stair-step
        idx = (data_table[:, 0][0:][(data_table[:, 0][0:] - keyval) <= 0]).argmax()
        # BUG FIX: originally read the global `storage_table`, ignoring data_table
        luval = data_table[:, valcol][0:][idx]
    elif lutype == 1:  # interpolate
        luval = np.interp(keyval, data_table[:, 0][0:], data_table[:, valcol][0:])
    else:
        luval = np.nan
    # NOTE(review): the surrounding discussion times this function as the
    # "write inside njit" variant, but the pasted body lost the write
    # statement — reconstructed here; confirm against the original script.
    ts['/RESULTS/RCHRES_001/SPECL/elev'][step] = luval
    return luval
start = time.time() for step in range(steps): val = specl_lookup_write(ts, step, storage_table, 200000, 2, 2)
end = time.time() print(end - start, "seconds")
0.8163661956787109 seconds (after initial function compilation which takes ~ 0.5 seconds)
approx 4x the execution time of the function WITHOUT any write
approx 1/3 the time of the function WITH the write done OUTSIDE of njit
@njit
def specl_write(ts, step, luval):
    """Write a single looked-up value into the elev timeseries from inside njit."""
    ts['/RESULTS/RCHRES_001/SPECL/elev'][step] = luval
start = time.time() for step in range(steps): luval = specl_lookup(storage_table, 200000, 2, 2) specl_write(ts, step, luval)
end = time.time() print(end - start, "seconds")
0.9871413707733154 seconds (first time running took 1.19 secs including compile time)