Open JamesKunstle opened 4 months ago
Proposed solution:
#2
above, because we only move the options over once, and successive requests for filtering are unnecessary.

import dash
from dash import callback, Input, Output, dcc, html
from dash_mantine_components import MultiSelect
import time
import redis
import pickle
# Bug fix: was dash.Dash(name="__name__"), which passes the literal string
# "__name__" rather than the module's name (the second example app below
# does this correctly with name=__name__).
app = dash.Dash(name=__name__)
# Redis client used as the options cache (assumes localhost:6379 — TODO confirm).
db = redis.Redis()

# sets truthy value of 'disabled'
# so when 'disabled = False', MultiSelect is enabled.
MULTISELECT_ENABLED = False
MULTISELECT_DISABLED = True
MULTISELECT_BLOCKED_MESSAGE = "Loading options..."
MULTISELECT_READY_MESSAGE = "Pick a web framework!"

# search bar options stored in Redis.
# if >512MB of options (why?), use a Redis list instead of pickling.
# NOTE(review): pickling is acceptable here because this process produces the
# bytes itself — never unpickle data from an untrusted source.
sb_options = ["Vue", "React", "Next.js", "Remix", "Angular"]
SB_OPTIONS_KEY = "8knot:searchbar:options"
sb_options_pickle = pickle.dumps(sb_options)
db.set(name=SB_OPTIONS_KEY, value=sb_options_pickle)
# Static layout: the MultiSelect starts disabled with a "loading" placeholder;
# the on-mount callback below swaps in real options and enables it.
app.layout = html.Div(
    children=[
        MultiSelect(
            id="multiselect",
            data=["something"],  # throwaway initial data, replaced on mount
            clearable=True,
            searchable=True,
            disabled=MULTISELECT_DISABLED,  # disabled until options are loaded
            placeholder=MULTISELECT_BLOCKED_MESSAGE,
        ),
    ]
)
@callback(
    [
        Output("multiselect", "data"),
        Output("multiselect", "disabled"),
        Output("multiselect", "placeholder"),
    ],
    Input("multiselect", "clearable"),  # static prop: fires once when the component mounts
)
def set_sb_options(_):
    """
    On-mount callback to set updated options in search bar.

    Loads the cached option list from Redis, enables the component,
    and swaps the placeholder for the "ready" message.
    """
    # artificial delay so the disabled/loading state is visible in the demo
    time.sleep(2)
    t0 = time.time_ns()
    opts = pickle.loads(db.get(SB_OPTIONS_KEY))
    t1 = time.time_ns()
    # can take between 0.1ms and 3ms -- very fast to grab from cache.
    print(f"Load search bar options from Redis + unpickle: {(t1 - t0) / 1e6} ms")
    return opts, MULTISELECT_ENABLED, MULTISELECT_READY_MESSAGE
if __name__ == "__main__":
    # Development-server entry point; not used under a production WSGI server.
    app.run(debug=True)
Alternatively, have the set_sb_options
callback do the same thing on demand, kicking off a revalidate_sb_options
celery task if the timestamp of the options is outdated.

def set_sb_options(_):
"""
On-mount callback to set updated options in search bar.
"""
# pseudocode btw
if db.get(SB_OPTIONS_TTL_TS_KEY) < (time.now() - time.timedelta('8hr')):
revalidate_sb_options.delay() # kick off asynchronous revalidation task
# get outdated options anyway since this is a synchronous request- can't wait for revalidation.
# could set a notification that results might be out of date, and to try again in a sec, maybe.
time.sleep(2)
start = time.time_ns()
opts = pickle.loads(db.get(SB_OPTIONS_KEY))
end = time.time_ns()
# can take between 0.1ms and 3ms -- very fast to grab from cache.
print(f"Load search bar options from Redis + unpickle: {(end - start) / 1e6} ms")
return opts, MULTISELECT_ENABLED, MULTISELECT_READY_MESSAGE
The first approach is undoubtedly more seamless because all of the updates happen behind the scenes, but it requires taking on another tool w/ Celery Beat, and defining it in Openshift and Compose. The second alternative might cause marginally more user discomfort, but it puts the logic in the same place, and may be clearer to maintain.
Next step
Should implement a toy example of the application with the above proposed solution. Some remaining questions are:
Answers to above:
Example application:
Dash app:
import dash
from dash import html, dcc
import dash_mantine_components as dmc
import time
import redis
import pickle
# Redis client used as the searchbar-options cache (assumes localhost:6379 — TODO confirm).
db = redis.Redis()
app = dash.Dash(name=__name__)
# Underlying Flask server, exposed for WSGI deployment (e.g. gunicorn).
server = app.server
# Layout: a TransferList for picking repos, a submit button, and a <ul>
# that echoes the chosen values after submission.
app.layout = html.Div(
    children=[
        # populated by handle_clicks with the user's chosen repos
        html.Ul(id="selected-list"),
        dmc.TransferList(
            id="repo-multiselect",
            limit=75,  # cap the number of rendered options at once
            placeholder="Select a repo:",
            value=[[],[]],  # [available, chosen] — both empty until options load
            showTransferAll=False,
            searchPlaceholder="Loading options...",  # replaced once cache is read
        ),
        html.Button("Submit", id="submit-button", n_clicks=0),
    ]
)
@dash.callback(
    dash.Output("selected-list", "children"),
    dash.Input("submit-button", "n_clicks"),
    dash.State("repo-multiselect", "value"),
)
def handle_clicks(n_clicks, multiselect_values):
    """Render the chosen (second sub-list) TransferList values as <li> items."""
    items = []
    if multiselect_values:
        # TransferList value is [available, chosen]; index 1 holds the picks
        for chosen in multiselect_values[1]:
            items.append(html.Li(children=str(chosen)))
    return items
@dash.callback(
    dash.Output("repo-multiselect", "value"),
    dash.Output("repo-multiselect", "searchPlaceholder"),
    dash.Input("repo-multiselect", "limit")  # random static input prop to trigger callback
)
def load_sb_options(_):
    """
    Called to populate options in search bar after initial render.

    Pulls updated options from cache and swaps in the "ready" search
    placeholder. Returns ([available_options, chosen], placeholder_text).
    """
    start = time.time_ns()
    # NOTE(review): unpickling cache contents is safe only because this app
    # controls what is written to this key.
    searchbar_options = pickle.loads(db.get("8knot:sb_options"))
    end = time.time_ns()
    # Bug fix: ns -> ms divides by 1e6; the original 10e6 (=1e7) under-reported
    # the elapsed time by 10x (cf. the correct 1e6 divisor in the other listings).
    print(f"Time to get options from Redis: {(end - start)/1e6}ms")
    return [searchbar_options, []], "Search for repos and orgs!"
if __name__ == "__main__":
    # Local development entry point.
    app.run(host="localhost", port="8080", debug=False)
Celery Beat revalidation
import celery
from celery.schedules import crontab
import redis
import pickle
import sqlalchemy as salc
import time
# Celery application using local Redis (db 0) as both message broker
# and result backend.
celery_app = celery.Celery(
    broker='redis://@localhost:6379/0',
    backend='redis://@localhost:6379/0',
)
@celery_app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register periodic cache-revalidation tasks once Celery is configured."""
    # revalidate searchbar options cache every 10 seconds. Will be 4 or 8 hours in prod.
    sender.add_periodic_task(10.0, revalidate_cached_sb_opts.s(), name="sb opts revalidation")
    # can add postgres cache invalidation logic as a periodic task here too!
@celery_app.task
def revalidate_cached_sb_opts():
    """
    Queries all searchbar options from source-of-truth DB.
    Transforms options into component-consumable format.
    Stores options in Redis.
    TODO: handle db/cache communication errors.
    """
    # connect to redis
    cache = redis.Redis()

    def _fetch_repo_rows():
        # connect to the source-of-truth database and pull every repo row
        engine = salc.create_engine("sqlite:///repos.sqlite", echo=False)
        with engine.connect() as conn:
            result = conn.execute(
                salc.text(
                    "select * from repo_list rl order by rl.rg_name, length(rl.repo_git) asc"
                )
            )
            return result.all()

    t_start = time.time_ns()
    rows = _fetch_repo_rows()
    # component-consumable format: label is "<group> || <repo>", value is the repo
    transformed = [{"label": f"{row[3]} || {row[1]}", "value": row[1]} for row in rows]
    cache.set("8knot:sb_options", value=pickle.dumps(transformed))
    t_end = time.time_ns()
    # log success
    print(f"revalidated searchbar options! took {(t_end - t_start) / 1.0e6} ms")
Definitely not too slow to be useful. I think that this is the right solution for what we're wanting.
This is the general architectural design that I'm going off of.
Currently: On app startup, all valid repositories and orgs are queried from Augur exactly once, and kept as a constant global variable.
When users interact with the search bar, each keystroke is an HTTP request to the backend which filters the list of repos/orgs that the user receives as search bar options.
This has two downsides:
This has also contributed to a fair number of subtle, confusing bugs: https://github.com/oss-aspen/8Knot/issues/611 https://github.com/oss-aspen/8Knot/issues/542 https://github.com/oss-aspen/8Knot/issues/294