Open simonw opened 7 months ago
Since many enrichments will work perfectly fine if they go a row at a time, rather than doing a batch optimization, this could be an easier way to write them.
Signature could be:
async def enrich_row(
self,
datasette,
db: Database,
table: str,
row: dict,
pks: List[str],
config: dict,
job_id: int,
):
So you get passed the row
itself and also the IDs extracted out for you.
If that method raises an exception it can get automatically logged.
One thing I haven't figured out yet: it would be nice if that method could return a value which then gets inserted into the output column - but that would mean we would need some kind of default get_config_form()
that only has the single output column in it. That feels weird. Not quite decided on that yet.
Half-finished prototype of this:
diff --git a/datasette_enrichments/__init__.py b/datasette_enrichments/__init__.py
index 7712950..87a706f 100644
--- a/datasette_enrichments/__init__.py
+++ b/datasette_enrichments/__init__.py
@@ -113,7 +113,6 @@ class Enrichment(ABC):
):
pass
- @abstractmethod
async def enrich_batch(
self,
datasette: "Datasette",
@@ -124,7 +123,36 @@ class Enrichment(ABC):
config: dict,
job_id: int,
):
- raise NotImplementedError
+ "Enrich a batch of rows"
+ for row in rows:
+ try:
+ ids = [row[pk] for pk in pks]
+ await async_call_with_supported_arguments(
+ self.enrich_row,
+ datasette=datasette,
+ db=db,
+ table=table,
+ row=row,
+ pks=pks,
+ ids=ids,
+ config=config,
+ job_id=job_id,
+ )
+ except Exception as e:
+ await self.log_error(db, job_id, ids, str(e))
+
+ async def enrich_row(
+ self,
+ datasette: "Datasette",
+ db: "Database",
+ table: str,
+ row: dict,
+ pks: list,
+ ids: list,
+ config: dict,
+ job_id: int,
+ ):
+ "Enrich a single row"
async def increment_cost(
self, db: "Database", job_id: int, total_cost_rounded_up: int
diff --git a/tests/conftest.py b/tests/conftest.py
index c413ee0..9a70a8c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -14,9 +14,25 @@ class MultiCheckboxField(SelectField):
@pytest.fixture(autouse=True)
-def load_uppercase_plugin():
+def install_plugins():
from datasette_enrichments import Enrichment
+ class RowByRowDemo(Enrichment):
+ name = "Row by row demo"
+ slug = "byrow"
+
+ async def enrich_row(
+ self,
+ datasette,
+ db: Database,
+ table: str,
+ row: dict,
+ pks: List[str],
+ config: dict,
+ job_id: int,
+ ):
+ print("Have not quite got design right yet")
+
class UppercaseDemo(Enrichment):
name = "Convert to uppercase"
slug = "uppercasedemo"
@@ -58,15 +74,15 @@ def load_uppercase_plugin():
# Wait 0.3s
await asyncio.sleep(0.3)
- class UppercasePlugin:
- __name__ = "UppercasePlugin"
+ class TestPlugins:
+ __name__ = "TestPlugins"
@hookimpl
def register_enrichments(self):
- return [UppercaseDemo()]
+ return [UppercaseDemo(), RowByRowDemo()]
- pm.register(UppercasePlugin(), name="undo_uppercase")
+ pm.register(TestPlugins(), name="undo_test_plugins")
try:
yield
finally:
- pm.unregister(name="undo_uppercase")
+ pm.unregister(name="undo_test_plugins")
I'm going to leave this for a bit, and inform the design of it from plugins that I and others have created.
Originally posted by @simonw in https://github.com/datasette/datasette-enrichments/issues/7#issuecomment-1829215626