datasette / datasette-enrichments

Tools for running enrichments against data stored in Datasette
https://enrichments.datasette.io
Apache License 2.0
19 stars 0 forks source link

Figure out a neat pattern for automated tests for enrichment plugins #27

Closed simonw closed 10 months ago

simonw commented 10 months ago

Can include this in the documentation.

simonw commented 10 months ago

Here's the pattern I used for that one:

import asyncio
from datasette.app import Datasette
import pytest
import pytest_asyncio

@pytest_asyncio.fixture()
async def datasette():
    """Provide a Datasette instance backed by an in-memory "demo" database.

    The database gets a single ``news`` table with one ``body`` text column
    and three rows, inserted one statement at a time.
    """
    instance = Datasette()
    demo_db = instance.add_memory_database("demo")
    await demo_db.execute_write("create table news (body text)")
    bodies = ("example a", "example b", "example c")
    for body in bodies:
        await demo_db.execute_write(
            "insert into news (body) values (?)",
            [body],
        )
    return instance

@pytest.mark.asyncio
async def test_enrich_re2(datasette: Datasette):
    """Submit the re2 enrichment form and check the job row and the new column.

    The request is made with an actor cookie for ``{"id": "root"}`` plus the
    CSRF token obtained from a prior GET of the same form URL.
    """
    session = {"ds_actor": datasette.client.actor_cookie({"id": "root"})}

    # Fetch the form first: its response carries the CSRF cookie that the
    # POST has to send back both as a cookie and as a form field.
    form_page = await datasette.client.get(
        "/-/enrich/demo/news/re2", cookies=session
    )
    token = form_page.cookies["ds_csrftoken"]
    session["ds_csrftoken"] = token

    form = {
        "source_column": "body",
        "regex": r"example (?P<letter>[a-z])",
        "single_column": "letter",
        "mode": "single",
        "csrftoken": token,
    }
    submitted = await datasette.client.post(
        "/-/enrich/demo/news/re2",
        data=form,
        cookies=session,
    )
    assert submitted.status_code == 302

    # Fixed pause: the assertions below expect the enrichment to have
    # completed within half a second of the redirect.
    await asyncio.sleep(0.5)

    demo_db = datasette.get_database("demo")
    job_rows = await demo_db.execute("select * from _enrichment_jobs")
    first_job = dict(job_rows.first())
    assert first_job["status"] == "finished"
    assert first_job["enrichment"] == "re2"
    assert first_job["done_count"] == 3

    news = await demo_db.execute("select body, letter from news order by body")
    enriched = [dict(row) for row in news.rows]
    assert enriched == [
        {"body": "example a", "letter": "a"},
        {"body": "example b", "letter": "b"},
        {"body": "example c", "letter": "c"},
    ]

The asyncio.sleep(0.5) in there is a bit ugly. Can I do that better?

simonw commented 10 months ago

The need to sleep until the enrichment was done was bothering me, so I came up with a mechanism that leans on asyncio.Event to help avoid having to do that.

simonw commented 10 months ago

That pattern didn't work, as this test shows:

async def test_re2(datasette: Datasette, post: dict, expected: list):
    """Submit the re2 enrichment form, wait on the job, then check the results.

    Uses ``wait_for_job`` with a one second timeout instead of a fixed sleep.
    ``post`` is the form payload (the CSRF token is added to it here) and
    ``expected`` is the list of row dicts the ``news`` table should contain
    afterwards.

    NOTE(review): ``post`` and ``expected`` are presumably supplied by a
    parametrize decorator that is not part of this snippet - confirm.
    """
    from datasette_enrichments import wait_for_job

    # _cookies() is defined elsewhere; the next line relies on its result
    # containing a "ds_csrftoken" entry.
    cookies = await _cookies(datasette)
    post["csrftoken"] = cookies["ds_csrftoken"]
    response = await datasette.client.post(
        "/-/enrich/demo/news/re2",
        data=post,
        cookies=cookies,
    )
    assert response.status_code == 302
    # No database name is passed here, unlike the get_database("demo") call
    # further down. NOTE(review): presumably both resolve to the same
    # database - confirm.
    db = datasette.get_database()
    # Look up the most recent job id immediately after the POST returns.
    # NOTE(review): the author reports this is where the pattern breaks - at
    # this point the job id has not been assigned yet and the
    # _enrichment_jobs table has not been created.
    job_id = (
        await db.execute("select id from _enrichment_jobs order by id desc limit 1")
    ).first()[0]
    await wait_for_job(datasette, job_id, database="demo", timeout=1)
    db = datasette.get_database("demo")
    jobs = await db.execute("select * from _enrichment_jobs")
    job = dict(jobs.first())
    assert job["status"] == "finished"
    assert job["enrichment"] == "re2"
    assert job["done_count"] == 3
    results = await db.execute("select * from news order by body")
    rows = [dict(r) for r in results.rows]
    assert rows == expected

In this test we don't know the job_id because it hasn't been assigned yet - in fact the _enrichment_jobs table is not yet created.

simonw commented 10 months ago

I think maybe that redirect after the POST should send you to the table page with ?_enrichment_job=5 in the URL.

simonw commented 10 months ago

Documentation: https://enrichments.datasette.io/en/latest/developing.html#writing-tests-for-enrichments