Better and more extensive end to end tests

It would be nice to have some regular "end to end" tests with real content (e.g. scheduled via a cron trigger in GitHub Actions).

For that, we already have a small snippet that lists urls from the database. However, the extracted URLs should not be part of this repository.

The test should then query the metadata for these contents.

Sample code:


@pytest.fixture
def splash_samples() -> list[SplashResponse]:
    """Load the recorded Splash responses from the test resources.

    Walks the ``resources`` directory next to this module (including
    sub-directories) and parses every file whose name starts with
    ``splash-sample`` as JSON into a ``SplashResponse``.

    Returns:
        The parsed responses. Directories and files are visited in sorted
        order so the result is deterministic across runs and platforms.
    """
    resources = Path(__file__).parent / "resources"

    def samples():
        for root, dirs, files in os.walk(resources):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            for file in sorted(files):
                if file.startswith("splash-sample"):
                    # Open relative to the directory os.walk is currently in;
                    # joining with the top-level directory would break for
                    # samples stored in a sub-directory.
                    with open(Path(root) / file, "r", encoding="utf-8") as f:
                        yield SplashResponse.parse_obj(json.load(f))

    return list(samples())

@pytest.mark.asyncio
async def test_fetch_splash_samples():
    """Fetch every URL from ``resources/urls.txt`` and store the responses.

    ``SPLASH_URL`` is patched in ``lib.settings`` and ``core.website_manager``
    to ``http://localhost:8050`` for the duration of the run. Each response is
    dumped as JSON to ``splash-sample-<index>.json``, where ``<index>`` is the
    zero-based line number of the URL in ``urls.txt``.

    Failures for individual URLs are logged and do not abort the run, so one
    unreachable page does not prevent the remaining samples from being
    recorded.
    """
    base_dir = Path(__file__).parent

    with open(base_dir / "resources" / "urls.txt", "r") as f:
        # Strip the trailing newline (and surrounding whitespace) that
        # readlines() would keep, so it is neither sent to Splash nor logged.
        urls = [line.strip() for line in f]

    with mock.patch("lib.settings.SPLASH_URL", "http://localhost:8050"), mock.patch(
        "core.website_manager.SPLASH_URL", "http://localhost:8050"
    ):
        for index, url in enumerate(urls):
            if not url:
                # Blank line in urls.txt: nothing to fetch. The index still
                # advances so file names keep matching line numbers.
                continue
            logging.info(f"Fetching {url}")
            try:
                response: SplashResponse = await WebsiteData.fetch_content(url)
                # NOTE(review): samples are written one level above this
                # module's directory, while the splash_samples fixture reads
                # from "resources" - confirm whether that is intended.
                with open(base_dir.parent / f"splash-sample-{index}.json", "w") as out:
                    json.dump(response.dict(), out)
            except Exception as e:
                # Deliberately best-effort: log with traceback and continue.
                logging.exception(f"failed to fetch splash for {url}: {e}")

openeduhub / metalookup

Better and more extensive end to end tests #107