Open psychemedia opened 1 week ago
I prompted Claude:
pytest/nbval lets me run tests against all notebooks on a path as well as specifying a single named notebook. Could the above be modified to allow me to compare notebooks on one path with notebooks on another (the notebooks would have the same child paths and filenames)?
import pytest
import nbformat
from pathlib import Path
from nbval.plugin import IPyNbCell, coalesce_streams, transform_streams_for_comparison
class DualNotebookFile(pytest.File):
    """Collector that pairs the cells of a test notebook with a reference notebook.

    The test notebook is the file being collected; the reference notebook is
    read from ``reference_path``. Neither notebook is executed here: both are
    only read from disk.
    """

    def __init__(self, fspath, parent, reference_path):
        """Store the location of the reference notebook.

        Args:
            fspath: Path of the test notebook being collected.
            parent: Parent collector node.
            reference_path: Path of the notebook to compare against.
        """
        super().__init__(fspath, parent)
        self.reference_path = reference_path

    def collect(self):
        """Yield one ``DualNotebookCell`` per position where both cells are code.

        Cells are paired by position. If the notebooks have different numbers
        of cells, a warning is emitted because the unpaired trailing cells
        cannot be compared.
        """
        # Local import keeps this block independent of the file's import list.
        import warnings

        ref_nb = nbformat.read(self.reference_path, as_version=4)
        test_nb = nbformat.read(str(self.fspath), as_version=4)

        ref_count = len(ref_nb.cells)
        test_count = len(test_nb.cells)
        if ref_count != test_count:
            # zip() below stops at the shorter notebook; without this notice
            # the extra cells would be skipped silently.
            warnings.warn(
                f"Cell count mismatch: {self.fspath} has {test_count} cells, "
                f"reference {self.reference_path} has {ref_count}; only the "
                f"first {min(ref_count, test_count)} positions are compared"
            )

        for cell_num, (ref_cell, test_cell) in enumerate(zip(ref_nb.cells, test_nb.cells)):
            # Only code cells carry outputs, so other pairings are skipped.
            if ref_cell.cell_type == 'code' and test_cell.cell_type == 'code':
                yield DualNotebookCell.from_parent(
                    self,
                    name=f"Cell {cell_num}",
                    cell_num=cell_num,
                    ref_cell=ref_cell,
                    test_cell=test_cell
                )
class DualNotebookCell(IPyNbCell):
    """Test item comparing the stored outputs of a test cell and a reference cell."""

    def __init__(self, name, parent, cell_num, ref_cell, test_cell):
        """Keep both cells so their outputs can be compared in ``runtest``.

        Args:
            name: Item name shown in the test report.
            parent: The collector that produced this item.
            cell_num: Position of the cell pair within the notebooks.
            ref_cell: Cell taken from the reference notebook.
            test_cell: Cell taken from the notebook under test.
        """
        # The test cell is handed to the base class as its cell, with an
        # empty options dict.
        super().__init__(name, parent, cell_num, test_cell, {})
        self.ref_cell = ref_cell
        # runtest() reads self.test_cell; it was previously never assigned
        # here, so store it explicitly under that name.
        self.test_cell = test_cell

    def runtest(self):
        """Compare the stored outputs of the two cells without executing them.

        Reports "Cell outputs differ" through ``raise_cell_error`` when the
        inherited ``compare_outputs`` returns a falsy value.
        """
        ref_outputs = coalesce_streams(self.ref_cell.outputs)
        test_outputs = coalesce_streams(self.test_cell.outputs)
        if not self.compare_outputs(test_outputs, ref_outputs):
            # NOTE(review): assumes compare_outputs populates
            # self.comparison_traceback before returning; confirm against nbval.
            self.raise_cell_error(
                "Cell outputs differ",
                '\n'.join(self.comparison_traceback),
            )
def pytest_collect_file(file_path, parent):
    """Collect a notebook for directory comparison when that mode is enabled.

    Args:
        file_path: Path of the candidate file.
        parent: Parent collector; its ``config`` supplies the command-line options.

    Returns:
        A ``DualNotebookFile`` when ``file_path`` is an ``.ipynb`` file inside
        ``--test-dir`` with a same-named counterpart under ``--reference-dir``;
        otherwise ``None``.
    """
    if file_path.suffix != ".ipynb" or not parent.config.getoption("--compare-notebook-dirs"):
        return None

    reference_dir = Path(parent.config.getoption("--reference-dir"))
    test_dir = Path(parent.config.getoption("--test-dir"))

    # Calculate the relative path of the current notebook. Both sides are
    # resolved so a relative --test-dir still matches an absolute file_path.
    # Notebooks outside --test-dir (for example the reference copies) are
    # skipped instead of raising ValueError.
    try:
        relative_path = file_path.resolve().relative_to(test_dir.resolve())
    except ValueError:
        return None

    # Construct the path to the corresponding reference notebook
    reference_path = reference_dir / relative_path
    if reference_path.exists():
        return DualNotebookFile.from_parent(parent, fspath=file_path, reference_path=reference_path)

    print(f"Warning: No matching reference notebook found for {file_path}")
    return None
def pytest_addoption(parser):
    """Register the command-line options for directory comparison mode.

    Adds ``--compare-notebook-dirs`` (a flag), ``--reference-dir`` and
    ``--test-dir`` to the ``notebook_comparison`` option group.
    """
    register = parser.getgroup("notebook_comparison").addoption

    register(
        "--compare-notebook-dirs",
        action="store_true",
        help="Compare notebooks in two directories",
    )

    # The two directory options take a value and differ only in name and help.
    directory_options = (
        ("--reference-dir", "Path to the directory containing reference notebooks"),
        ("--test-dir", "Path to the directory containing test notebooks"),
    )
    for flag, description in directory_options:
        register(flag, help=description)
def pytest_configure(config):
    """Validate that directory comparison mode has both directories.

    Raises:
        pytest.UsageError: If ``--compare-notebook-dirs`` is set but
            ``--reference-dir`` or ``--test-dir`` is missing.
    """
    if not config.getoption("--compare-notebook-dirs"):
        return

    # Ensure both directories are provided
    required = ("--reference-dir", "--test-dir")
    if not all(config.getoption(flag) for flag in required):
        raise pytest.UsageError("Both --reference-dir and --test-dir must be specified when using --compare-notebook-dirs")
This modified version:
Introduces new options:
--compare-notebook-dirs: Enables the notebook directory comparison mode.
--reference-dir: Specifies the directory containing reference notebooks.
--test-dir: Specifies the directory containing test notebooks.
In pytest_collect_file, we now: calculate the relative path of each test notebook; construct the corresponding path in the reference directory; and only create a DualNotebookFile if a matching reference notebook exists.
We've added a pytest_configure hook to ensure both directory paths are provided when using this mode.
To use this, you would run pytest like:
pytest --compare-notebook-dirs --reference-dir=path/to/reference/notebooks --test-dir=path/to/test/notebooks
This command will:
- Recursively find all notebooks in the test directory
- For each test notebook, look for a corresponding notebook in the reference directory
- If a matching reference notebook is found, compare their outputs
This approach allows you to compare entire directory structures of notebooks, matching them by their relative paths. It maintains the ability to compare individual notebooks while adding the functionality to compare entire directories.
Prompting Claude for a way of allowing me to call nbval as in the original case, or to compare two files, or similarly named files on two paths, it suggests:
import pytest
import nbformat
from pathlib import Path
from nbval.plugin import IPyNbFile, IPyNbCell, coalesce_streams
class DualNotebookFile(pytest.File):
    """Collector that pairs the cells of a test notebook with a reference notebook.

    The test notebook is the file being collected; the reference notebook is
    read from ``reference_path``. Neither notebook is executed here: both are
    only read from disk.
    """

    def __init__(self, fspath, parent, reference_path):
        """Store the location of the reference notebook.

        Args:
            fspath: Path of the test notebook being collected.
            parent: Parent collector node.
            reference_path: Path of the notebook to compare against.
        """
        super().__init__(fspath, parent)
        self.reference_path = reference_path

    def collect(self):
        """Yield one ``DualNotebookCell`` per position where both cells are code.

        Cells are paired by position. If the notebooks have different numbers
        of cells, a warning is emitted because the unpaired trailing cells
        cannot be compared.
        """
        # Local import keeps this block independent of the file's import list.
        import warnings

        ref_nb = nbformat.read(self.reference_path, as_version=4)
        test_nb = nbformat.read(str(self.fspath), as_version=4)

        ref_count = len(ref_nb.cells)
        test_count = len(test_nb.cells)
        if ref_count != test_count:
            # zip() below stops at the shorter notebook; without this notice
            # the extra cells would be skipped silently.
            warnings.warn(
                f"Cell count mismatch: {self.fspath} has {test_count} cells, "
                f"reference {self.reference_path} has {ref_count}; only the "
                f"first {min(ref_count, test_count)} positions are compared"
            )

        for cell_num, (ref_cell, test_cell) in enumerate(zip(ref_nb.cells, test_nb.cells)):
            # Only code cells carry outputs, so other pairings are skipped.
            if ref_cell.cell_type == 'code' and test_cell.cell_type == 'code':
                yield DualNotebookCell.from_parent(
                    self,
                    name=f"Cell {cell_num}",
                    cell_num=cell_num,
                    ref_cell=ref_cell,
                    test_cell=test_cell
                )
class DualNotebookCell(IPyNbCell):
    """Test item comparing the stored outputs of a test cell and a reference cell."""

    def __init__(self, name, parent, cell_num, ref_cell, test_cell):
        """Keep both cells so their outputs can be compared in ``runtest``.

        Args:
            name: Item name shown in the test report.
            parent: The collector that produced this item.
            cell_num: Position of the cell pair within the notebooks.
            ref_cell: Cell taken from the reference notebook.
            test_cell: Cell taken from the notebook under test.
        """
        # The test cell is handed to the base class as its cell, with an
        # empty options dict.
        super().__init__(name, parent, cell_num, test_cell, {})
        self.ref_cell = ref_cell
        # runtest() reads self.test_cell; it was previously never assigned
        # here, so store it explicitly under that name.
        self.test_cell = test_cell

    def runtest(self):
        """Compare the stored outputs of the two cells without executing them.

        Reports "Cell outputs differ" through ``raise_cell_error`` when the
        inherited ``compare_outputs`` returns a falsy value.
        """
        ref_outputs = coalesce_streams(self.ref_cell.outputs)
        test_outputs = coalesce_streams(self.test_cell.outputs)
        if not self.compare_outputs(test_outputs, ref_outputs):
            # NOTE(review): assumes compare_outputs populates
            # self.comparison_traceback before returning; confirm against nbval.
            self.raise_cell_error(
                "Cell outputs differ",
                '\n'.join(self.comparison_traceback),
            )
def pytest_collect_file(file_path, parent):
    """Collect a notebook according to whichever notebook mode is enabled.

    Modes are checked in this order: ``--nbval``, ``--compare-notebooks``,
    ``--compare-notebook-dirs``. The first enabled mode wins.

    Args:
        file_path: Path of the candidate file.
        parent: Parent collector; its ``config`` supplies the command-line options.

    Returns:
        An ``IPyNbFile`` or ``DualNotebookFile`` for ``.ipynb`` files handled
        by the active mode; otherwise ``None``.
    """
    if file_path.suffix != ".ipynb":
        return None

    config = parent.config

    if config.getoption("--nbval"):
        # Original nbval functionality
        return IPyNbFile.from_parent(parent, fspath=file_path)

    if config.getoption("--compare-notebooks"):
        # Compare two specific notebooks: every collected notebook is paired
        # with the single --reference-notebook.
        reference_path = config.getoption("--reference-notebook")
        return DualNotebookFile.from_parent(parent, fspath=file_path, reference_path=reference_path)

    if config.getoption("--compare-notebook-dirs"):
        # Compare similarly named files on two paths
        reference_dir = Path(config.getoption("--reference-dir"))
        test_dir = Path(config.getoption("--test-dir"))
        # Both sides are resolved so a relative --test-dir still matches an
        # absolute file_path. Notebooks outside --test-dir (for example the
        # reference copies) are skipped instead of raising ValueError.
        try:
            relative_path = file_path.resolve().relative_to(test_dir.resolve())
        except ValueError:
            return None
        reference_path = reference_dir / relative_path
        if reference_path.exists():
            return DualNotebookFile.from_parent(parent, fspath=file_path, reference_path=reference_path)
        print(f"Warning: No matching reference notebook found for {file_path}")

    return None
def pytest_addoption(parser):
    """Register the command-line options for all three notebook modes.

    All options are added to the ``notebook_comparison`` group, in the order
    listed in the table below.
    """
    # NOTE(review): nbval may already register its own --nbval option when it
    # is installed as a pytest plugin; confirm this does not clash.
    option_table = (
        # Original nbval option
        ("--nbval", {"action": "store_true", "help": "Validate jupyter notebooks"}),
        # Options for comparing two specific notebooks
        ("--compare-notebooks", {"action": "store_true", "help": "Compare two specific notebooks"}),
        ("--reference-notebook", {"help": "Path to the reference notebook"}),
        # Options for comparing notebooks in two directories
        ("--compare-notebook-dirs", {"action": "store_true", "help": "Compare notebooks in two directories"}),
        ("--reference-dir", {"help": "Path to the directory containing reference notebooks"}),
        ("--test-dir", {"help": "Path to the directory containing test notebooks"}),
    )

    option_group = parser.getgroup("notebook_comparison")
    for flag, settings in option_table:
        option_group.addoption(flag, **settings)
def pytest_configure(config):
    """Validate that each comparison mode has the paths it depends on.

    Raises:
        pytest.UsageError: If ``--compare-notebooks`` is set without
            ``--reference-notebook``, or ``--compare-notebook-dirs`` is set
            without both ``--reference-dir`` and ``--test-dir``.
    """
    pair_mode = config.getoption("--compare-notebooks")
    if pair_mode and not config.getoption("--reference-notebook"):
        raise pytest.UsageError("--reference-notebook must be specified when using --compare-notebooks")

    if not config.getoption("--compare-notebook-dirs"):
        return

    # Directory mode needs both roots to map test notebooks to references.
    if not (config.getoption("--reference-dir") and config.getoption("--test-dir")):
        raise pytest.UsageError("Both --reference-dir and --test-dir must be specified when using --compare-notebook-dirs")
pytest --nbval
pytest --compare-notebooks --reference-notebook=path/to/reference.ipynb path/to/test.ipynb
pytest --compare-notebook-dirs --reference-dir=path/to/reference/notebooks --test-dir=path/to/test/notebooks
If we have run the same notebook in two environments, it would be useful to be able to compare them.
This doesn't require nbval to execute notebooks; it just takes two pre-run notebooks with the same input cells and compares their outputs. Claude.ai suggests:
with commentary: