rstudio / pins-python

https://rstudio.github.io/pins-python/
MIT License
50 stars 12 forks source link

Organize test suite into classes #257

Open nathanjmcdougall opened 2 months ago

nathanjmcdougall commented 2 months ago

Currently tests are organized into modules and then separated into sections using comment rules e.g.

# pin_download ===================================================================

This can make it hard to navigate the test suite and also results in longer test names: the test names include both the category information and the specifics of the test.

Pytest allows you to organize tests into classes. Besides adding some clarity to the test organization, this also gives the user more fine-grained control over which tests run (for example, selecting a single test class by its node ID or with `-k`).

So I would propose changing the following:

# pin_write ===================================================================

def test_board_pin_write_default_title(board):
    """Check the title recorded in the metadata when ``pin_write`` gets ``title=None``."""

    df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    meta = board.pin_write(df, "df_csv", title=None, type="csv")
    # The expected title embeds the pin name and the frame's 3-row x 2-column shape.
    assert meta.title == "df_csv: a pinned 3 x 2 DataFrame"

def test_board_pin_write_prepare_pin(board, tmp_dir2):
    """Check that ``prepare_pin_version`` leaves the expected files in ``tmp_dir2``."""

    df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

    # The target directory is passed as a string; the assertions below use it
    # with the ``/`` operator, so the fixture is presumably a pathlib.Path.
    meta = board.prepare_pin_version(
        str(tmp_dir2), df, "df_csv", title=None, type="csv"
    )
    assert meta.file == "df_csv.csv"
    assert (tmp_dir2 / "data.txt").exists()
    # The CSV entry must exist and must not be a directory.
    assert (tmp_dir2 / "df_csv.csv").exists()
    assert not (tmp_dir2 / "df_csv.csv").is_dir()

def test_board_pin_write_roundtrip(board):
    """Write a DataFrame as a CSV pin and check that reading it back gives an equal frame."""

    df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

    # Precondition: the pin must not exist on the board yet.
    assert not board.pin_exists("df_csv")

    board.pin_write(df, "df_csv", type="csv")

    assert board.pin_exists("df_csv")

    loaded_df = board.pin_read("df_csv")
    assert loaded_df.equals(df)

def test_board_pin_write_type_not_specified_error(board):
    """Check that ``pin_write`` raises ``NotImplementedError`` for an arbitrary object when no ``type`` is passed."""

    # Minimal user-defined class, used as an object that pin_write is expected to reject.
    class C:
        pass

    with pytest.raises(NotImplementedError):
        board.pin_write(C(), "cool_pin")

To this:

class TestBoard:
    """Container class grouping the board tests (proposed layout)."""

    ... # All the other sets of tests on pins.board.BaseBoard methods...
    class TestPinWrite:
        """Tests that were previously grouped under the ``# pin_write`` comment rule."""

        def test_default_title(self, board):
            """Check the title recorded in the metadata when ``pin_write`` gets ``title=None``."""

            df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
            meta = board.pin_write(df, "df_csv", title=None, type="csv")
            # The expected title embeds the pin name and the frame's 3-row x 2-column shape.
            assert meta.title == "df_csv: a pinned 3 x 2 DataFrame"

        def test_prepare_pin(self, board, tmp_dir2):
            """Check that ``prepare_pin_version`` leaves the expected files in ``tmp_dir2``."""

            df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

            meta = board.prepare_pin_version(
                str(tmp_dir2), df, "df_csv", title=None, type="csv"
            )
            assert meta.file == "df_csv.csv"
            assert (tmp_dir2 / "data.txt").exists()
            # The CSV entry must exist and must not be a directory.
            assert (tmp_dir2 / "df_csv.csv").exists()
            assert not (tmp_dir2 / "df_csv.csv").is_dir()

        def test_roundtrip(self, board):
            """Write a DataFrame as a CSV pin and check that reading it back gives an equal frame."""

            df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

            # Precondition: the pin must not exist on the board yet.
            assert not board.pin_exists("df_csv")

            board.pin_write(df, "df_csv", type="csv")

            assert board.pin_exists("df_csv")

            loaded_df = board.pin_read("df_csv")
            assert loaded_df.equals(df)

        def test_type_not_specified_error(self, board):
            """Check that ``pin_write`` raises ``NotImplementedError`` for an arbitrary object when no ``type`` is passed."""

            # Minimal user-defined class, used as an object that pin_write is expected to reject.
            class C:
                pass

            with pytest.raises(NotImplementedError):
                board.pin_write(C(), "cool_pin")

        ... # etc.

My plan would be to make this change one test file at a time.

isabelizimm commented 2 months ago

The # pin_write ========== rule is probably a good signal that some of the tests would be better organized into classes. I don't anticipate that all the tests will make sense as classes, but boards/caches seem like good places to start.