recommenders-team / recommenders

Best Practices on Recommendation Systems
https://recommenders-team.github.io/recommenders/intro.html
MIT License
19.2k stars 3.1k forks source link

[BUG] xdeepfm error in AzureML test #1848

Closed miguelgfierro closed 1 year ago

miguelgfierro commented 1 year ago

Description

    @pytest.mark.gpu
    @pytest.mark.notebooks
    @pytest.mark.integration
    @pytest.mark.parametrize(
        "syn_epochs, criteo_epochs, expected_values, seed",
        [
            (
                15,
                10,
                {
                    "res_syn": {"auc": 0.9716, "logloss": 0.699},
                    "res_real": {"auc": 0.749, "logloss": 0.4926},
                },
                42,
            )
        ],
    )
    def test_xdeepfm_integration(
        notebooks,
        output_notebook,
        kernel_name,
        syn_epochs,
        criteo_epochs,
        expected_values,
        seed,
    ):
        notebook_path = notebooks["xdeepfm_quickstart"]
        pm.execute_notebook(
            notebook_path,
            output_notebook,
            kernel_name=kernel_name,
            parameters=dict(
                EPOCHS_FOR_SYNTHETIC_RUN=syn_epochs,
                EPOCHS_FOR_CRITEO_RUN=criteo_epochs,
                BATCH_SIZE_SYNTHETIC=1024,
                BATCH_SIZE_CRITEO=1024,
                RANDOM_SEED=seed,
            ),
        )
        results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
            "data"
        ]

        for key, value in expected_values.items():
>           assert results[key]["auc"] == pytest.approx(value["auc"], rel=TOL, abs=ABS_TOL)
E           assert 0.5131 == 0.9716 ± 9.7e-02
E             comparison failed
E             Obtained: 0.5131
E             Expected: 0.9716 ± 9.7e-02

In which platform does it happen?

How do we replicate the issue?

See https://github.com/microsoft/recommenders/actions/runs/3459763061/jobs/5775521889

Expected behavior (i.e. solution)

Other Comments

miguelgfierro commented 1 year ago
 pytest tests/integration/examples/test_notebooks_gpu.py::test_xdeepfm_integration --disable-warnings --durations 0

with

@pytest.mark.gpu
@pytest.mark.notebooks
@pytest.mark.integration
@pytest.mark.parametrize(
    "syn_epochs, criteo_epochs, expected_values, seed",
    [
        (
            15,
            10,
            {
                "res_syn": {"auc": 0.9716, "logloss": 0.699},
                "res_real": {"auc": 0.749, "logloss": 0.4926},
            },
            42,
        )
    ],
)
def test_xdeepfm_integration(
    notebooks,
    output_notebook,
    kernel_name,
    syn_epochs,
    criteo_epochs,
    expected_values,
    seed,
):
    """Execute the xDeepFM quickstart notebook and check its recorded metrics.

    The notebook registered under ``"xdeepfm_quickstart"`` is executed with
    the parametrized epoch counts and seed (batch sizes are fixed at 1024).
    The values it recorded are then read back from the output notebook, and
    the ``auc`` and ``logloss`` of every entry in ``expected_values`` are
    compared against the expected numbers within ``TOL`` / ``ABS_TOL``.

    Args:
        notebooks: Mapping from notebook name to notebook path.
        output_notebook: Path the executed notebook is written to.
        kernel_name: Name of the Jupyter kernel used for execution.
        syn_epochs: Passed to the notebook as ``EPOCHS_FOR_SYNTHETIC_RUN``.
        criteo_epochs: Passed to the notebook as ``EPOCHS_FOR_CRITEO_RUN``.
        expected_values: Mapping of recorded-result name to a dict holding
            the expected ``auc`` and ``logloss``.
        seed: Passed to the notebook as ``RANDOM_SEED``.
    """
    notebook_path = notebooks["xdeepfm_quickstart"]
    notebook_parameters = {
        "EPOCHS_FOR_SYNTHETIC_RUN": syn_epochs,
        "EPOCHS_FOR_CRITEO_RUN": criteo_epochs,
        "BATCH_SIZE_SYNTHETIC": 1024,
        "BATCH_SIZE_CRITEO": 1024,
        "RANDOM_SEED": seed,
    }
    pm.execute_notebook(
        notebook_path,
        output_notebook,
        kernel_name=kernel_name,
        parameters=notebook_parameters,
    )

    # Index the recorded values by name so each expected entry can be looked
    # up directly.
    scrap_table = sb.read_notebook(output_notebook).scraps.dataframe
    results = scrap_table.set_index("name")["data"]

    for result_name, expected_metrics in expected_values.items():
        # Same check order as before: AUC first, then log loss.
        for metric in ("auc", "logloss"):
            assert results[result_name][metric] == pytest.approx(
                expected_metrics[metric], rel=TOL, abs=ABS_TOL
            )