ApolloResearch / rib

Library for methods related to the Local Interaction Basis (LIB)
MIT License
2 stars 0 forks source link

Pythia attention_scores mismatch (1e-5) when folding biases #245

Open stefan-apollo opened 10 months ago

stefan-apollo commented 10 months ago

Pythia attention scores match much less well (<1e-5) than the rest of the activations (<1e-11).

Implementing feature/module_for_attention_scores made this test fail for atol < 1e-5. I "fixed" it my overwriting the atol for attention_scores. We should debug this some day.

@pytest.mark.slow()
def test_pythia_folded_bias() -> None:
    """Test that the folded bias trick works for Pythia."""
    set_seed(42)
    dtype = torch.float64
    # float64 can do atol=1e-11, float32 can do atol=1e2.
    atol = 1e-11
    atol_attn_scores = 1e-5
    node_layers = ["mlp_in.1", "add_resid2.3"]
    pretrained_lm_folded_bias_comparison(
        hf_model_str="pythia-14m",
        node_layers=node_layers,
        positional_embedding_type="rotary",
        atol=atol,
        atol_attn_scores=atol_attn_scores,
        dtype=dtype,
    )

Added this xfail test to track the issue.

@pytest.mark.xfail(reason="Pythia attention scores affected more by folded biases, issue #245")
@pytest.mark.slow()
def test_pythia_folded_bias_strict_incl_attn_scores() -> None:
    """Test that the folded bias trick works for Pythia."""
    set_seed(42)
    dtype = torch.float64
    # float64 can do atol=1e-11, float32 can do atol=1e2.
    atol = 1e-11
    node_layers = ["mlp_in.1", "add_resid2.3"]
    pretrained_lm_folded_bias_comparison(
        hf_model_str="pythia-14m",
        node_layers=node_layers,
        positional_embedding_type="rotary",
        atol=atol,
        atol_attn_scores=None,
        dtype=dtype,
    )