hackworthltd / primer

A pedagogical functional programming language.
GNU Affero General Public License v3.0
13 stars 1 forks source link

`tasty_two_interp_agree` property test failure #1240

Open dhess opened 3 months ago

dhess commented 3 months ago
two interp agree:                               FAIL (4.73s)
--
  | ✗ two interp agree failed at test/Tests/EvalFullInterp.hs:431:60
  | after 212 tests, 24 shrinks and 187 discards.
  | shrink path: 212/187:dCaDeA3fCbA2dGaEaBa2Ba2
  | both terminated         99% ███████████████████▊
  | one failed to terminate  0% ····················
  |  
  | ┏━━ test/Tests/Eval/Utils.hs ━━━
  | 51 ┃ genDirTm :: PropertyT WT (Dir, Expr, Type' () ())
  | 52 ┃ genDirTm = do
  | 53 ┃   dir <- forAllT $ Gen.element @[] [Chk, Syn]
  | ┃   │ Syn
  | 54 ┃   (t', ty) <- case dir of
  | 55 ┃     Chk -> do
  | 56 ┃       ty' <- forAllT $ genWTType $ KType ()
  | 57 ┃       t' <- forAllT $ genChk ty'
  | 58 ┃       pure (t', ty')
  | 59 ┃     Syn -> forAllT genSyn
  | ┃     │ ( App
  | ┃     │     ()
  | ┃     │     (App
  | ┃     │        ()
  | ┃     │        (EmptyHole ())
  | ┃     │        (Case
  | ┃     │           ()
  | ┃     │           (Letrec
  | ┃     │              ()
  | ┃     │              LocalName { unLocalName = "x" }
  | ┃     │              (EmptyHole ())
  | ┃     │              (TEmptyHole ())
  | ┃     │              (Hole
  | ┃     │                 ()
  | ┃     │                 (Case
  | ┃     │                    ()
  | ┃     │                    (Ann
  | ┃     │                       ()
  | ┃     │                       (Letrec
  | ┃     │                          ()
  | ┃     │                          LocalName { unLocalName = "x" }
  | ┃     │                          (EmptyHole ())
  | ┃     │                          (TEmptyHole ())
  | ┃     │                          (Con
  | ┃     │                             ()
  | ┃     │                             GlobalName
  | ┃     │                               { qualifiedModule = ModuleName { unModuleName = "Builtins" :\| [] }
  | ┃     │                               , baseName = "False"
  | ┃     │                               }
  | ┃     │                             []))
  | ┃     │                       (TCon
  | ┃     │                          ()
  | ┃     │                          GlobalName
  | ┃     │                            { qualifiedModule = ModuleName { unModuleName = "Builtins" :\| [] }
  | ┃     │                            , baseName = "Bool"
  | ┃     │                            }))
  | ┃     │                    [ CaseBranch
  | ┃     │                        (PatCon
  | ┃     │                           GlobalName
  | ┃     │                             { qualifiedModule = ModuleName { unModuleName = "Builtins" :\| [] }
  | ┃     │                             , baseName = "True"
  | ┃     │                             })
  | ┃     │                        []
  | ┃     │                        (EmptyHole ())
  | ┃     │                    , CaseBranch
  | ┃     │                        (PatCon
  | ┃     │                           GlobalName
  | ┃     │                             { qualifiedModule = ModuleName { unModuleName = "Builtins" :\| [] }
  | ┃     │                             , baseName = "False"
  | ┃     │                             })
  | ┃     │                        []
  | ┃     │                        (Letrec
  | ┃     │                           ()
  | ┃     │                           LocalName { unLocalName = "x" }
  | ┃     │                           (EmptyHole ())
  | ┃     │                           (TForall
  | ┃     │                              ()
  | ┃     │                              LocalName { unLocalName = "x" }
  | ┃     │                              (KType ())
  | ┃     │                              (TVar () LocalName { unLocalName = "x" }))
  | ┃     │                           (Var () (LocalVarRef LocalName { unLocalName = "x" })))
  | ┃     │                    ]
  | ┃     │                    CaseExhaustive)))
  | ┃     │           []
  | ┃     │           CaseExhaustive))
  | ┃     │     (Let
  | ┃     │        ()
  | ┃     │        LocalName { unLocalName = "x" }
  | ┃     │        (EmptyHole ())
  | ┃     │        (Let
  | ┃     │           () LocalName { unLocalName = "y" } (EmptyHole ()) (EmptyHole ())))
  | ┃     │ , TEmptyHole ()
  | ┃     │ )
  | 60 ┃   t <- generateIDs t'
  | 61 ┃   pure (dir, t, ty)
  |  
  | ┏━━ test/Tests/EvalFullInterp.hs ━━━
  | 418 ┃ tasty_two_interp_agree :: Property
  | 419 ┃ tasty_two_interp_agree = withTests 1000
  | 420 ┃   $ withDiscards 2000
  | 421 ┃   $ propertyWT testModules
  | 422 ┃   $ do
  | 423 ┃     let globs = foldMap' moduleDefsQualified $ create' $ sequence testModules
  | 424 ┃     tds <- asks typeDefs
  | 425 ┃     (dir, t, _ty) <- genDirTm
  | 426 ┃     let optsV = ViewRedexOptions{groupedLets = True, aggressiveElision = True, avoidShadowing = False}
  | 427 ┃     let optsR = RunRedexOptions{pushAndElide = True}
  | 428 ┃     (_, ss) <- failWhenSevereLogs $ evalFullStepCount @EvalLog UnderBinders optsV optsR tds globs 100 dir t
  | 429 ┃     si <- liftIO (evalFullTest' (MicroSec 10_000) tds globs dir $ forgetMetadata t)
  | 430 ┃     case (ss, si) of
  | 431 ┃       (Right ss', Right si') -> label "both terminated" >> Hedgehog.diff (forgetMetadata ss') alphaEq si'
  | ┃       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  | ┃       │ ━━━ Failed (- lhs) (+ rhs) ━━━
  | ┃       │   App
  | ┃       │     ()
  | ┃       │     App
  | ┃       │       ()
  | ┃       │       EmptyHole ()
  | ┃       │       Case
  | ┃       │         ()
  | ┃       │         Hole
  | ┃       │           ()
  | ┃       │ -         Ann
  | ┃       │ -           ()
  | ┃       │ -           (EmptyHole ())
  | ┃       │ -           (TForall
  | ┃       │ -              ()
  | ┃       │ -              LocalName { unLocalName = "x" }
  | ┃       │ -              (KType ())
  | ┃       │ -              (TVar () LocalName { unLocalName = "x" }))
  | ┃       │ +         EmptyHole ()
  | ┃       │         []
  | ┃       │         CaseExhaustive
  | ┃       │     EmptyHole ()
  | 432 ┃       _ -> label "one failed to terminate"
  |  
  | This failure can be reproduced by running:
  | > recheckAt (Seed 11918244757349521713 2146024744268324243) "212/187:dCaDeA3fCbA2dGaEaBa2Ba2" two interp agree
  |  
  | Use "--pattern '$NF ~ /two interp agree/' --hedgehog-replay '212/187:dCaDeA3fCbA2dGaEaBa2Ba2 Seed 11918244757349521713 2146024744268324243'" to reproduce from the command-line.
  |  
  | Use -p '/two interp agree/' to rerun this test only.
  |  
  | 1 out of 852 tests failed (75.76s)

Ref:

Raw logs:

dhess commented 3 months ago

Another one:

two interp agree:                               FAIL (5.57s)
--
  | ✗ two interp agree failed at test/Tests/EvalFullInterp.hs:458:60
  | after 752 tests, 3 shrinks and 580 discards.
  | shrink path: 752/580:qA2
  | both terminated         100% ███████████████████▉
  | one failed to terminate   0% ····················
  |  
  | ┏━━ test/Tests/Eval/Utils.hs ━━━
  | 51 ┃ genDirTm :: PropertyT WT (Dir, Expr, Type' () ())
  | 52 ┃ genDirTm = do
  | 53 ┃   dir <- forAllT $ Gen.element @[] [Chk, Syn]
  | ┃   │ Syn
  | 54 ┃   (t', ty) <- case dir of
  | 55 ┃     Chk -> do
  | 56 ┃       ty' <- forAllT $ genWTType $ KType ()
  | 57 ┃       t' <- forAllT $ genChk ty'
  | 58 ┃       pure (t', ty')
  | 59 ┃     Syn -> forAllT genSyn
  | ┃     │ ( App
  | ┃     │     ()
  | ┃     │     (Let
  | ┃     │        ()
  | ┃     │        LocalName { unLocalName = "x" }
  | ┃     │        (EmptyHole ())
  | ┃     │        (App
  | ┃     │           ()
  | ┃     │           (Var
  | ┃     │              ()
  | ┃     │              (GlobalVarRef
  | ┃     │                 GlobalName
  | ┃     │                   { qualifiedModule =
  | ┃     │                       ModuleName { unModuleName = "Primitives" :\| [] }
  | ┃     │                   , baseName = "const"
  | ┃     │                   }))
  | ┃     │           (EmptyHole ())))
  | ┃     │     (EmptyHole ())
  | ┃     │ , TCon
  | ┃     │     ()
  | ┃     │     GlobalName
  | ┃     │       { qualifiedModule = ModuleName { unModuleName = "Builtins" :\| [] }
  | ┃     │       , baseName = "Bool"
  | ┃     │       }
  | ┃     │ )
  | 60 ┃   t <- generateIDs t'
  | 61 ┃   pure (dir, t, ty)
  |  
  | ┏━━ test/Tests/EvalFullInterp.hs ━━━
  | 445 ┃ tasty_two_interp_agree :: Property
  | 446 ┃ tasty_two_interp_agree = withTests 1000
  | 447 ┃   $ withDiscards 2000
  | 448 ┃   $ propertyWT testModules
  | 449 ┃   $ do
  | 450 ┃     let globs = foldMap' moduleDefsQualified $ create' $ sequence testModules
  | 451 ┃     tds <- asks typeDefs
  | 452 ┃     (dir, t, _ty) <- genDirTm
  | 453 ┃     let optsV = ViewRedexOptions{groupedLets = True, aggressiveElision = True, avoidShadowing = False}
  | 454 ┃     let optsR = RunRedexOptions{pushAndElide = True}
  | 455 ┃     (_, ss) <- failWhenSevereLogs $ evalFullStepCount @EvalLog UnderBinders optsV optsR tds globs 100 dir t
  | 456 ┃     si <- liftIO (evalFullTest' (MicroSec 10_000) tds globs dir $ forgetMetadata t)
  | 457 ┃     case (ss, si) of
  | 458 ┃       (Right ss', Right si') -> label "both terminated" >> Hedgehog.diff (forgetMetadata ss') alphaEq si'
  | ┃       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  | ┃       │ ━━━ Failed (- lhs) (+ rhs) ━━━
  | ┃       │ - Ann
  | ┃       │ -   ()
  | ┃       │ -   (EmptyHole ())
  | ┃       │ -   (TCon
  | ┃       │ -      ()
  | ┃       │ -      GlobalName
  | ┃       │ -        { qualifiedModule = ModuleName { unModuleName = "Builtins" :\| [] }
  | ┃       │ -        , baseName = "Bool"
  | ┃       │ -        })
  | ┃       │ + App
  | ┃       │ +   ()
  | ┃       │ +   (App
  | ┃       │ +      ()
  | ┃       │ +      (Var
  | ┃       │ +         ()
  | ┃       │ +         (GlobalVarRef
  | ┃       │ +            GlobalName
  | ┃       │ +              { qualifiedModule =
  | ┃       │ +                  ModuleName { unModuleName = "Primitives" :\| [] }
  | ┃       │ +              , baseName = "const"
  | ┃       │ +              }))
  | ┃       │ +      (EmptyHole ()))
  | ┃       │ +   (EmptyHole ())
  | 459 ┃       _ -> label "one failed to terminate"
  |  
  | This failure can be reproduced by running:
  | > recheckAt (Seed 8128149785453899409 10689373616275667251) "752/580:qA2" two interp agree
  |  
  | Use "--pattern '$NF ~ /two interp agree/' --hedgehog-replay '752/580:qA2 Seed 8128149785453899409 10689373616275667251'" to reproduce from the command-line.
  |  
  | Use -p '/two interp agree/' to rerun this test only.
  |  
  | 1 out of 854 tests failed (88.19s)

Ref:

Raw logs:

dhess commented 2 months ago

Tests
--
  | EvalFullInterp
  | two interp agree:                                                FAIL (1.45s)
  | ✗ two interp agree failed at test/Tests/EvalFullInterp.hs:467:60
  | after 136 tests, 9 shrinks and 105 discards.
  | shrink path: 136/105:f2DgCbA3
  | both terminated 99% ███████████████████▊
  |  
  | ┏━━ test/Tests/Eval/Utils.hs ━━━
  | 51 ┃ genDirTm :: PropertyT WT (Dir, Expr, Type' () ())
  | 52 ┃ genDirTm = do
  | 53 ┃   dir <- forAllT $ Gen.element @[] [Chk, Syn]
  | ┃   │ Syn
  | 54 ┃   (t', ty) <- case dir of
  | 55 ┃     Chk -> do
  | 56 ┃       ty' <- forAllT $ genWTType $ KType ()
  | 57 ┃       t' <- forAllT $ genChk ty'
  | 58 ┃       pure (t', ty')
  | 59 ┃     Syn -> forAllT genSyn
  | ┃     │ ( Ann
  | ┃     │     ()
  | ┃     │     (LAM
  | ┃     │        ()
  | ┃     │        LocalName { unLocalName = "x" }
  | ┃     │        (Case
  | ┃     │           ()
  | ┃     │           (Ann
  | ┃     │              ()
  | ┃     │              (EmptyHole ())
  | ┃     │              (TCon
  | ┃     │                 ()
  | ┃     │                 GlobalName
  | ┃     │                   { qualifiedModule =
  | ┃     │                       ModuleName { unModuleName = "Primitives" :\| [] }
  | ┃     │                   , baseName = "Int"
  | ┃     │                   }))
  | ┃     │           []
  | ┃     │           (CaseFallback
  | ┃     │              (Let
  | ┃     │                 ()
  | ┃     │                 LocalName { unLocalName = "x" }
  | ┃     │                 (EmptyHole ())
  | ┃     │                 (Hole
  | ┃     │                    ()
  | ┃     │                    (Case
  | ┃     │                       ()
  | ┃     │                       (Ann
  | ┃     │                          ()
  | ┃     │                          (EmptyHole ())
  | ┃     │                          (TCon
  | ┃     │                             ()
  | ┃     │                             GlobalName
  | ┃     │                               { qualifiedModule =
  | ┃     │                                   ModuleName { unModuleName = "Primitives" :\| [] }
  | ┃     │                               , baseName = "Char"
  | ┃     │                               }))
  | ┃     │                       []
  | ┃     │                       (CaseFallback
  | ┃     │                          (Letrec
  | ┃     │                             ()
  | ┃     │                             LocalName { unLocalName = "y" }
  | ┃     │                             (EmptyHole ())
  | ┃     │                             (TEmptyHole ())
  | ┃     │                             (Var
  | ┃     │                                ()
  | ┃     │                                (GlobalVarRef
  | ┃     │                                   GlobalName
  | ┃     │                                     { qualifiedModule = ModuleName { unModuleName = "M" :\| [] }
  | ┃     │                                     , baseName = "idChar"
  | ┃     │                                     }))))))))))
  | ┃     │     (TEmptyHole ())
  | ┃     │ , TEmptyHole ()
  | ┃     │ )
  | 60 ┃   t <- generateIDs t'
  | 61 ┃   pure (dir, t, ty)
  |  
  | ┏━━ test/Tests/EvalFullInterp.hs ━━━
  | 454 ┃ tasty_two_interp_agree :: Property
  | 455 ┃ tasty_two_interp_agree = withTests 1000
  | 456 ┃   $ withDiscards 2000
  | 457 ┃   $ propertyWT testModules
  | 458 ┃   $ do
  | 459 ┃     let globs = foldMap' moduleDefsQualified $ create' $ sequence testModules
  | 460 ┃     tds <- asks typeDefs
  | 461 ┃     (dir, t, _ty) <- genDirTm
  | 462 ┃     let optsV = ViewRedexOptions{groupedLets = True, aggressiveElision = True, avoidShadowing = False}
  | 463 ┃     let optsR = RunRedexOptions{pushAndElide = True}
  | 464 ┃     (_, ss) <- failWhenSevereLogs $ evalFullStepCount @EvalLog UnderBinders optsV optsR tds globs 100 dir t
  | 465 ┃     si <- liftIO (evalFullTest' (MicroSec 10_000) tds globs dir $ forgetMetadata t)
  | 466 ┃     case (ss, si) of
  | 467 ┃       (Right ss', Right si') -> label "both terminated" >> Hedgehog.diff (forgetMetadata ss') alphaEq si'
  | ┃       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  | ┃       │ ━━━ Failed (- lhs) (+ rhs) ━━━
  | ┃       │   Ann
  | ┃       │     ()
  | ┃       │     LAM
  | ┃       │       ()
  | ┃       │       LocalName { unLocalName = "x" }
  | ┃       │       Hole
  | ┃       │         ()
  | ┃       │ -       Ann
  | ┃       │ -         ()
  | ┃       │ -         (Lam
  | ┃       │ -            ()
  | ┃       │ -            LocalName { unLocalName = "x" }
  | ┃       │ -            (Var () (LocalVarRef LocalName { unLocalName = "x" })))
  | ┃       │ -         (TFun
  | ┃       │ -            ()
  | ┃       │ -            (TCon
  | ┃       │ -               ()
  | ┃       │ -               GlobalName
  | ┃       │ -                 { qualifiedModule =
  | ┃       │ -                     ModuleName { unModuleName = "Primitives" :\| [] }
  | ┃       │ -                 , baseName = "Char"
  | ┃       │ -                 })
  | ┃       │ -            (TCon
  | ┃       │ -               ()
  | ┃       │ -               GlobalName
  | ┃       │ -                 { qualifiedModule =
  | ┃       │ -                     ModuleName { unModuleName = "Primitives" :\| [] }
  | ┃       │ -                 , baseName = "Char"
  | ┃       │ -                 }))
  | ┃       │ +       Lam
  | ┃       │ +         ()
  | ┃       │ +         LocalName { unLocalName = "x" }
  | ┃       │ +         (Var () (LocalVarRef LocalName { unLocalName = "x" }))
  | ┃       │     TEmptyHole ()
  | 468 ┃       _ -> label "one failed to terminate"
  |  
  | This failure can be reproduced by running:
  | > recheckAt (Seed 18338648843675604054 720643755453659811) "136/105:f2DgCbA3" two interp agree
  |  
  | Use "--pattern '$NF ~ /two interp agree/' --hedgehog-replay '136/105:f2DgCbA3 Seed 18338648843675604054 720643755453659811'" to reproduce from the command-line.
  |  
  | Use -p '/two interp agree/' to rerun this test only.
  |  
  | 1 out of 870 tests failed (106.09s)

Ref:

Raw logs:

dhess commented 2 months ago

It seems plausible that at least one of these is related to #1247.

dhess commented 1 month ago

Yet another:

https://buildkite.com/hackworthltd/primer-ci/builds/113#018fdac8-a37c-4d44-8b25-5fec6014c7e3