Closed markusdumke closed 7 years ago
# Checks that the greedy policy derived from a SARSA(0) run on the gridworld
# agrees with the reference policy on the rows selected by `test_states`.
test_that("SARSA(0) converges to correct policy for gridworld", {
  # Run sarsa on `grid` for 1000 episodes; the result is handed straight to
  # make_greedy_policy (presumably a table of action values -- confirm
  # against the sarsa implementation).
  learned.values <- sarsa(grid, n.episodes = 1000)
  greedy.policy <- make_greedy_policy(learned.values)

  # Only the rows indexed by `test_states` are compared, not the full policy.
  expect_equal(
    greedy.policy[test_states, ],
    optimal.policy[test_states, ]
  )
})