Create a game tree with Pokerkit

LucasColas commented 9 months ago

Hello, I'm working on a poker solver that uses CFR, and I need to build a game tree. I've been trying to use your library, PokerKit, to build it, but I'm still running into some issues, particularly when a node reaches the river. For instance, after one player bets and the other player calls, `state.street is None` is sometimes not True. I also have another problem: I couldn't find a method that tells me when I have to call burn_card() and deal_board(), so I wrapped those calls in a try/except. Could you provide example code that builds a random poker game tree?

Here's my code, which implements DeepCFR and creates a game tree:


class DeepCFR:
    def __init__(
        self,
        starting_stacks: tuple[int],
        blinds: tuple[int, int],
        ranges: tuple[tuple[tuple[str, str]], tuple[tuple[str, str]]],
        pot: int,
        board: tuple[str],
        bets: tuple[tuple[int], tuple[int]] = [(0.3, 0.7), (0.3, 0.7)],
        iterations: int = 1,
        K: int = 1,
        n_players: int = 2,
        mv_max: int = 100000,
        mpi_max: int = 200000,
    ):
        """

        starting_stacks: tuple of int
            The starting stacks of the players
        blinds: tuple of int
            The small and big blinds
        ranges: tuple of tuples of str
            The ranges of the players. The first tuple is the range of the player ip and the second tuple is the range of the second player oop.
        pot: int
            The chips in the pot
        board: tuple of str
            The cards on the board. Each card is a string of the form "Xy" where X is the value and y is the suit
        bets: tuple of tuples of int
            The bets of the players. The first tuple is the bets of the player ip and the second tuple is the bets of the second player oop.
        iterations: int
            Number of cfr iterations to run
        K: int
            Number of traversals to run
        n_players: int
            Number of players
        mv_max: int
            Maximum size of the advantage memory
        mpi_max: int
            Maximum size of the strategy memory

        """
        self.iterations = iterations
        self.n_players = n_players
        self.K = K

        self.starting_stacks = starting_stacks
        self.blinds = blinds
        self.ranges = ranges
        self.pot = pot
        self.board = board
        self.bets = bets
        self.m_v = [[], []]
        self.m_pi = []
        self.mv_max = mv_max
        self.mpi_max = mpi_max
        self.val_net = []
        self.n_game = 0

        self.ip = 1

    def get_opponent(self, player: int):
        """Return the index of the other player in a two-player game.

        Index 0 maps to 1; any other index maps to 0.
        """
        faces_player_zero = player == 0
        return int(faces_player_zero)

    def run(self):

        start = time.time()
        print("Running DeepCFR...")

        for t in range(self.iterations):
            for player in range(self.n_players):
                # Too many iterations. There needs to do a random picking of the cards
                for k in range(self.K):
                    if k % 1000 == 0:
                        print(
                            f"Iteration {t} of {self.iterations} and k {k} of {self.K}"
                        )
                    card = random.choice(self.ranges[player])
                    opponent_card = random.choice(self.ranges[self.get_opponent(player)])
                    game = self.create_game(player, card, opponent_card)
                    print(card, opponent_card, self.board)
                    available_cards = [c for c in CARDS if c not in card + opponent_card + self.board]
                    turn_card = random.choice(available_cards)
                    river_card = random.choice([c for c in available_cards if c != turn_card])
                    turn_river_cards = [turn_card, river_card]
                    print("turn_river_cards : ", turn_river_cards)
                    print("card : ", card)
                    print("opponent_card : ", opponent_card)
                    self.Traverse(game, player, t, card, opponent_card, turn_river_cards, [])

                # Update the advantage networks V using the advantage memories MV,1, MV,2
        # Update the strategy network Π using the strategy memory MΠ
        print("m_v: ", self.m_v)
        print("m_pi: ", self.m_pi)
        print("n_game: ", self.n_game)

        print(f"DeepCFR finished in {time.time() - start:.2f} seconds")

    def create_game(self, player: int, card: tuple[str, str], opponent_card: tuple[str, str]):
        if len(self.board) == 3:
            game = NoLimitTexasHoldem.create_state(
                # Automations
                (
                    Automation.ANTE_POSTING,
                    Automation.BET_COLLECTION,
                    Automation.BLIND_OR_STRADDLE_POSTING,
                    Automation.HOLE_CARDS_SHOWING_OR_MUCKING,
                    Automation.HAND_KILLING,
                    Automation.CHIPS_PUSHING,
                    Automation.CHIPS_PULLING,
                ),
                False,  # Uniform antes?
                0,  # Antes
                self.blinds,  # Blinds or straddles
                self.blinds[1],  # Min-bet
                (self.starting_stacks[0]+self.blinds[1], self.starting_stacks[1]+self.blinds[1]),  # Starting stacks
                2,  # Number of players
            )

        if player == self.ip:
            game.deal_hole("".join(opponent_card))
            game.deal_hole("".join(card))
        else:
            game.deal_hole("".join(card))
            game.deal_hole("".join(opponent_card))

        game.check_or_call()
        game.check_or_call()

        game.burn_card()
        game.deal_board("".join(self.board))

        return game

    def payoff(self, game, player: int):
        return game.stacks[player] - self.starting_stacks[player]

    def get_available_moves(self, game, player):
        available_moves = []
        #new_game = deepcopy(game)
        if game.can_check_or_call():
            available_moves.append(("check/call", 0))
        #print("self bets : ", self.bets[player])
        for bet in self.bets[player]:
            new_game = deepcopy(game)
            if int(bet*game.stacks[player]) <= game.stacks[player] and int(bet*game.stacks[player]) >= new_game.verify_completion_betting_or_raising_to():
                available_moves.append(("raise", bet*game.stacks[player]))

        return available_moves

    def Traverse(
        self,
        game,
        player: int,
        traversal: int,
        player_cards: tuple[str, str],
        opponent_cards: tuple[str, str],
        turn_river_cards: list[str],
        bet_history : list,
    ):

        if game.street is None:

            return self.payoff(game, player)

        try:

            if len(turn_river_cards) == 0 and game.street is None:
                return self.payoff(game, player)

            elif len(turn_river_cards) > 0:

                game.burn_card()
                game.deal_board(turn_river_cards.pop(0))
                print("card burnt and dealt")
                print(game.burn_cards)
                print(game.board_cards)
                print(game.street)

                return self.Traverse(game, player, traversal, player_cards, opponent_cards, turn_river_cards, bet_history)
            #print("didn't go inside a if")
        except ValueError:
            pass

        if game.street is None:
            print("end of the hand : ", game.stacks)
            return self.payoff(game, player)

        new_game = deepcopy(game)
        player_to_act = new_game._pop_actor_index()

        if player_to_act == player:
            #print("player to act : ", player_to_act)
            new_game = deepcopy(game)
            available_moves = self.get_available_moves(new_game, player)
            values = np.array([0.0] * len(available_moves))
            regrets = np.array([0.0] * len(available_moves))
            for index, move in enumerate(available_moves):

                new_game = deepcopy(game)
                if move[0] == "check/call":
                    #print("check/call")
                    new_game.check_or_call()

                else:

                    new_game.complete_bet_or_raise_to(move[1])
                    bet_history.append((player, 1, move[1]))

                values[index] = self.Traverse(new_game, player, traversal, player_cards, opponent_cards, turn_river_cards, bet_history)

            for index, move in enumerate(available_moves):

                regrets[index] = values[index] - np.max(values)

            cards = (player_cards, game.board_cards)
            infoset = (cards, bet_history)
            self.m_v[player].append((infoset, traversal, regrets))
        else:
            #print("opponent to act")
            available_moves = self.get_available_moves(game, self.get_opponent(player))
            pred = random.choice(available_moves)
            if pred[0] == "check/call":
                #print("check/call")
                game.check_or_call()
            else:
                #print("raise")
                game.complete_bet_or_raise_to(pred[1])
                #bet_history.append(self.get_opponent(player), 1, pred[1])
            return self.Traverse(game, player, traversal, player_cards, opponent_cards, turn_river_cards, bet_history)

# Example: a heads-up solve on the flop 2h 3h 4h with 100-chip stacks and
# 1/2 blinds, using the default bet sizes and a single iteration.
DeepCFRAlgo = DeepCFR(
    starting_stacks=[100, 100],
    blinds=(1, 2),
    ranges=(
        (("2c", "2d"), ("Kh", "Kd")),
        (("3c", "3d"), ("Ah", "As"), ("Kc", "Kd"), ("Kh", "Kd")),
    ),
    pot=100,
    board=("2h", "3h", "4h"),
)
DeepCFRAlgo.run()

The main problem I have occurs at the river:

return self.actor_indices.popleft()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IndexError: pop from an empty deque

The error is raised by the line player_to_act = new_game._pop_actor_index().

AussieSeaweed commented 9 months ago

Hi, thanks for creating an issue.

You should not call _pop_actor_index. It is a hidden helper method that should not be called by outsiders. Just use the actor_index property. It gives you who is in turn to fold, check, call, complete, bet, or raise.

With that aside, you can make your code simpler by using the following query methods to check if an action can be performed: can_deal_board, can_deal_hole, can_burn_card, can_complete_bet_or_raise_to.

To see who is going to be dealt hole cards, you can access hole_dealee_index.

AussieSeaweed commented 9 months ago

Closed due to inactivity.

uoftcprg / pokerkit

Create a game tree with Pokerkit #7