diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a9ee011e..aeec166f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,7 +23,7 @@ jobs: pip install pytest sudo apt-get update sudo apt-get install libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig - pip install git+https://github.com/Farama-Foundation/PettingZoo.git + # pip install git+https://github.com/Farama-Foundation/PettingZoo.git pip install -e .[all] - name: Full Python tests run: | diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 6dc2058c..578e9483 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -1,8 +1,66 @@ -"""Multi-objective Ingenious environment for MOMAland. +"""Ingenious environment. + +|--------------------|--------------------------------------------------------------| +| Actions | Discrete | +| Parallel API | No | +| Manual Control | No | +| Agents | num_agents=2 | +| Action Shape | (1,) | +| Action Values | Discrete(size depends on board size and rack size: there | +| | is one integer encoding the placement of each rack tile | +| | on each board hex in each possible direction.) | +| Observations | Observations are dicts with three entries: | +| | "board": array with size (2*board_size-1, 2*board_size-1) | +| | containing values from 0 to num_colors; | +| | "racks": for each observable agent, an array of length | +| | rack_size containing pairs of values from 0 to num_colors; | +| | "scores": for all agents, their scores in all num_colors | +| | objectives as values from 0 to max_score. | +| Reward Shape | (num_colors=6,) | This environment is based on the Ingenious game: https://boardgamegeek.com/boardgame/9674/ingenious -Every color is a different objective. The goal in the original game is to maximize the minimum score over all colors, -however we leave the utility wrapper up to the users and only return the vectorial score on each color dimension. + +The game's original rules support multiple players collecting scores in multiple colors, which we define as the +objectives of the game: for example (red=5, green=2, blue=9). The goal in the original game is to maximize the +minimum score over all colors (2 in the example above), however we leave the utility wrapper up to the users and only +return the vectorial score on each color dimension (5,2,9). + + +### Observation Space + +The observation is a dictionary which contains an 'observation' element which is the usual RL observation, +and an 'action_mask' which holds the legal moves, described in the Legal Actions Mask section below. + +The 'observation' element itself is a dictionary with three entries: 'board' is representing the hexagonal board as +an array of size (2*board_size-1, 2*board_size-1) with integer entries from 0 (empty hex) to num_colors (tiles of +different colors). 'racks' represents for each observable agent - by default only the acting agent, if fully_obs=True +all agents - their tiles rack as an array of size rack_size containing pairs of integers (each pair is a tile) from 0 +to num_colors. 'scores' represents for all agents their current scores in all num_colors objectives, as integers from +0 to max_score. + + +#### Legal Actions Mask + +The legal moves available to the current agent are found in the 'action_mask' element of the dictionary observation. +The 'action_mask' is a binary vector where each index of the vector represents whether the represented action is legal +or not; the action encoding is described in the Action Space section below. +The 'action_mask' shows only the current agent's legal moves. + + +### Action Space + +The action space depends on board size and rack size: It contains one integer for each possible placement of any of +the player's rack tiles (rack_size parameter) on any board hex (board_size parameter) in every possible direction. + + +### Rewards + +The agents can collect a separate score in each available color. These scores are the num_colors different reward +dimensions. + + +### Version History + """ import functools @@ -21,13 +79,13 @@ def env(**kwargs): - """Autowrapper for multi-objective Ingenious game. + """Returns the wrapped Ingenious environment in `AEC` format. Args: - **kwargs: keyword args to forward to the parallel_env function + **kwargs: keyword args to forward to the raw_env function Returns: - A fully wrapped env + A fully wrapped AEC env """ env = raw_env(**kwargs) @@ -37,85 +95,71 @@ def env(**kwargs): def raw_env(**kwargs): - """Env factory function for multi-objective Ingenious game.""" - return MOIngenious(**kwargs) - - -class MOIngenious(MOAECEnv): - """Ingenious board game. - - Ingenious is a turn-based board game for multiple players. 2-4 players can play (default is 2), on a hexagonal - board with an edge length of 3-10 (default is 6). Each player has 2-6 (default is 6) tiles with colour symbols on - their rack (hand). In sequential order, players play one of their tiles onto the hexagonal board, with the goal - of establishing lines of matching symbols emerging from the placed tile. This allows the players to increase - their score in the respective colors, each color representing one of 2-6 (default is 6) objectives. New tiles are - randomly drawn, and the racks of other players with their currently available tiles are not observable (in the - default rules). When the board is filled, the original game rules define the winner as the player who has the - highest score in their lowest-scoring colour. This implementation exposes the colour scores themselves as - different objectives, allowing arbitrary utility functions to be defined over them. - - ## Observation Space - The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described - below, and an `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section below. - The main observation space is a dictionary containing the `'board'`, the `'tiles'`, and the `'scores'`. TODO describe. why do we return the scores of the player? - - ## Legal Actions Mask - The legal moves available to the current agent are found in the `action_mask` element of the dictionary observation. - The `action_mask` is a binary vector where each index of the vector represents whether the represented action is legal - or not; the action encoding is described in the Action Space section below. - The `action_mask` will be all zeros for any agent except the one whose turn it is. TODO is this true? - - ## Action Space - The action space is the set of integers from 0 to TODO describe action encoding here, with reference to web resource for hex encoding - - ## Rewards - The reward dimensions correspond to the 2-6 (default is 6) different colors that the players can score points for. - - ## Starting State - The game starts with an empty board, and each player with 2-6 (default is 6) randomly drawn tiles in their hand. - - ## Arguments - - 'num_players' (int): The number of players in the environment. Default: 2 - - 'init_draw' (int): The number of tiles each player draws at the beginning of the game. Default: 6 - - 'num_colors' (int): The number of colors in the game. Default: 6 - - 'board_size' (int): The size of the board. Default: 6 - - 'limitation_score' (int): Maximum score for any color Default: 18 - - 'render_mode' (str): The rendering mode. Default: None - - ## Version History - """ + """Env factory function for the Ingenious environment.""" + return Ingenious(**kwargs) + + +class Ingenious(MOAECEnv): + """Environment for the Ingenious board game.""" metadata = {"render_modes": ["human"], "name": "moingenious_v0", "is_parallelizable": False} - def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limitation_score=18, render_mode=None): - """Initializes the multi-objective Ingenious game. + def __init__( + self, + num_agents: int = 2, + rack_size: int = 6, + num_colors: int = 6, + board_size: int = None, + reward_mode: str = "competitive", + fully_obs: bool = False, + render_mode: bool = None, + ): + """Initializes the Ingenious environment. Args: - num_players (int): The number of players in the environment. Default: 2 - init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6 - num_colors (int): The number of colors in the game. Default: 6 - board_size (int): The size of the board. Default: 6 - limitation_score (int): Maximum score for any color. Default: 18 + num_agents (int): The number of agents (between 2 and 6). Default is 2. + rack_size (int): The number of tiles each player keeps in their rack (between 2 and 6). Default is 6. + num_colors (int): The number of colors (objectives) in the game (between 2 and 6). Default is 6. + board_size (int): The size of one side of the hexagonal board (between 3 and 10). By default the size is set + to n+4 where n is the number of agents. + reward_mode (str): Can be set to "competitive" (individual rewards for all agents), "collaborative" (shared + rewards for all agents), or "two_teams" (rewards shared within two opposing teams; num_agents needs to be + even). Default is "competitive". + fully_obs (bool): Fully observable game mode, i.e. the racks of all players are visible. Default is False. render_mode (str): The rendering mode. Default: None """ - self.board_size = board_size self.num_colors = num_colors - self.init_draw = init_draw - self.num_players = num_players - self.limitation_score = limitation_score + self.init_draw = rack_size + self.max_score = 18 # max score in score board for one certain color. + assert reward_mode in { + "competitive", + "collaborative", + "two_teams", + }, "reward_mode has to be one element in {'competitive','collaborative','two_teams'}" + self.reward_mode = reward_mode + self.fully_obs = fully_obs + + if self.reward_mode == "two_teams": + assert num_agents % 2 == 0, "Number of players must be even if reward_mode is two_teams." + self.max_score = self.max_score * (num_agents / 2) + elif self.reward_mode == "collaborative": + self.max_score = self.max_score * num_agents + + if board_size is None: + self.board_size = {2: 6, 3: 7, 4: 8, 5: 9, 6: 10}.get(num_agents) + else: + self.board_size = board_size self.game = IngeniousBase( - num_players=num_players, - init_draw=init_draw, - num_colors=num_colors, - board_size=board_size, - limitation_score=limitation_score, + num_agents=num_agents, + rack_size=self.init_draw, + num_colors=self.num_colors, + board_size=self.board_size, + max_score=self.max_score, ) - self.possible_agents = ["agent_" + str(r) for r in range(num_players)] - # init list of agent + self.possible_agents = ["agent_" + str(r) for r in range(num_agents)] self.agents = self.possible_agents[:] - self.terminations = {agent: False for agent in self.agents} self.truncations = {agent: False for agent in self.agents} self.infos = {agent: {} for agent in self.agents} @@ -124,8 +168,6 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limit self.refresh_cumulative_reward = True self.render_mode = render_mode - # Observation space is a dict of 2 elements: actions mask and game state (board, agent own tile bag, - # agent score) self.observation_spaces = { i: Dict( { @@ -134,8 +176,10 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limit "board": Box( 0, len(ALL_COLORS), shape=(2 * self.board_size - 1, 2 * self.board_size - 1), dtype=np.float32 ), - "tiles": Box(0, self.num_colors, shape=(self.init_draw, 2), dtype=np.int32), - "scores": Box(0, self.game.limitation_score, shape=(self.num_colors,), dtype=np.int32), + "racks": Box(0, self.num_colors, shape=(num_agents, self.init_draw, 2), dtype=np.int32) + if self.fully_obs + else Box(0, self.num_colors, shape=(self.init_draw, 2), dtype=np.int32), + "scores": Box(0, self.game.max_score, shape=(num_agents, self.num_colors), dtype=np.int32), } ), "action_mask": Box(low=0, high=1, shape=(len(self.game.masked_action),), dtype=np.int8), @@ -144,12 +188,10 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limit for i in self.agents } - self.action_spaces = dict(zip(self.agents, [Discrete(len(self.game.masked_action))] * num_players)) + self.action_spaces = dict(zip(self.agents, [Discrete(len(self.game.masked_action))] * num_agents)) - # The reward after one move is the difference between the previous and current score. - self.reward_spaces = dict( - zip(self.agents, [Box(0, self.game.limitation_score, shape=(self.num_colors,))] * num_players) - ) + # The reward for each move is the difference between the previous and current score. + self.reward_spaces = dict(zip(self.agents, [Box(0, self.game.max_score, shape=(self.num_colors,))] * num_agents)) @functools.lru_cache(maxsize=None) @override @@ -164,7 +206,6 @@ def action_space(self, agent): @override def reward_space(self, agent): - """Returns the reward space for the given agent.""" return self.reward_spaces[agent] @override @@ -181,8 +222,7 @@ def render(self): @override def reset(self, seed=None, options=None): """Reset needs to initialize the `agents` attribute and must set up the environment so that render(), - and step() can be called without issues. - """ + and step() can be called without issues.""" if seed is not None: np.random.seed(seed) random.seed(seed) @@ -206,7 +246,6 @@ def step(self, action): Args: action: action of the active agent """ - current_agent = self.agent_selection if self.terminations[current_agent] or self.truncations[current_agent]: @@ -215,6 +254,7 @@ def step(self, action): if self.refresh_cumulative_reward: self._cumulative_rewards[current_agent] = np.zeros(self.num_colors, dtype="float64") + # update current agent if not self.game.end_flag: prev_rewards = np.array(list(self.game.score[current_agent].values())) self.game.set_action_index(action) @@ -224,7 +264,23 @@ def step(self, action): if self.game.end_flag: self.terminations = {agent: True for agent in self.agents} - # update accumulate_rewards + # update teammate score (copy current agent's score to teammates) + if self.reward_mode != "competitive": + index_current_agent = self.agents.index(current_agent) + for i in range(0, self.num_agents): + if self.reward_mode == "two_teams": + # in two_team mode, players who are teammates of the current agent get the same reward and score + if i != index_current_agent and i % 2 == index_current_agent % 2: + agent = self.agents[i] + self.game.score[agent] = self.game.score[current_agent] + self.rewards[agent] = self.rewards[current_agent] + elif self.reward_mode == "collaborative": + # in collaborative mode, every player gets the same reward and score + if i != index_current_agent: + agent = self.agents[i] + self.game.score[agent] = self.game.score[current_agent] + self.rewards[agent] = self.rewards[current_agent] + self._accumulate_rewards() # update to next agent @@ -238,10 +294,14 @@ def step(self, action): @override def observe(self, agent): board_vals = np.array(self.game.board_array, dtype=np.float32) - p_tiles = np.array(self.game.p_tiles[agent], dtype=np.int32) - p_score = np.array(list(self.game.score[agent].values()), dtype=np.int32) - - observation = {"board": board_vals, "tiles": p_tiles, "scores": p_score} + if self.fully_obs: + p_tiles = np.array([item for item in self.game.p_tiles.values()], dtype=np.int32) + else: + p_tiles = np.array(self.game.p_tiles[agent], dtype=np.int32) + tmp = [] + for agent_score in self.game.score.values(): + tmp.append([score for score in agent_score.values()]) + p_score = np.array(tmp, dtype=np.int32) + observation = {"board": board_vals, "racks": p_tiles, "scores": p_score} action_mask = np.array(self.game.return_action_list(), dtype=np.int8) - return {"observation": observation, "action_mask": action_mask} diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index 4e89ee5d..317da72f 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -1,6 +1,6 @@ -"""Base class for Ingenious environment. +"""Base class for the Ingenious environment. -This class is not meant to be instantiated directly. This class supports the MOIngenious environment and provides the +This class is not meant to be instantiated directly. This class supports the Ingenious environment and provides the board and rules. """ @@ -11,12 +11,6 @@ import numpy as np -# red 12-pointed star -# green circle -# blue 6-pointed star -# orange hexagon -# yellow 24-pointed star -# purple ring RED = 1 GREEN = 2 BLUE = 3 @@ -25,13 +19,12 @@ PURPLE = 6 ALL_COLORS = [RED, GREEN, BLUE, ORANGE, YELLOW, PURPLE] COLOR_NAMES = ["red", "green", "blue", "orange", "yellow", "purple"] - NUM_TILES = 120 Hex = collections.namedtuple("Hex", ["q", "r", "s"]) def hex_coord(q, r, s): - """Create a cube-based coordinates.""" + """Create a cube-based coordinate.""" assert not (round(q + r + s) != 0), "q + r + s must be 0" return Hex(q, r, s) @@ -98,31 +91,32 @@ def generate_board(board_size): class IngeniousBase: - """Base class for Ingenious environment.""" + """Base class for the Ingenious environment.""" - def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limitation_score=18): + def __init__(self, num_agents=2, rack_size=6, num_colors=6, board_size=6, max_score=18): """Initialize the Ingenious environment. Args: - num_players (int): The number of players in the environment. Default: 2 - init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6 - num_colors (int): The number of colors in the game. Default: 6 - board_size (int): The size of the board. Default: 6 - limitation_score (int): Maximum score for any color. Default: 18 + num_agents (int): The number of agents (between 2 and 6). Default is 2. + rack_size (int): The number of tiles each player keeps in their rack (between 2 and 6). Default is 6. + num_colors (int): The number of colors (objectives) in the game (between 2 and 6). Default is 6. + board_size (int): The size of one side of the hexagonal board (between 3 and 10). Default is 6. + max_score(int): Maximal score possible for any given color/objective. Default: 18 """ - assert 2 <= num_players <= 5, "Number of players must be between 2 and 5." + assert 2 <= num_agents <= 6, "Number of players must be between 2 and 6." assert 2 <= num_colors <= 6, "Number of colors must be between 2 and 6." - assert 2 <= init_draw <= 6, "Number of tiles in hand must be between 2 and 6." + assert 2 <= rack_size <= 6, "Rack size must be between 2 and 6." assert 3 <= board_size <= 10, "Board size must be between 3 and 10." + assert num_agents <= num_colors, "Number of agents cannot be larger than number of colors. " self.board_size = board_size - self.num_player = num_players - self.agents = [f"agent_{i}" for i in range(self.num_player)] + self.num_agents = num_agents + self.agents = [f"agent_{i}" for i in range(self.num_agents)] self.agent_selector = 0 - self.limitation_score = limitation_score + self.max_score = max_score self.colors = num_colors self.corner_color = ALL_COLORS - self.init_draw = init_draw + self.rack_size = rack_size self.board_array = np.zeros([2 * self.board_size - 1, 2 * self.board_size - 1]) self.board_hex = generate_board(self.board_size) # original full board self.action_map = {} @@ -144,7 +138,7 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limit neighbour = hex_neighbor(loc, direct) if neighbour not in self.board_hex: continue - for i in range(0, self.init_draw): + for i in range(0, self.rack_size): if (loc, neighbour, i) not in self.action_map: self.action_map[(loc, neighbour, i)] = self.action_size self.action_index_map[self.action_size] = (loc, neighbour, i) @@ -173,7 +167,7 @@ def reset_game(self, seed=None): neighbour = hex_neighbor(loc, direct) if neighbour not in self.board_hex: continue - for i in range(0, self.init_draw): + for i in range(0, self.rack_size): if (loc, neighbour, i) not in self.action_map: self.action_map[(loc, neighbour, i)] = self.action_size self.action_index_map[self.action_size] = (loc, neighbour, i) @@ -191,17 +185,17 @@ def reset_game(self, seed=None): self.score = {agent: {ALL_COLORS[i]: 0 for i in range(0, self.colors)} for agent in self.agents} def draw_tiles_fill(self): - """Draw tiles for single player with amount(self.init_draw) of tiles.""" - return [self.tiles_bag.pop(self.random.randrange(len(self.tiles_bag))) for _ in range(self.init_draw)] + """Draw rack_size tiles for single player.""" + return [self.tiles_bag.pop(self.random.randrange(len(self.tiles_bag))) for _ in range(self.rack_size)] def get_tile(self, a): """Draw tiles for a specific player.""" - while len(self.p_tiles[a]) < self.init_draw: + while len(self.p_tiles[a]) < self.rack_size: self.p_tiles[a].append(self.tiles_bag.pop(self.random.randrange(len(self.tiles_bag)))) return def initial_corner(self): - """Initialise the corner of the board with the 6 colors.""" + """Initialise the corners of the board with the 6 colors.""" for i in range(0, 6): a = hex_scale(hex_directions[i], self.board_size - 1) x, y = Hex2ArrayLocation(a, self.board_size) @@ -216,7 +210,7 @@ def initial_corner(self): hx2 = hex_neighbor(hx1, j) if (hx2 not in self.board_hex) or (hx1 not in self.board_hex) or (hx2 == a): continue - for card in range(0, self.init_draw): + for card in range(0, self.rack_size): c1 = self.action_map[(hx1, hx2, card)] c2 = self.action_map[(hx2, hx1, card)] self.first_round_pos.add(c1) @@ -238,24 +232,27 @@ def tiles_bag_reset(self): self.tiles_bag = int(NUM_TILES / len(diff_color_combinations + same_color_combinations)) * ( diff_color_combinations + same_color_combinations ) + if self.board_size in [9, 10]: + # cannot fill the board for 9 or 10(complement rule) + self.tiles_bag *= 2 + # Shuffle the tiles bag self.random.shuffle(self.tiles_bag) def set_action_index(self, index): """Apply the corresponding action for the given index on the board.""" - """If selected actions is not a legal move, return False""" assert self.masked_action[index] == 1, "Illegal move, choose a valid action." if self.first_round: assert index in self.first_round_pos, ( "Illegal move, in the first round tiles can only be placed next to " "corners." ) - """Hex Coordinate: h1,h2 ; Tile to play: card""" - h1, h2, card = self.action_index_map[index] + # Hex Coordinate: h1,h2 ; Tile to play: tile + h1, h2, tile = self.action_index_map[index] agent_i = self.agent_selector agent = self.agents[agent_i] - assert card < len(self.p_tiles[agent]), "Illegal move: choosing tile out of hand(happening after ingenious)" - """Extract the certain tile (color1 , color2) as (c1,c2)""" - c1, c2 = self.p_tiles[agent][card] + assert tile < len(self.p_tiles[agent]), "Illegal move: choosing tile out of rack" + # Extract the tile (color1 , color2) as (c1,c2) + c1, c2 = self.p_tiles[agent][tile] # Translate Hex Coordinate to Offset Coordinate(x,y) x1, y1 = Hex2ArrayLocation(h1, self.board_size) x2, y2 = Hex2ArrayLocation(h2, self.board_size) @@ -269,55 +266,60 @@ def set_action_index(self, index): self.p_tiles[agent].remove(item) flag = True break - assert flag, "Illegal move: set the tile to the coordinate unsuccessfully" - """Update the mask_action list after the action""" + assert flag, "Illegal move: unsuccessfully setting the tile to the coordinate" + # Update the mask_action list after the action self.legal_move.remove(index) self.board_array[x1][y1] = c1 self.board_array[x2][y2] = c2 self.exclude_action(h1) self.exclude_action(h2) + if self.first_round: + # In the first round, every player must start in a different corner + self.exclude_position_first_round(h1) + self.exclude_position_first_round(h2) + # Flag to signal if ingenious is called skip_flag = False # flags to avoid calling ingenious on colour that was already maxed out ingenious_possible = [True, True] - if self.score[agent][c1] == self.limitation_score: + if self.score[agent][c1] == self.max_score: ingenious_possible[0] = False - if self.score[agent][c2] == self.limitation_score: + if self.score[agent][c2] == self.max_score: ingenious_possible[1] = False - """Update score through checking 5 neighboring directions for h1 and h2 independently""" + # Update score through checking 5 neighboring directions for h1 and h2 independently self.score[agent][c1] += self.calculate_score_for_piece(h1, h2, c1) self.score[agent][c2] += self.calculate_score_for_piece(h2, h1, c2) - if self.score[agent][c1] > self.limitation_score and ingenious_possible[0]: + if self.score[agent][c1] > self.max_score and ingenious_possible[0]: skip_flag = True - self.score[agent][c1] = self.limitation_score - if self.score[agent][c2] > self.limitation_score and ingenious_possible[1]: + self.score[agent][c1] = self.max_score + if self.score[agent][c2] > self.max_score and ingenious_possible[1]: skip_flag = True - self.score[agent][c2] = self.limitation_score + self.score[agent][c2] = self.max_score - """End game if no more legal actions.""" + # End game if no more legal actions. if len(self.legal_move) == 0: self.end_flag = True - # Preserve the number of tiles in hand for each player to comply with observation dimensions - while len(self.p_tiles[agent]) < self.init_draw: - self.p_tiles[agent].append([0, 0]) + # Preserve the number of tiles in rack for each player to comply with observation dimensions + while len(self.p_tiles[agent]) < self.rack_size: + self.p_tiles[agent].append((0, 0)) return True - """All tiles in hand has been played""" + # All tiles in rack have been played if len(self.p_tiles[agent]) == 0: - self.end_flag = True # The player should win instantly if he plays out all the tiles in hand. - # Preserve the number of tiles in hand for each player to comply with observation dimensions - while len(self.p_tiles[agent]) < self.init_draw: - self.p_tiles[agent].append([0, 0]) + self.end_flag = True # The player should win instantly if he plays out all the tiles in rack. + # Preserve the number of tiles in rack for each player to comply with observation dimensions + while len(self.p_tiles[agent]) < self.rack_size: + self.p_tiles[agent].append((0, 0)) return True - """In the original rules of the game, when a player calls ingenious, they can play a bonus round without - replenishing tiles in hand. However, due to implementation constraints in our case the player replenishes its - hand in all cases (ingenious or not)""" + # In the original rules of the game, when a player calls ingenious, they can play a bonus round without + # replenishing the tiles in the rack. However, in our implementation the player replenishes their rack in all + # cases, ingenious or not. self.get_tile(agent) # Rule that says if you have no tiles of a color, you can swap your tiles with the lowest score. - self.refresh_hand(agent) + self.refresh_rack(agent) # Pass turn to next player if ingenious was not called if not skip_flag: self.next_turn() @@ -344,7 +346,7 @@ def exclude_action(self, hx): hx2 = hex_neighbor(hx, i) if hx2 not in self.board_hex: continue - for card in range(0, self.init_draw): + for card in range(0, self.rack_size): x = self.action_map[(hx, hx2, card)] self.masked_action[x] = 0 if x in self.legal_move: @@ -356,14 +358,14 @@ def exclude_action(self, hx): def next_turn(self): """Move to the next turn.""" - self.agent_selector = (self.agent_selector + 1) % self.num_player + self.agent_selector = (self.agent_selector + 1) % self.num_agents if self.agent_selector == 0 and self.first_round: self.first_round = False return self.agent_selector - def refresh_hand(self, player): - """Additional rule to refresh hand-held tiles.""" - """find the color for which the player has the lowest score""" + def refresh_rack(self, player): + """Additional rule to refresh rack tiles.""" + # find the color for which the player has the lowest score minval = min(self.score[player].values()) flag_lowest_score = False for item in self.p_tiles[player]: @@ -373,7 +375,7 @@ def refresh_hand(self, player): if flag_lowest_score: break if not flag_lowest_score: - """no lowest score color""" + # no lowest score color # save current unused tiles to add them back to the tiles bag back_up = self.p_tiles[player].copy() # clear the player's tiles @@ -381,7 +383,9 @@ def refresh_hand(self, player): # draw new tiles self.get_tile(player) # add unused tiles back to the tiles bag - self.tiles_bag.append(back_up) + # self.tiles_bag.append(back_up) # This could be wrong for append a list of tuple into a list + for item in back_up: + self.tiles_bag.append(item) def return_action_list(self): """Return the legal action list.""" @@ -393,8 +397,30 @@ def return_action_list(self): def log(self): """Print the current status of the game.""" - print({"board_size": self.board_size, "num_players": self.num_player}) + print({"board_size": self.board_size, "num_players": self.num_agents}) print("selector", self.agent_selector) print(self.board_array) print(self.score) print(self.p_tiles) + + def exclude_position_first_round(self, pos): + """Ensure that each player begins with a different corner.""" + for i in range(0, 6): + neighbor_hex = hex_neighbor(pos, i) + if hex_scale(neighbor_hex, 1.0 / (self.board_size - 1)) in hex_directions: + # neighbor_hex is corner + a = neighbor_hex + for k in range(0, 6): + hx1 = hex_neighbor(a, k) + for j in range(0, 6): + hx2 = hex_neighbor(hx1, j) + if (hx2 not in self.board_hex) or (hx1 not in self.board_hex) or (hx2 == a): + continue + for card in range(0, self.rack_size): + c1 = self.action_map[(hx1, hx2, card)] + c2 = self.action_map[(hx2, hx1, card)] + if c1 in self.first_round_pos: + self.first_round_pos.remove(c1) + if c2 in self.first_round_pos: + self.first_round_pos.remove(c2) + break diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index 2a1f64e3..6295917a 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -4,8 +4,11 @@ import gymnasium import numpy as np -from ingenious import MOIngenious -from ingenious_base import Hex2ArrayLocation + +from momaland.envs.ingenious.ingenious import Ingenious + +# from ingenious import MOIngenious +from momaland.envs.ingenious.ingenious_base import Hex2ArrayLocation def train(ig_env): @@ -21,7 +24,7 @@ def train(ig_env): # print("Action: ", action) ig_env.step(action) observation, reward, truncation, termination, _ = ig_env.last() - # print("Observations: ", observation) + # print("Observations: ", observation['observation']) # print("Rewards: ", reward) # print("Truncation: ", truncation) # print("Termination: ", termination) @@ -60,16 +63,14 @@ def test_move(): Returns: True or False """ - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) + ig_env = Ingenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ig_env.reset() # print(ig_env.game.board_array, "nweowjrowhafhif!!!!!!!!!") - flag = True # action map insist the same with index map for i in ig_env.game.action_index_map: h = ig_env.game.action_map.get(ig_env.game.action_index_map[i]) if h is None or h != i: - flag = False break # check legal move index = random_index_of_one(ig_env.game.return_action_list()) @@ -77,44 +78,12 @@ def test_move(): x1, y1 = Hex2ArrayLocation(h1, ig_env.game.board_size) x2, y2 = Hex2ArrayLocation(h2, ig_env.game.board_size) - if ig_env.game.board_array[x1][y1] != 0 or ig_env.game.board_array[x2][y2] != 0: - print("reason1") - flag = False - return flag - + assert ig_env.game.board_array[x1][y1] == 0 and ig_env.game.board_array[x2][y2] == 0, "Place on board is taken." ag = ig_env.agent_selection c1, c2 = ig_env.game.p_tiles[ag][card] - - # print(c1,c2,ig_env.game.board_array[x1][y1],ig_env.game.board_array[x2][y2] ) - # print(ig_env.game.return_action_list()[index]) ig_env.game.set_action_index(index) - # ig_env.step(index) - # print('after',c1, c2, ig_env.game.board_array[x1][y1], ig_env.game.board_array[x2][y2]) - ag = ig_env.agent_selection - if ig_env.game.board_array[x1][y1] != c1 or ig_env.game.board_array[x2][y2] != c2: - flag = False - print("reason2") - return flag - - # check illegal move : put somewhere not allowed - index = random_index_of_zero(ig_env.game.return_action_list()) - if ig_env.game.set_action_index(index): - print("reason3") - flag = False - return flag - - # check illegal move : put some tile out of hand - index = random_index_of_one(ig_env.game.return_action_list()) - - ag = ig_env.game.agents[ig_env.game.agent_selector] - # h1, h2, card = ig_env.game.action_index_map[index] - ig_env.game.p_tiles[ag].clear() - - if ig_env.game.set_action_index(index): - print("reason4") - flag = False - return flag - return flag + assert ig_env.game.board_array[x1][y1] == c1 and ig_env.game.board_array[x2][y2] == c2, "Color is not placed correctly." + print("ingenious_base basic move Passed") def test_step(): @@ -122,11 +91,12 @@ def test_step(): Returns: True or False """ - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) + ig_env = Ingenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ig_env.reset() flag = True # check legal step + ag = ig_env.agent_selection obs = ig_env.observe(ag) @@ -150,7 +120,7 @@ def test_step(): flag = False print("reason2") return flag - + """ # check illegal move : put somewhere not allowed obs = ig_env.observe(ag) masked_act_list = obs["action_mask"] @@ -176,6 +146,8 @@ def test_step(): # check selector + """ + return flag @@ -185,7 +157,7 @@ def test_reset(): Returns: True or False """ - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=4) + ig_env = Ingenious(num_agents=2, rack_size=2, num_colors=2, board_size=4) ig_env.reset(105) train(ig_env) ig_env.reset(110) @@ -205,15 +177,21 @@ def test_reset(): flag = False if len(ig_env.game.tiles_bag) < 100: flag = False + if flag: + print("Reset test Passed") + else: + print("Reset test Rejected") return flag def test_ingenious_rule(): """Ingenious rule test in a small case setting; when game end successfully, no agent should successively play 3 times.""" - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8, limitation_score=10) + ig_env = Ingenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ag = -1 sum = 0 ig_env.reset() + ig_env.game.max_score = 5 + done = False if_exeed = True if_ingenious = False @@ -236,17 +214,20 @@ def test_ingenious_rule(): if sum == 1: if_ingenious = True break + if if_ingenious and if_exeed: + print("Ingenious rule check Passed") + return if_ingenious and if_exeed def test_API(): """Test observe interface in ingenous.py.""" - ig_env = MOIngenious(limitation_score=10000) - ag = ig_env.agent_selection - obs = ig_env.observe(ag) - masked_act_list = obs["action_mask"] - print(sum(masked_act_list)) - print(sum(ig_env.game.masked_action)) + ig_env = Ingenious() + ig_env.max_score = 10000 + # ag = ig_env.agent_selection + # obs = ig_env.observe(ag) + # print(sum(masked_act_list)) + # print(sum(ig_env.game.masked_action)) env = ig_env env.reset() # observation_0 @@ -264,8 +245,8 @@ def test_API(): } for agent in env.agent_iter(env.num_agents * num_cycles): generated_agents.add(agent) - print(agent, has_finished, generated_agents) - print(env.last()) + # print(agent, has_finished, generated_agents) + # print(env.last()) assert agent not in has_finished, "agents cannot resurrect! Generate a new agent with a new name." assert isinstance(env.infos[agent], dict), "an environment agent's info must be a dictionary" prev_observe, reward, terminated, truncated, info = env.last() @@ -318,42 +299,147 @@ def test_API(): if isinstance(env.observation_space(agent), gymnasium.spaces.Box): assert env.observation_space(agent).dtype == prev_observe.dtype - assert env.observation_space(agent).contains(prev_observe), "Out of bounds observation: " + str(prev_observe) - assert env.observation_space(agent).contains(prev_observe), "Agent's observation is outside of it's observation space" + # These codes are some left codes no need anymore for action is already taken in the env and test_observation not used anymore. + # assert env.observation_space(agent).contains(prev_observe), "Out of bounds observation: " + str(prev_observe) + # assert env.observation_space(agent).contains(prev_observe), "Agent's observation is outside of it's observation space" # test_observation(prev_observe, observation_0) if not isinstance(env.infos[env.agent_selection], dict): print("The info of each agent should be a dict, use {} if you aren't using info") if not env.agents: assert has_finished == generated_agents, "not all agents finished, some were skipped over" + print("API ingenious.py Passed") + + +def check_fully_observable(): + """Test observable trigger in ingenous.py.""" + ig_env = Ingenious(fully_obs=True) + ig_env.reset() + ag = ig_env.agent_selection + obs = ig_env.observe(ag) + print("Observation", obs) + print("Fully Observable: Pass") + + +def check_two_team(): + """Test teammate(reward sharing) in ingenous.py.""" + ig_env = Ingenious(num_agents=4, reward_mode="two_teams") + ig_env.reset() + ag = ig_env.agent_selection + obs = ig_env.observe(ag) + # index = random_index_of_one(ig_env.game.return_action_list()) + # ig_env.step(index) + print("Start check_two_team") + print("Observation", obs) + done = False + while not done: + ag = ig_env.agent_selection + print("Agent: ", ag) + obs = ig_env.observe(ag) + masked_act_list = obs["action_mask"] + action = random_index_of_one(masked_act_list) + print("Action: ", action) + ig_env.step(action) + observation, reward, termination, truncation, _ = ig_env.last() + print("Observations: ", observation["observation"]) + print("Rewards: ", reward, "_accumulate_reward(from gymnasium code)", ig_env._cumulative_rewards) + print("Truncation: ", truncation) + print("Termination: ", termination) + done = truncation or termination + print(ig_env.game.score) + print("Stop check_two_team") + + +def check_collaborative(): + """Test teammate(reward sharing) in ingenous.py.""" + ig_env = Ingenious(num_agents=4, reward_mode="collaborative") + ig_env.reset() + ag = ig_env.agent_selection + obs = ig_env.observe(ag) + # index = random_index_of_one(ig_env.game.return_action_list()) + # ig_env.step(index) + print("Start check_collaborative") + print("Observation", obs) + done = False + while not done: + ag = ig_env.agent_selection + print("Agent: ", ag) + obs = ig_env.observe(ag) + masked_act_list = obs["action_mask"] + action = random_index_of_one(masked_act_list) + print("Action: ", action) + ig_env.step(action) + observation, reward, termination, truncation, _ = ig_env.last() + print("Observations: ", observation["observation"]) + print("Rewards: ", reward, "_accumulate_reward(from gymnasium code)", ig_env._cumulative_rewards) + print("Truncation: ", truncation) + print("Termination: ", termination) + done = truncation or termination + print(ig_env.game.score) + print("Stop check_collaborative") + + +def check_parameter_range(): + """Simulate all possible parameter to test the game with random choices.""" + for n_player in range(2, 7): + for draw in range(2, 7): + for color in range(n_player, 7): + for bs in range(0, 10): + for teammate in ["competitive", "collaborative", "two_teams"]: + for fully_obs in [True, False]: + print( + "num_players=", + n_player, + " init_draw=", + draw, + "num_colors=", + color, + "board_size=", + bs, + "teammate_mode=", + teammate, + "fully_obs=", + fully_obs, + "render_mode=", + None, + ) + + try: + ig_env = Ingenious( + num_agents=n_player, + rack_size=draw, + num_colors=color, + board_size=bs, + reward_mode=teammate, + fully_obs=fully_obs, + render_mode=None, + ) + ig_env.reset() + train(ig_env) + except Exception as e: + print(e) + pass + else: + print("PASS") if __name__ == "__main__": - # ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) - # ag = ig_env.agent_selection - # ig_env.reset() - t1 = test_ingenious_rule() - # t1 = True - # ig_env.reset() - t2 = test_reset() - # ig_env.reset() - # t3 = test_move() # no need anymore - t4 = test_step() - - if t1: - print("Accepted: ingenious rule test") - else: - print("Rejected: ingenious rule test") - if t2: - print("Accepted: reset test") - else: - print("Rejected: reset test") - # if t3: - # print("Accepted: move in ingenious_base test") - # else: - # print("Rejected: move in ingenious_base test") - if t4: - print("Accepted: move in step test") - else: - print("Rejected: move in step test") + # test move of inginous_base.py + test_move() + # test API + test_API() + # test inginious rule + test_ingenious_rule() + + # run this function, you could always find opponents' tiles in observation space + check_fully_observable() + + # check two_team mode through simulation, it could be found that teammates always share the same score in score board. + check_two_team() + + # check collaborative mode through simulation, it could be found that every players always share the same score in score board. + check_collaborative() + + # check parameter range by ramdom choose. + check_parameter_range() diff --git a/momaland/envs/ingenious/ingenious_seedtest.py b/momaland/envs/ingenious/ingenious_seedtest.py index a9215f36..17abcb2b 100644 --- a/momaland/envs/ingenious/ingenious_seedtest.py +++ b/momaland/envs/ingenious/ingenious_seedtest.py @@ -3,7 +3,7 @@ # import random import numpy as np -from ingenious import MOIngenious +from ingenious import Ingenious # from ingenious_base import Hex2ArrayLocation @@ -165,9 +165,9 @@ def data_equivalence(data_1, data_2) -> bool: if __name__ == "__main__": - ig_env = MOIngenious(num_players=4, init_draw=4, num_colors=4, board_size=8) + ig_env = Ingenious(num_agents=4, rack_size=4, num_colors=4, board_size=8) - ig_env2 = MOIngenious(num_players=4, init_draw=4, num_colors=4, board_size=8) + ig_env2 = Ingenious(num_agents=4, rack_size=4, num_colors=4, board_size=8) env1 = ig_env env2 = ig_env2