From 0e8ece70e2ffc6a3d86f7612bb7b96c777ba6b99 Mon Sep 17 00:00:00 2001
From: penggao00
Date: Wed, 28 Feb 2024 01:15:23 +0100
Subject: [PATCH 01/18] some tests for parameters and new modes

---
 momaland/envs/ingenious/ingenious.py       |  52 ++++++--
 momaland/envs/ingenious/ingenious_base.py  |  20 +--
 momaland/envs/ingenious/ingenious_check.py | 143 +++++++++++++++++----
 3 files changed, 172 insertions(+), 43 deletions(-)

diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py
index 998afee6..57df429c 100644
--- a/momaland/envs/ingenious/ingenious.py
+++ b/momaland/envs/ingenious/ingenious.py
@@ -46,29 +46,37 @@ class MOIngenious(MOAECEnv):

     metadata = {"render_modes": ["human"], "name": "moingenious_v0", "is_parallelizable": False}

-    def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=8, limitation_score=18, render_mode=None):
+    def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, reward_sharing=None, fully_obs=False, render_mode=None,):
         """Initializes the multi-objective Ingenious game.

         Args:
             num_players (int): The number of players in the environment. Default: 2
             init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6
             num_colors (int): The number of colors in the game. Default: 4
-            board_size (int): The size of the board. Default: 8
-            limitation_score(int): Limitation to refresh the score board for any color. Default: 20
+            board_size (int): The size of the board. Default: 0 (0 means the board size is dependent on num_players, e.g. {2: 6, 3: 7, 4: 8}; otherwise, set the board_size freely between 3 and 8)
+            #limitation_score(int): Limitation to refresh the score board for any color. Default: 20
+            reward_sharing: Partnership game. It should be a dict like {'agent_0': 0, 'agent_1': 0, 'agent_2': 1, 'agent_3': 1} where teammates will share the reward. Default: None
+            fully_obs: Fully observable or not. Default: False
             render_mode (str): The rendering mode. Default: None
         """
-        self.board_size = board_size
+
         self.num_colors = num_colors
         self.init_draw = init_draw
         self.num_players = num_players
-        self.limitation_score = limitation_score
+        self.limitation_score = 18  # max score in score board for one certain color.
+ self.reward_sharing = reward_sharing + self.fully_obs = fully_obs + if board_size == 0: + self.board_size = { 2:6, 3:7, 4:8}.get(self.num_players) + else: + self.board_size = board_size self.game = IngeniousBase( - num_players=num_players, - init_draw=init_draw, - num_colors=num_colors, - board_size=board_size, - limitation_score=limitation_score, + num_players=self.num_players, + init_draw=self.init_draw, + num_colors=self.num_colors, + board_size=self.board_size, + limitation_score=self.limitation_score, ) self.possible_agents = ["agent_" + str(r) for r in range(num_players)] @@ -93,7 +101,7 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=8, limit "board": Box( 0, len(ALL_COLORS), shape=(2 * self.board_size - 1, 2 * self.board_size - 1), dtype=np.float32 ), - "tiles": Box(0, self.num_colors, shape=(self.init_draw, 2), dtype=np.int32), + "tiles": Box(0, self.num_colors, shape=(self.init_draw, ), dtype=np.int32), "scores": Box(0, self.game.limitation_score, shape=(self.num_colors,), dtype=np.int32), } ), @@ -186,6 +194,14 @@ def step(self, action): # update accumulate_rewards self._accumulate_rewards() + # update teammate score(copy current agent score to the teammate) + if self.reward_sharing is not None: + index=self.reward_sharing[current_agent] + for agent in self.agents: + if agent != current_agent and self.reward_sharing[agent]==index: + self.game.score[agent]=self.game.score[current_agent] + + # update to next agent self.agent_selection = self.agents[self.game.agent_selector] @@ -194,12 +210,22 @@ def step(self, action): else: self.refresh_cumulative_reward = False + + @override def observe(self, agent): board_vals = np.array(self.game.board_array, dtype=np.float32) - p_tiles = np.array(self.game.p_tiles[agent], dtype=np.int32) - p_score = np.array(list(self.game.score[agent].values()), dtype=np.int32) + if self.fully_obs: + p_tiles = np.array([item for item in self.game.p_tiles.values()], dtype=np.int32) + tmp=[] + for agent_score in self.game.score.values(): + tmp.append([score for score in agent_score.values()]) + p_score = np.array(tmp, dtype=np.int32) + else: + # print(self.game.p_tiles[agent]) + p_tiles = np.array(self.game.p_tiles[agent], dtype=np.int32) + p_score = np.array(list(self.game.score[agent].values()), dtype=np.int32) observation = {"board": board_vals, "tiles": p_tiles, "scores": p_score} action_mask = np.array(self.game.return_action_list(), dtype=np.int8) diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index 3dae81ed..40d7b3ff 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -100,7 +100,7 @@ def generate_board(board_size): class IngeniousBase: """Base class for Ingenious environment.""" - def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=8, limitation_score=18): + def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limitation_score=18): """Initialize the Ingenious environment. Args: @@ -110,10 +110,11 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=8, limit board_size (int): Size of the board. limitation_score(int): Limitation to refresh the score board for any color. Default: 20 """ - assert 2 <= num_players <= 5, "Number of players must be between 2 and 5." + assert 2 <= num_players <= 6, "Number of players must be between 2 and 6." assert 2 <= num_colors <= 6, "Number of colors must be between 2 and 6." 
assert 2 <= init_draw <= 6, "Number of tiles in hand must be between 2 and 6." - assert 3 <= board_size <= 10, "Board size must be between 3 and 10." + assert 3 <= board_size <= 8, "Board size must be between 3 and 8." + assert num_players<=num_colors,"Number of players should be smaller than number of colors. " self.board_size = board_size self.num_player = num_players @@ -276,9 +277,9 @@ def set_action_index(self, index): self.board_array[x2][y2] = c2 self.exclude_action(h1) self.exclude_action(h2) - # Flag to signal if ingenious is called + """Flag to signal if ingenious is called """ skip_flag = False - # flags to avoid calling ingenious on colour that was already maxed out + """flags to avoid calling ingenious on colour that was already maxed out """ ingenious_possible = [True, True] if self.score[agent][c1] == self.limitation_score: ingenious_possible[0] = False @@ -301,7 +302,7 @@ def set_action_index(self, index): self.end_flag = True # Preserve the number of tiles in hand for each player to comply with observation dimensions while len(self.p_tiles[agent]) < self.init_draw: - self.p_tiles[agent].append([0, 0]) + self.p_tiles[agent].append((0, 0)) return True """All tiles in hand has been played""" @@ -309,7 +310,7 @@ def set_action_index(self, index): self.end_flag = True # The player should win instantly if he plays out all the tiles in hand. # Preserve the number of tiles in hand for each player to comply with observation dimensions while len(self.p_tiles[agent]) < self.init_draw: - self.p_tiles[agent].append([0, 0]) + self.p_tiles[agent].append((0, 0)) return True """In the original rules of the game, when a player calls ingenious, they can play a bonus round without @@ -368,6 +369,7 @@ def refresh_hand(self, player): flag_lowest_score = False for item in self.p_tiles[player]: for col in item: + # print(player,self.p_tiles[player],item, col, self.score[player]) if self.score[player][col] == minval: flag_lowest_score = True if flag_lowest_score: @@ -381,7 +383,9 @@ def refresh_hand(self, player): # draw new tiles self.get_tile(player) # add unused tiles back to the tiles bag - self.tiles_bag.append(back_up) + # self.tiles_bag.append(back_up) # This could be wrong for append a list of tuple into a list + for item in back_up: + self.tiles_bag.append(item) def return_action_list(self): """Return the legal action list.""" diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index 2a1f64e3..e7aaf0ca 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -21,7 +21,7 @@ def train(ig_env): # print("Action: ", action) ig_env.step(action) observation, reward, truncation, termination, _ = ig_env.last() - # print("Observations: ", observation) + # print("Observations: ", observation['observation']) # print("Rewards: ", reward) # print("Truncation: ", truncation) # print("Termination: ", termination) @@ -95,14 +95,16 @@ def test_move(): flag = False print("reason2") return flag - + """ + According to last time reviewer's suggestion, the illegal move will use 'assert' to stop the program instead of warning; this stops my test code to verify the illegal move. 
+ # check illegal move : put somewhere not allowed index = random_index_of_zero(ig_env.game.return_action_list()) if ig_env.game.set_action_index(index): print("reason3") flag = False return flag - + # check illegal move : put some tile out of hand index = random_index_of_one(ig_env.game.return_action_list()) @@ -115,6 +117,7 @@ def test_move(): flag = False return flag return flag + """ def test_step(): @@ -127,6 +130,8 @@ def test_step(): flag = True # check legal step + + ag = ig_env.agent_selection obs = ig_env.observe(ag) @@ -150,7 +155,7 @@ def test_step(): flag = False print("reason2") return flag - + """ # check illegal move : put somewhere not allowed obs = ig_env.observe(ag) masked_act_list = obs["action_mask"] @@ -162,7 +167,7 @@ def test_step(): print("reason3") flag = False return flag - + # check illegal move : put some tile out of hand index = random_index_of_one(ig_env.game.masked_action) ag = ig_env.agent_selection @@ -175,6 +180,10 @@ def test_step(): return flag # check selector + + """ + + return flag @@ -241,7 +250,8 @@ def test_ingenious_rule(): def test_API(): """Test observe interface in ingenous.py.""" - ig_env = MOIngenious(limitation_score=10000) + ig_env = MOIngenious() + ig_env.limitation_score=10000 ag = ig_env.agent_selection obs = ig_env.observe(ag) masked_act_list = obs["action_mask"] @@ -328,32 +338,121 @@ def test_API(): if not env.agents: assert has_finished == generated_agents, "not all agents finished, some were skipped over" +def check_fully_observable(): + """Test observable trigger in ingenous.py.""" + ig_env = MOIngenious(fully_obs=True) + ig_env.reset() + ag=ig_env.agent_selection + obs = ig_env.observe(ag) + print("Observation",obs) + +def check_teammate(): + """Test teammate(reward sharing) in ingenous.py.""" + ig_env = MOIngenious(num_players=4,reward_sharing={'agent_0':0, 'agent_1':0,'agent_2':1, 'agent_3':1}) + ig_env.reset() + ag=ig_env.agent_selection + obs = ig_env.observe(ag) + index = random_index_of_one(ig_env.game.return_action_list()) + ig_env.step(index) + print("Observation",obs) + done = False + while not done: + ag = ig_env.agent_selection + print("Agent: ", ag) + obs = ig_env.observe(ag) + masked_act_list = obs["action_mask"] + action = random_index_of_one(masked_act_list) + #print("Observation: ",obs) + print("Action: ", action) + ig_env.step(action) + observation, reward, truncation, termination, _ = ig_env.last() + print("Observations: ", observation['observation']) + print("Rewards: ", reward) + print("Truncation: ", truncation) + print("Termination: ", termination) + done = truncation or termination + print(ig_env.game.score) + ig_env = MOIngenious(num_players=4,reward_sharing={'agent_0':1, 'agent_1':0,'agent_2':0, 'agent_3':1}) + ig_env.reset() + ag=ig_env.agent_selection + obs = ig_env.observe(ag) + index = random_index_of_one(ig_env.game.return_action_list()) + ig_env.step(index) + print("Observation",obs) + done = False + while not done: + ag = ig_env.agent_selection + print("Agent: ", ag) + obs = ig_env.observe(ag) + masked_act_list = obs["action_mask"] + action = random_index_of_one(masked_act_list) + #print("Observation: ",obs) + print("Action: ", action) + ig_env.step(action) + observation, reward, truncation, termination, _ = ig_env.last() + print("Observations: ", observation['observation']) + print("Rewards: ", reward) + print("Truncation: ", truncation) + print("Termination: ", termination) + done = truncation or termination + print(ig_env.game.score) + + + + if __name__ == "__main__": # ig_env = 
MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) # ag = ig_env.agent_selection # ig_env.reset() - t1 = test_ingenious_rule() - # t1 = True + #t1 = test_ingenious_rule() + #t1 = True # ig_env.reset() - t2 = test_reset() + #t1 = test_reset() + # if t1: + # print("Accepted: reset test") + # else: + # print("Rejected: reset test") # ig_env.reset() # t3 = test_move() # no need anymore - t4 = test_step() + # t4 = test_step() + # check_fully_observable() + # check_teammate() - if t1: - print("Accepted: ingenious rule test") - else: - print("Rejected: ingenious rule test") - if t2: - print("Accepted: reset test") - else: - print("Rejected: reset test") + ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=0, reward_sharing=None, + fully_obs=False, render_mode=None) + ig_env.reset() + train(ig_env) + + for n_player in range(2,5): + for draw in range(2,7): + for color in range(n_player,7): + for bs in range(0,9): + if bs in [1,2]: + continue + print("num_players=",n_player, " init_draw=",draw, "num_colors=", color, "board_size=", bs, "reward_sharing=", None, "fully_obs=", False, "render_mode=", None) + ig_env = MOIngenious(num_players=n_player, init_draw=draw, num_colors=color, board_size=bs, reward_sharing=None, fully_obs=False, render_mode=None) + ig_env.reset() + train(ig_env) + print("PASS") + + + + + + #if t1: + # print("Accepted: ingenious rule test") + #else: + # print("Rejected: ingenious rule test") + #if t2: + # print("Accepted: reset test") + #else: + # print("Rejected: reset test") # if t3: # print("Accepted: move in ingenious_base test") # else: # print("Rejected: move in ingenious_base test") - if t4: - print("Accepted: move in step test") - else: - print("Rejected: move in step test") + #if t4: + # print("Accepted: move in step test") + #else: + # print("Rejected: move in step test") From ee0c61c06c0dd37fb82dc1bdc37d23d7abcc978d Mon Sep 17 00:00:00 2001 From: penggao00 Date: Thu, 29 Feb 2024 21:12:18 +0100 Subject: [PATCH 02/18] plyer 6 size 10 tile bag times 2 --- momaland/envs/ingenious/ingenious.py | 14 +++++++------- momaland/envs/ingenious/ingenious_base.py | 9 ++++++++- momaland/envs/ingenious/ingenious_check.py | 12 ++++++++---- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 57df429c..3af5c5bc 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -67,7 +67,7 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, rewar self.reward_sharing = reward_sharing self.fully_obs = fully_obs if board_size == 0: - self.board_size = { 2:6, 3:7, 4:8}.get(self.num_players) + self.board_size = { 2:6, 3:7, 4:8, 5:9, 6:10}.get(self.num_players) else: self.board_size = board_size @@ -217,16 +217,16 @@ def observe(self, agent): board_vals = np.array(self.game.board_array, dtype=np.float32) if self.fully_obs: p_tiles = np.array([item for item in self.game.p_tiles.values()], dtype=np.int32) - tmp=[] - for agent_score in self.game.score.values(): - tmp.append([score for score in agent_score.values()]) - p_score = np.array(tmp, dtype=np.int32) else: # print(self.game.p_tiles[agent]) p_tiles = np.array(self.game.p_tiles[agent], dtype=np.int32) + # p_score = np.array(list(self.game.score[agent].values()), dtype=np.int32) + # show all score board + tmp = [] + for agent_score in self.game.score.values(): + tmp.append([score for score in agent_score.values()]) + p_score = np.array(tmp, dtype=np.int32) - 
p_score = np.array(list(self.game.score[agent].values()), dtype=np.int32) observation = {"board": board_vals, "tiles": p_tiles, "scores": p_score} action_mask = np.array(self.game.return_action_list(), dtype=np.int8) - return {"observation": observation, "action_mask": action_mask} diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index 40d7b3ff..b4e7f193 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -113,7 +113,7 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limit assert 2 <= num_players <= 6, "Number of players must be between 2 and 6." assert 2 <= num_colors <= 6, "Number of colors must be between 2 and 6." assert 2 <= init_draw <= 6, "Number of tiles in hand must be between 2 and 6." - assert 3 <= board_size <= 8, "Board size must be between 3 and 8." + assert 3 <= board_size <= 10, "Board size must be between 3 and 8." assert num_players<=num_colors,"Number of players should be smaller than number of colors. " self.board_size = board_size @@ -239,6 +239,13 @@ def tiles_bag_reset(self): self.tiles_bag = int(NUM_TILES / len(diff_color_combinations + same_color_combinations)) * ( diff_color_combinations + same_color_combinations ) + #print(len(self.tiles_bag)) + if self.board_size in [9,10]: + # cannot fill the board for 9 or 10(complement rule) + self.tiles_bag*=2 + + + # Shuffle the tiles bag self.random.shuffle(self.tiles_bag) diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index e7aaf0ca..8163b0c9 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -348,7 +348,7 @@ def check_fully_observable(): def check_teammate(): """Test teammate(reward sharing) in ingenous.py.""" - ig_env = MOIngenious(num_players=4,reward_sharing={'agent_0':0, 'agent_1':0,'agent_2':1, 'agent_3':1}) + ig_env = MOIngenious(num_players=4,reward_sharing={'agent_0':0, 'agent_1':1,'agent_2':1, 'agent_3':1}) ig_env.reset() ag=ig_env.agent_selection obs = ig_env.observe(ag) @@ -423,23 +423,27 @@ def check_teammate(): fully_obs=False, render_mode=None) ig_env.reset() train(ig_env) - - for n_player in range(2,5): + #print(ig_env.last()) + for n_player in range(5,7): for draw in range(2,7): for color in range(n_player,7): - for bs in range(0,9): + for bs in range(0,10): if bs in [1,2]: continue + if n_player in range(5,7): + bs=0 print("num_players=",n_player, " init_draw=",draw, "num_colors=", color, "board_size=", bs, "reward_sharing=", None, "fully_obs=", False, "render_mode=", None) ig_env = MOIngenious(num_players=n_player, init_draw=draw, num_colors=color, board_size=bs, reward_sharing=None, fully_obs=False, render_mode=None) ig_env.reset() train(ig_env) + print("PASS") + #if t1: # print("Accepted: ingenious rule test") #else: From cddaabf28691ee6c77ef3fffb98966d56683dc01 Mon Sep 17 00:00:00 2001 From: penggao00 Date: Thu, 7 Mar 2024 16:44:20 +0100 Subject: [PATCH 03/18] each player takes different corner in first round --- momaland/envs/ingenious/ingenious_base.py | 32 +++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index b4e7f193..95d7fb5f 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -284,6 +284,12 @@ def set_action_index(self, index): self.board_array[x2][y2] = c2 self.exclude_action(h1) 
self.exclude_action(h2) + if self.first_round: + #if first round, each player should take different corner + print(h1,h2) + self.exclude_position_first_round(h1) + self.exclude_position_first_round(h2) + """Flag to signal if ingenious is called """ skip_flag = False """flags to avoid calling ingenious on colour that was already maxed out """ @@ -409,3 +415,29 @@ def log(self): print(self.board_array) print(self.score) print(self.p_tiles) + def exclude_position_first_round(self, pos): + """Exclude available position in self.first_round_pos to ensure that each player begins with a different corner ( each corner is taken once). """ + for i in range(0, 6): + corner = hex_scale(hex_directions[i], self.board_size - 1) + for i in range(0, 6): + neighbor_hex = hex_neighbor(pos, i) + if hex_scale(neighbor_hex, 1.0/(self.board_size-1)) in hex_directions: + #neighbor_hex is corner + a=neighbor_hex + print("find the corner to remove in first round",a,pos) + for k in range(0, 6): + hx1 = hex_neighbor(a, k) + for j in range(0, 6): + hx2 = hex_neighbor(hx1, j) + if (hx2 not in self.board_hex) or (hx1 not in self.board_hex) or (hx2 == a): + continue + for card in range(0, self.init_draw): + c1 = self.action_map[(hx1, hx2, card)] + c2 = self.action_map[(hx2, hx1, card)] + if c1 in self.first_round_pos: + self.first_round_pos.remove(c1) + if c2 in self.first_round_pos: + self.first_round_pos.remove(c2) + break + + From 7e0b2934fb6618d99bb878573a300292a0007697 Mon Sep 17 00:00:00 2001 From: penggao00 Date: Thu, 7 Mar 2024 16:57:30 +0100 Subject: [PATCH 04/18] Document update in ingenious.py --- momaland/envs/ingenious/ingenious.py | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 3af5c5bc..e53808ea 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -3,8 +3,56 @@ This environment is based on the Ingenious game: https://boardgamegeek.com/boardgame/9674/ingenious Every color is a different objective. The goal in the original game is to maximize the minimum score over all colors, however we leave the utility wrapper up to the users and only return the vectorial score on each color dimension. +|---|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Agents names | `agent_i for i in [0, 5]` | +| Action Space | Discrete(5544) | +| Observation Space | Dict('action_mask': Box(0, 1, (5544,), int8), 'observation': Dict('board': Box(0.0, 6.0, (15, 15), float32), 'scores': Box(0, 18, (6,), int32), 'tiles': Box(0, 6, (6, 2), int32))) | +| Reward Space | Box(0.0, 18.0, (6,), float32) | +| Import | `momaland.envs.moingenious_v0` + +## Observation Space + +Non Fixed size of the board????? + +The observation space is a continuous box with the length `(num_drones + 1) * 3` where each 3 values represent the XYZ coordinates of the drones in this order: +- the agent. +- the target. +- the other agents. + +Example: + + +## Action Space +The action space is a discrete index representing the move that put tile with color(c1,c2) to the position (h1,h2). + +## Reward Space +The reward space is a 2D vector containing rewards for: +- After certain action, for the current player i, the difference between the old score and the new score for each color in the score board. 
+ +## Starting State +TODO + +## Episode Termination +The episode is terminated if one of the following conditions are met: +- The board is filled. +- Sequential "ingenious" move until using up the tiles.(Complemented rule for winning). + +## Episode Truncation +TODO + +## Init Function +def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, reward_sharing=None, fully_obs=False, render_mode=None,) +- num_players (int): The number of players in the environment. Default: 2 +- init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6 +- num_colors (int): The number of colors in the game. Default: 4 +- board_size (int): The size of the board. Default: 0 (0 means the board size id dependent on num_players like { 2:6, 3:7 , 4:8}; otherwise, set the board_size freely between 3 and 8) +- reward_sharing: Partnership Game.It should be a set like {'agent_0':0, 'agent_1':0,'agent_2':1, 'agent_3':1} where teammates will share the reward. Default: None +- fully_obs: Fully observable or not. Default:False +- render_mode (str): The rendering mode. Default: None + """ + import functools import random From 68f6db0d92aa14181fe8e46f33a34b95a900755d Mon Sep 17 00:00:00 2001 From: penggao00 Date: Thu, 14 Mar 2024 00:49:42 +0100 Subject: [PATCH 05/18] update Mar 14 --- momaland/envs/ingenious/ingenious.py | 27 +++-- momaland/envs/ingenious/ingenious_base.py | 4 +- momaland/envs/ingenious/ingenious_check.py | 127 ++++++++------------- 3 files changed, 64 insertions(+), 94 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index e53808ea..6e099683 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -94,7 +94,7 @@ class MOIngenious(MOAECEnv): metadata = {"render_modes": ["human"], "name": "moingenious_v0", "is_parallelizable": False} - def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, reward_sharing=None, fully_obs=False, render_mode=None,): + def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, teammate_mode=False, fully_obs=False, render_mode=None,): """Initializes the multi-objective Ingenious game. Args: @@ -102,8 +102,7 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, rewar init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6 num_colors (int): The number of colors in the game. Default: 4 board_size (int): The size of the board. Default: 0 (0 means the board size id dependent on num_players like { 2:6, 3:7 , 4:8}; otherwise, set the board_size freely between 3 and 8) - #limitation_score(int): Limitation to refresh the score board for any color. Default: 20 - reward_sharing: Partnership Game.It should be a set like {'agent_0':0, 'agent_1':0,'agent_2':1, 'agent_3':1} where teammates will share the reward. Default: None + teammate_mode: Partnership Game or not. Default:False fully_obs: Fully observable or not. Default:False render_mode (str): The rendering mode. Default: None """ @@ -112,7 +111,11 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, rewar self.init_draw = init_draw self.num_players = num_players self.limitation_score = 18 # max score in score board for one certain color. - self.reward_sharing = reward_sharing + self.teammate_mode=teammate_mode + if self.teammate_mode is True: + assert num_players%2 == 0, "Number of players must be even if teammate_mode is on." 
+ self.limitation_score=self.limitation_score*(num_players/2) + self.fully_obs = fully_obs if board_size == 0: self.board_size = { 2:6, 3:7, 4:8, 5:9, 6:10}.get(self.num_players) @@ -230,6 +233,7 @@ def step(self, action): if self.refresh_cumulative_reward: self._cumulative_rewards[current_agent] = np.zeros(self.num_colors, dtype="float64") + #update current agent if not self.game.end_flag: prev_rewards = np.array(list(self.game.score[current_agent].values())) self.game.set_action_index(action) @@ -239,16 +243,17 @@ def step(self, action): if self.game.end_flag: self.terminations = {agent: True for agent in self.agents} - # update accumulate_rewards - self._accumulate_rewards() - # update teammate score(copy current agent score to the teammate) - if self.reward_sharing is not None: - index=self.reward_sharing[current_agent] - for agent in self.agents: - if agent != current_agent and self.reward_sharing[agent]==index: + if self.teammate_mode is True: + index_current_agent=self.agents.index(current_agent) + for i in range(0,self.num_players): + if i!=index_current_agent and i%2==index_current_agent%2: + agent=self.agents[i] self.game.score[agent]=self.game.score[current_agent] + self.rewards[agent]= self.rewards[current_agent] + # update accumulate_rewards + self._accumulate_rewards() # update to next agent self.agent_selection = self.agents[self.game.agent_selector] diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index 95d7fb5f..e8a54111 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -113,7 +113,7 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limit assert 2 <= num_players <= 6, "Number of players must be between 2 and 6." assert 2 <= num_colors <= 6, "Number of colors must be between 2 and 6." assert 2 <= init_draw <= 6, "Number of tiles in hand must be between 2 and 6." - assert 3 <= board_size <= 10, "Board size must be between 3 and 8." + assert 3 <= board_size <= 10, "Board size must be between 3 and 10." assert num_players<=num_colors,"Number of players should be smaller than number of colors. " self.board_size = board_size @@ -417,8 +417,6 @@ def log(self): print(self.p_tiles) def exclude_position_first_round(self, pos): """Exclude available position in self.first_round_pos to ensure that each player begins with a different corner ( each corner is taken once). 
""" - for i in range(0, 6): - corner = hex_scale(hex_directions[i], self.board_size - 1) for i in range(0, 6): neighbor_hex = hex_neighbor(pos, i) if hex_scale(neighbor_hex, 1.0/(self.board_size-1)) in hex_directions: diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index 8163b0c9..1d49a6c7 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -348,31 +348,9 @@ def check_fully_observable(): def check_teammate(): """Test teammate(reward sharing) in ingenous.py.""" - ig_env = MOIngenious(num_players=4,reward_sharing={'agent_0':0, 'agent_1':1,'agent_2':1, 'agent_3':1}) - ig_env.reset() - ag=ig_env.agent_selection - obs = ig_env.observe(ag) - index = random_index_of_one(ig_env.game.return_action_list()) - ig_env.step(index) - print("Observation",obs) - done = False - while not done: - ag = ig_env.agent_selection - print("Agent: ", ag) - obs = ig_env.observe(ag) - masked_act_list = obs["action_mask"] - action = random_index_of_one(masked_act_list) - #print("Observation: ",obs) - print("Action: ", action) - ig_env.step(action) - observation, reward, truncation, termination, _ = ig_env.last() - print("Observations: ", observation['observation']) - print("Rewards: ", reward) - print("Truncation: ", truncation) - print("Termination: ", termination) - done = truncation or termination - print(ig_env.game.score) - ig_env = MOIngenious(num_players=4,reward_sharing={'agent_0':1, 'agent_1':0,'agent_2':0, 'agent_3':1}) + + ig_env = MOIngenious(num_players=4 + ,teammate_mode=True) ig_env.reset() ag=ig_env.agent_selection obs = ig_env.observe(ag) @@ -386,77 +364,66 @@ def check_teammate(): obs = ig_env.observe(ag) masked_act_list = obs["action_mask"] action = random_index_of_one(masked_act_list) - #print("Observation: ",obs) print("Action: ", action) ig_env.step(action) - observation, reward, truncation, termination, _ = ig_env.last() + observation, reward, termination, truncation, _ = ig_env.last() print("Observations: ", observation['observation']) - print("Rewards: ", reward) + print("Rewards: ", reward, "_accumulate_reward(from gymnasium code)",ig_env._cumulative_rewards) print("Truncation: ", truncation) print("Termination: ", termination) done = truncation or termination print(ig_env.game.score) - +def check_parameter_range(): + for n_player in range(5, 7): + for draw in range(2, 7): + for color in range(n_player, 7): + for bs in range(0, 10): + # if bs in [1, 2]: + # continue + # if n_player in range(5, 7): + # bs = 0 + for teammate in [True, False]: + for fully_obs in [True, False]: + print("num_players=", n_player, " init_draw=", draw, "num_colors=", color, "board_size=", bs, + "teammate_mode=",teammate , "fully_obs=", fully_obs, "render_mode=", None) + ig_env = MOIngenious(num_players=n_player, init_draw=draw, num_colors=color, board_size=bs, + teammate_mode=teammate, fully_obs=fully_obs, render_mode=None) + ig_env.reset() + try: + train(ig_env) + print("PASS") + except AssertionError as e: + print(e) if __name__ == "__main__": - # ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) - # ag = ig_env.agent_selection - # ig_env.reset() - #t1 = test_ingenious_rule() - #t1 = True - # ig_env.reset() - #t1 = test_reset() - # if t1: - # print("Accepted: reset test") - # else: - # print("Rejected: reset test") - # ig_env.reset() - # t3 = test_move() # no need anymore - # t4 = test_step() - # check_fully_observable() - # check_teammate() - - ig_env = MOIngenious(num_players=2, 
init_draw=2, num_colors=2, board_size=0, reward_sharing=None, - fully_obs=False, render_mode=None) - ig_env.reset() - train(ig_env) - #print(ig_env.last()) - for n_player in range(5,7): - for draw in range(2,7): - for color in range(n_player,7): - for bs in range(0,10): - if bs in [1,2]: - continue - if n_player in range(5,7): - bs=0 - print("num_players=",n_player, " init_draw=",draw, "num_colors=", color, "board_size=", bs, "reward_sharing=", None, "fully_obs=", False, "render_mode=", None) - ig_env = MOIngenious(num_players=n_player, init_draw=draw, num_colors=color, board_size=bs, reward_sharing=None, fully_obs=False, render_mode=None) - ig_env.reset() - train(ig_env) + # run this function, you could always find opponents' tiles in observation space + check_fully_observable() - print("PASS") + #check teammate_mode through simulation, it could be found that teammates always share the same score in score board. + check_teammate() + #check parameter range by ramdom choose. + check_parameter_range() + """ + ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=0, teammate_mode=False, + fully_obs=False, render_mode=None) + ig_env.reset() + train(ig_env) + try: + assert False, "this code runs, fails, and the exception is caught" + except AssertionError as e: + print(repr(e)) + ig_env.reset() + train(ig_env) + #print(ig_env.last()) + + print("PASS") - #if t1: - # print("Accepted: ingenious rule test") - #else: - # print("Rejected: ingenious rule test") - #if t2: - # print("Accepted: reset test") - #else: - # print("Rejected: reset test") - # if t3: - # print("Accepted: move in ingenious_base test") - # else: - # print("Rejected: move in ingenious_base test") - #if t4: - # print("Accepted: move in step test") - #else: - # print("Rejected: move in step test") + """ From d1c5523962744664a2dc6a2d63e5aba9d687cbf1 Mon Sep 17 00:00:00 2001 From: penggao00 Date: Thu, 14 Mar 2024 13:35:18 +0100 Subject: [PATCH 06/18] comment the check --- momaland/envs/ingenious/ingenious_base.py | 4 +- momaland/envs/ingenious/ingenious_check.py | 115 +++++++-------------- 2 files changed, 41 insertions(+), 78 deletions(-) diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index e8a54111..f75179df 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -286,7 +286,7 @@ def set_action_index(self, index): self.exclude_action(h2) if self.first_round: #if first round, each player should take different corner - print(h1,h2) + # print('first round', h1,h2) self.exclude_position_first_round(h1) self.exclude_position_first_round(h2) @@ -422,7 +422,7 @@ def exclude_position_first_round(self, pos): if hex_scale(neighbor_hex, 1.0/(self.board_size-1)) in hex_directions: #neighbor_hex is corner a=neighbor_hex - print("find the corner to remove in first round",a,pos) + # print("find the corner to remove in first round",a,pos) for k in range(0, 6): hx1 = hex_neighbor(a, k) for j in range(0, 6): diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index 1d49a6c7..e4ff02f8 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -77,48 +77,12 @@ def test_move(): x1, y1 = Hex2ArrayLocation(h1, ig_env.game.board_size) x2, y2 = Hex2ArrayLocation(h2, ig_env.game.board_size) - if ig_env.game.board_array[x1][y1] != 0 or ig_env.game.board_array[x2][y2] != 0: - print("reason1") - flag = False - return flag - + assert 
ig_env.game.board_array[x1][y1] == 0 and ig_env.game.board_array[x2][y2] == 0,"Place on board is taken." ag = ig_env.agent_selection c1, c2 = ig_env.game.p_tiles[ag][card] - - # print(c1,c2,ig_env.game.board_array[x1][y1],ig_env.game.board_array[x2][y2] ) - # print(ig_env.game.return_action_list()[index]) ig_env.game.set_action_index(index) - # ig_env.step(index) - # print('after',c1, c2, ig_env.game.board_array[x1][y1], ig_env.game.board_array[x2][y2]) - ag = ig_env.agent_selection - if ig_env.game.board_array[x1][y1] != c1 or ig_env.game.board_array[x2][y2] != c2: - flag = False - print("reason2") - return flag - """ - According to last time reviewer's suggestion, the illegal move will use 'assert' to stop the program instead of warning; this stops my test code to verify the illegal move. - - # check illegal move : put somewhere not allowed - index = random_index_of_zero(ig_env.game.return_action_list()) - if ig_env.game.set_action_index(index): - print("reason3") - flag = False - return flag - - # check illegal move : put some tile out of hand - index = random_index_of_one(ig_env.game.return_action_list()) - - ag = ig_env.game.agents[ig_env.game.agent_selector] - # h1, h2, card = ig_env.game.action_index_map[index] - ig_env.game.p_tiles[ag].clear() - - if ig_env.game.set_action_index(index): - print("reason4") - flag = False - return flag - return flag - """ - + assert ig_env.game.board_array[x1][y1] == c1 and ig_env.game.board_array[x2][y2] == c2,"Color is not placed correctly." + print("ingenious_base basic move Passed") def test_step(): """Test move correctly in ingenious_base. @@ -214,15 +178,21 @@ def test_reset(): flag = False if len(ig_env.game.tiles_bag) < 100: flag = False + if flag: + print("Reset test Passed") + else: + print("Reset test Rejected") return flag def test_ingenious_rule(): """Ingenious rule test in a small case setting; when game end successfully, no agent should successively play 3 times.""" - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8, limitation_score=10) + ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) ag = -1 sum = 0 ig_env.reset() + ig_env.game.limitation_score=5 + done = False if_exeed = True if_ingenious = False @@ -245,6 +215,9 @@ def test_ingenious_rule(): if sum == 1: if_ingenious = True break + if if_ingenious and if_exeed: + print("Ingenious rule check Passed") + return if_ingenious and if_exeed @@ -255,8 +228,8 @@ def test_API(): ag = ig_env.agent_selection obs = ig_env.observe(ag) masked_act_list = obs["action_mask"] - print(sum(masked_act_list)) - print(sum(ig_env.game.masked_action)) + # print(sum(masked_act_list)) + # print(sum(ig_env.game.masked_action)) env = ig_env env.reset() # observation_0 @@ -274,8 +247,8 @@ def test_API(): } for agent in env.agent_iter(env.num_agents * num_cycles): generated_agents.add(agent) - print(agent, has_finished, generated_agents) - print(env.last()) + # print(agent, has_finished, generated_agents) + # print(env.last()) assert agent not in has_finished, "agents cannot resurrect! Generate a new agent with a new name." 
assert isinstance(env.infos[agent], dict), "an environment agent's info must be a dictionary" prev_observe, reward, terminated, truncated, info = env.last() @@ -328,15 +301,17 @@ def test_API(): if isinstance(env.observation_space(agent), gymnasium.spaces.Box): assert env.observation_space(agent).dtype == prev_observe.dtype - assert env.observation_space(agent).contains(prev_observe), "Out of bounds observation: " + str(prev_observe) - assert env.observation_space(agent).contains(prev_observe), "Agent's observation is outside of it's observation space" + #These codes are some left codes no need anymore for action is already taken in the env and test_observation not used anymore. + # assert env.observation_space(agent).contains(prev_observe), "Out of bounds observation: " + str(prev_observe) + # assert env.observation_space(agent).contains(prev_observe), "Agent's observation is outside of it's observation space" # test_observation(prev_observe, observation_0) if not isinstance(env.infos[env.agent_selection], dict): print("The info of each agent should be a dict, use {} if you aren't using info") if not env.agents: assert has_finished == generated_agents, "not all agents finished, some were skipped over" + print("API ingenious.py Passed") def check_fully_observable(): """Test observable trigger in ingenous.py.""" @@ -376,54 +351,42 @@ def check_teammate(): def check_parameter_range(): - for n_player in range(5, 7): + for n_player in range(2, 7): for draw in range(2, 7): for color in range(n_player, 7): for bs in range(0, 10): - # if bs in [1, 2]: - # continue - # if n_player in range(5, 7): - # bs = 0 for teammate in [True, False]: for fully_obs in [True, False]: print("num_players=", n_player, " init_draw=", draw, "num_colors=", color, "board_size=", bs, "teammate_mode=",teammate , "fully_obs=", fully_obs, "render_mode=", None) - ig_env = MOIngenious(num_players=n_player, init_draw=draw, num_colors=color, board_size=bs, - teammate_mode=teammate, fully_obs=fully_obs, render_mode=None) - ig_env.reset() + try: + ig_env = MOIngenious(num_players=n_player, init_draw=draw, num_colors=color, + board_size=bs, + teammate_mode=teammate, fully_obs=fully_obs, render_mode=None) + ig_env.reset() train(ig_env) - print("PASS") - except AssertionError as e: + except Exception as e: print(e) + pass + else: + print("PASS") if __name__ == "__main__": + # test move of inginous_base.py + test_move() + # test API + test_API() + # test inginious rule + test_ingenious_rule() + # run this function, you could always find opponents' tiles in observation space check_fully_observable() #check teammate_mode through simulation, it could be found that teammates always share the same score in score board. check_teammate() - #check parameter range by ramdom choose. + # check parameter range by ramdom choose. 
check_parameter_range() - - """ - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=0, teammate_mode=False, - fully_obs=False, render_mode=None) - ig_env.reset() - train(ig_env) - try: - assert False, "this code runs, fails, and the exception is caught" - except AssertionError as e: - print(repr(e)) - - ig_env.reset() - train(ig_env) - #print(ig_env.last()) - - - print("PASS") - - """ From 6a4da56d8125be84b8d819036288716546444e6b Mon Sep 17 00:00:00 2001 From: penggao00 Date: Thu, 14 Mar 2024 14:43:09 +0100 Subject: [PATCH 07/18] without document --- momaland/envs/ingenious/ingenious.py | 92 ++++++---------------- momaland/envs/ingenious/ingenious_base.py | 23 +++--- momaland/envs/ingenious/ingenious_check.py | 77 ++++++++++-------- 3 files changed, 80 insertions(+), 112 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 6e099683..9dcc04b9 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -1,58 +1,8 @@ """Multi-objective Ingenious environment for MOMAland. -This environment is based on the Ingenious game: https://boardgamegeek.com/boardgame/9674/ingenious -Every color is a different objective. The goal in the original game is to maximize the minimum score over all colors, -however we leave the utility wrapper up to the users and only return the vectorial score on each color dimension. -|---|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Agents names | `agent_i for i in [0, 5]` | -| Action Space | Discrete(5544) | -| Observation Space | Dict('action_mask': Box(0, 1, (5544,), int8), 'observation': Dict('board': Box(0.0, 6.0, (15, 15), float32), 'scores': Box(0, 18, (6,), int32), 'tiles': Box(0, 6, (6, 2), int32))) | -| Reward Space | Box(0.0, 18.0, (6,), float32) | -| Import | `momaland.envs.moingenious_v0` - -## Observation Space - -Non Fixed size of the board????? - -The observation space is a continuous box with the length `(num_drones + 1) * 3` where each 3 values represent the XYZ coordinates of the drones in this order: -- the agent. -- the target. -- the other agents. - -Example: - - -## Action Space -The action space is a discrete index representing the move that put tile with color(c1,c2) to the position (h1,h2). - -## Reward Space -The reward space is a 2D vector containing rewards for: -- After certain action, for the current player i, the difference between the old score and the new score for each color in the score board. - -## Starting State -TODO - -## Episode Termination -The episode is terminated if one of the following conditions are met: -- The board is filled. -- Sequential "ingenious" move until using up the tiles.(Complemented rule for winning). - -## Episode Truncation -TODO - -## Init Function -def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, reward_sharing=None, fully_obs=False, render_mode=None,) -- num_players (int): The number of players in the environment. Default: 2 -- init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6 -- num_colors (int): The number of colors in the game. Default: 4 -- board_size (int): The size of the board. 
Default: 0 (0 means the board size id dependent on num_players like { 2:6, 3:7 , 4:8}; otherwise, set the board_size freely between 3 and 8) -- reward_sharing: Partnership Game.It should be a set like {'agent_0':0, 'agent_1':0,'agent_2':1, 'agent_3':1} where teammates will share the reward. Default: None -- fully_obs: Fully observable or not. Default:False -- render_mode (str): The rendering mode. Default: None - +To Write. """ - import functools import random @@ -94,7 +44,16 @@ class MOIngenious(MOAECEnv): metadata = {"render_modes": ["human"], "name": "moingenious_v0", "is_parallelizable": False} - def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, teammate_mode=False, fully_obs=False, render_mode=None,): + def __init__( + self, + num_players=2, + init_draw=6, + num_colors=6, + board_size=0, + teammate_mode=False, + fully_obs=False, + render_mode=None, + ): """Initializes the multi-objective Ingenious game. Args: @@ -106,19 +65,18 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, teamm fully_obs: Fully observable or not. Default:False render_mode (str): The rendering mode. Default: None """ - self.num_colors = num_colors self.init_draw = init_draw self.num_players = num_players - self.limitation_score = 18 # max score in score board for one certain color. - self.teammate_mode=teammate_mode + self.limitation_score = 18 # max score in score board for one certain color. + self.teammate_mode = teammate_mode if self.teammate_mode is True: - assert num_players%2 == 0, "Number of players must be even if teammate_mode is on." - self.limitation_score=self.limitation_score*(num_players/2) + assert num_players % 2 == 0, "Number of players must be even if teammate_mode is on." + self.limitation_score = self.limitation_score * (num_players / 2) self.fully_obs = fully_obs if board_size == 0: - self.board_size = { 2:6, 3:7, 4:8, 5:9, 6:10}.get(self.num_players) + self.board_size = {2: 6, 3: 7, 4: 8, 5: 9, 6: 10}.get(self.num_players) else: self.board_size = board_size @@ -152,7 +110,7 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=0, teamm "board": Box( 0, len(ALL_COLORS), shape=(2 * self.board_size - 1, 2 * self.board_size - 1), dtype=np.float32 ), - "tiles": Box(0, self.num_colors, shape=(self.init_draw, ), dtype=np.int32), + "tiles": Box(0, self.num_colors, shape=(self.init_draw,), dtype=np.int32), "scores": Box(0, self.game.limitation_score, shape=(self.num_colors,), dtype=np.int32), } ), @@ -233,7 +191,7 @@ def step(self, action): if self.refresh_cumulative_reward: self._cumulative_rewards[current_agent] = np.zeros(self.num_colors, dtype="float64") - #update current agent + # update current agent if not self.game.end_flag: prev_rewards = np.array(list(self.game.score[current_agent].values())) self.game.set_action_index(action) @@ -245,12 +203,12 @@ def step(self, action): # update teammate score(copy current agent score to the teammate) if self.teammate_mode is True: - index_current_agent=self.agents.index(current_agent) - for i in range(0,self.num_players): - if i!=index_current_agent and i%2==index_current_agent%2: - agent=self.agents[i] - self.game.score[agent]=self.game.score[current_agent] - self.rewards[agent]= self.rewards[current_agent] + index_current_agent = self.agents.index(current_agent) + for i in range(0, self.num_players): + if i != index_current_agent and i % 2 == index_current_agent % 2: + agent = self.agents[i] + self.game.score[agent] = self.game.score[current_agent] + self.rewards[agent] = 
self.rewards[current_agent] # update accumulate_rewards self._accumulate_rewards() @@ -263,8 +221,6 @@ def step(self, action): else: self.refresh_cumulative_reward = False - - @override def observe(self, agent): board_vals = np.array(self.game.board_array, dtype=np.float32) diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index f75179df..b3add364 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -114,7 +114,7 @@ def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limit assert 2 <= num_colors <= 6, "Number of colors must be between 2 and 6." assert 2 <= init_draw <= 6, "Number of tiles in hand must be between 2 and 6." assert 3 <= board_size <= 10, "Board size must be between 3 and 10." - assert num_players<=num_colors,"Number of players should be smaller than number of colors. " + assert num_players <= num_colors, "Number of players should be smaller than number of colors. " self.board_size = board_size self.num_player = num_players @@ -239,12 +239,10 @@ def tiles_bag_reset(self): self.tiles_bag = int(NUM_TILES / len(diff_color_combinations + same_color_combinations)) * ( diff_color_combinations + same_color_combinations ) - #print(len(self.tiles_bag)) - if self.board_size in [9,10]: + # print(len(self.tiles_bag)) + if self.board_size in [9, 10]: # cannot fill the board for 9 or 10(complement rule) - self.tiles_bag*=2 - - + self.tiles_bag *= 2 # Shuffle the tiles bag self.random.shuffle(self.tiles_bag) @@ -285,7 +283,7 @@ def set_action_index(self, index): self.exclude_action(h1) self.exclude_action(h2) if self.first_round: - #if first round, each player should take different corner + # if first round, each player should take different corner # print('first round', h1,h2) self.exclude_position_first_round(h1) self.exclude_position_first_round(h2) @@ -415,13 +413,14 @@ def log(self): print(self.board_array) print(self.score) print(self.p_tiles) + def exclude_position_first_round(self, pos): - """Exclude available position in self.first_round_pos to ensure that each player begins with a different corner ( each corner is taken once). 
""" + """Exclude available position in self.first_round_pos to ensure that each player begins with a different corner ( each corner is taken once).""" for i in range(0, 6): neighbor_hex = hex_neighbor(pos, i) - if hex_scale(neighbor_hex, 1.0/(self.board_size-1)) in hex_directions: - #neighbor_hex is corner - a=neighbor_hex + if hex_scale(neighbor_hex, 1.0 / (self.board_size - 1)) in hex_directions: + # neighbor_hex is corner + a = neighbor_hex # print("find the corner to remove in first round",a,pos) for k in range(0, 6): hx1 = hex_neighbor(a, k) @@ -437,5 +436,3 @@ def exclude_position_first_round(self, pos): if c2 in self.first_round_pos: self.first_round_pos.remove(c2) break - - diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index e4ff02f8..e35eb721 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -63,13 +63,11 @@ def test_move(): ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) ig_env.reset() # print(ig_env.game.board_array, "nweowjrowhafhif!!!!!!!!!") - flag = True # action map insist the same with index map for i in ig_env.game.action_index_map: h = ig_env.game.action_map.get(ig_env.game.action_index_map[i]) if h is None or h != i: - flag = False break # check legal move index = random_index_of_one(ig_env.game.return_action_list()) @@ -77,13 +75,14 @@ def test_move(): x1, y1 = Hex2ArrayLocation(h1, ig_env.game.board_size) x2, y2 = Hex2ArrayLocation(h2, ig_env.game.board_size) - assert ig_env.game.board_array[x1][y1] == 0 and ig_env.game.board_array[x2][y2] == 0,"Place on board is taken." + assert ig_env.game.board_array[x1][y1] == 0 and ig_env.game.board_array[x2][y2] == 0, "Place on board is taken." ag = ig_env.agent_selection c1, c2 = ig_env.game.p_tiles[ag][card] ig_env.game.set_action_index(index) - assert ig_env.game.board_array[x1][y1] == c1 and ig_env.game.board_array[x2][y2] == c2,"Color is not placed correctly." + assert ig_env.game.board_array[x1][y1] == c1 and ig_env.game.board_array[x2][y2] == c2, "Color is not placed correctly." print("ingenious_base basic move Passed") + def test_step(): """Test move correctly in ingenious_base. @@ -95,7 +94,6 @@ def test_step(): # check legal step - ag = ig_env.agent_selection obs = ig_env.observe(ag) @@ -131,7 +129,7 @@ def test_step(): print("reason3") flag = False return flag - + # check illegal move : put some tile out of hand index = random_index_of_one(ig_env.game.masked_action) ag = ig_env.agent_selection @@ -144,10 +142,8 @@ def test_step(): return flag # check selector - - """ - + """ return flag @@ -191,7 +187,7 @@ def test_ingenious_rule(): ag = -1 sum = 0 ig_env.reset() - ig_env.game.limitation_score=5 + ig_env.game.limitation_score = 5 done = False if_exeed = True @@ -224,10 +220,9 @@ def test_ingenious_rule(): def test_API(): """Test observe interface in ingenous.py.""" ig_env = MOIngenious() - ig_env.limitation_score=10000 - ag = ig_env.agent_selection - obs = ig_env.observe(ag) - masked_act_list = obs["action_mask"] + ig_env.limitation_score = 10000 + # ag = ig_env.agent_selection + # obs = ig_env.observe(ag) # print(sum(masked_act_list)) # print(sum(ig_env.game.masked_action)) env = ig_env @@ -302,7 +297,7 @@ def test_API(): if isinstance(env.observation_space(agent), gymnasium.spaces.Box): assert env.observation_space(agent).dtype == prev_observe.dtype - #These codes are some left codes no need anymore for action is already taken in the env and test_observation not used anymore. 
+ # These codes are some left codes no need anymore for action is already taken in the env and test_observation not used anymore. # assert env.observation_space(agent).contains(prev_observe), "Out of bounds observation: " + str(prev_observe) # assert env.observation_space(agent).contains(prev_observe), "Agent's observation is outside of it's observation space" # test_observation(prev_observe, observation_0) @@ -313,25 +308,25 @@ def test_API(): assert has_finished == generated_agents, "not all agents finished, some were skipped over" print("API ingenious.py Passed") + def check_fully_observable(): """Test observable trigger in ingenous.py.""" ig_env = MOIngenious(fully_obs=True) ig_env.reset() - ag=ig_env.agent_selection + ag = ig_env.agent_selection obs = ig_env.observe(ag) - print("Observation",obs) + print("Observation", obs) + def check_teammate(): """Test teammate(reward sharing) in ingenous.py.""" - - ig_env = MOIngenious(num_players=4 - ,teammate_mode=True) + ig_env = MOIngenious(num_players=4, teammate_mode=True) ig_env.reset() - ag=ig_env.agent_selection + ag = ig_env.agent_selection obs = ig_env.observe(ag) index = random_index_of_one(ig_env.game.return_action_list()) ig_env.step(index) - print("Observation",obs) + print("Observation", obs) done = False while not done: ag = ig_env.agent_selection @@ -342,8 +337,8 @@ def check_teammate(): print("Action: ", action) ig_env.step(action) observation, reward, termination, truncation, _ = ig_env.last() - print("Observations: ", observation['observation']) - print("Rewards: ", reward, "_accumulate_reward(from gymnasium code)",ig_env._cumulative_rewards) + print("Observations: ", observation["observation"]) + print("Rewards: ", reward, "_accumulate_reward(from gymnasium code)", ig_env._cumulative_rewards) print("Truncation: ", truncation) print("Termination: ", termination) done = truncation or termination @@ -351,19 +346,40 @@ def check_teammate(): def check_parameter_range(): + """Simulate all possible parameter to test the game with random choices.""" for n_player in range(2, 7): for draw in range(2, 7): for color in range(n_player, 7): for bs in range(0, 10): for teammate in [True, False]: for fully_obs in [True, False]: - print("num_players=", n_player, " init_draw=", draw, "num_colors=", color, "board_size=", bs, - "teammate_mode=",teammate , "fully_obs=", fully_obs, "render_mode=", None) + print( + "num_players=", + n_player, + " init_draw=", + draw, + "num_colors=", + color, + "board_size=", + bs, + "teammate_mode=", + teammate, + "fully_obs=", + fully_obs, + "render_mode=", + None, + ) try: - ig_env = MOIngenious(num_players=n_player, init_draw=draw, num_colors=color, - board_size=bs, - teammate_mode=teammate, fully_obs=fully_obs, render_mode=None) + ig_env = MOIngenious( + num_players=n_player, + init_draw=draw, + num_colors=color, + board_size=bs, + teammate_mode=teammate, + fully_obs=fully_obs, + render_mode=None, + ) ig_env.reset() train(ig_env) except Exception as e: @@ -384,9 +400,8 @@ def check_parameter_range(): # run this function, you could always find opponents' tiles in observation space check_fully_observable() - #check teammate_mode through simulation, it could be found that teammates always share the same score in score board. + # check teammate_mode through simulation, it could be found that teammates always share the same score in score board. check_teammate() # check parameter range by ramdom choose. 
check_parameter_range() - From 9e7e2a3e66075bb44c4c32b5f7581f88f5ee831b Mon Sep 17 00:00:00 2001 From: threepwoody Date: Tue, 19 Mar 2024 17:08:11 +0100 Subject: [PATCH 08/18] Fixing game parameters --- momaland/envs/ingenious/ingenious.py | 50 +++++++++---------- momaland/envs/ingenious/ingenious_base.py | 24 ++++----- momaland/envs/ingenious/ingenious_check.py | 23 +++++---- momaland/envs/ingenious/ingenious_seedtest.py | 4 +- 4 files changed, 52 insertions(+), 49 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 9dcc04b9..d4dd0a0e 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -46,33 +46,33 @@ class MOIngenious(MOAECEnv): def __init__( self, - num_players=2, - init_draw=6, - num_colors=6, - board_size=0, - teammate_mode=False, - fully_obs=False, - render_mode=None, + num_agents: int = 2, + rack_size: int = 6, + num_colors: int = 6, + board_size: int = None, + reward_mode: str = "competitive", # TODO needs implementation + fully_obs: bool = False, + render_mode: bool = None, ): - """Initializes the multi-objective Ingenious game. + """Initializes the Ingenious environment. Args: - num_players (int): The number of players in the environment. Default: 2 - init_draw (int): The number of tiles each player draws at the beginning of the game. Default: 6 - num_colors (int): The number of colors in the game. Default: 4 - board_size (int): The size of the board. Default: 0 (0 means the board size id dependent on num_players like { 2:6, 3:7 , 4:8}; otherwise, set the board_size freely between 3 and 8) - teammate_mode: Partnership Game or not. Default:False - fully_obs: Fully observable or not. Default:False + num_agents (int): The number of agents (between 2 and 6). Default is 2. + rack_size (int): The number of tiles each player keeps in their rack (between 2 and 6). Default is 6. + num_colors (int): The number of colors (objectives) in the game (between 2 and 6). Default is 6. + board_size (int): The size of one side of the hexagonal board (between 3 and 10). By default the size is set to n+4 where n is the number of agents. + reward_mode (str): Can be set to "competitive" (individual rewards for all agents), "collaborative" (shared rewards for all agents), or "two_teams" (rewards shared within two opposing teams; num_agents needs to be even). Default is "competitive". + fully_obs (bool): Fully observable game mode, i.e. the racks of all players are visible. Default is False. render_mode (str): The rendering mode. Default: None """ self.num_colors = num_colors - self.init_draw = init_draw - self.num_players = num_players + self.init_draw = rack_size + self.num_players = num_agents self.limitation_score = 18 # max score in score board for one certain color. - self.teammate_mode = teammate_mode + self.teammate_mode = reward_mode if self.teammate_mode is True: - assert num_players % 2 == 0, "Number of players must be even if teammate_mode is on." - self.limitation_score = self.limitation_score * (num_players / 2) + assert num_agents % 2 == 0, "Number of players must be even if teammate_mode is on." 
+ self.limitation_score = self.limitation_score * (num_agents / 2) self.fully_obs = fully_obs if board_size == 0: @@ -81,14 +81,14 @@ def __init__( self.board_size = board_size self.game = IngeniousBase( - num_players=self.num_players, - init_draw=self.init_draw, + num_agents=self.num_players, + rack_size=self.init_draw, num_colors=self.num_colors, board_size=self.board_size, - limitation_score=self.limitation_score, + max_score=self.limitation_score, ) - self.possible_agents = ["agent_" + str(r) for r in range(num_players)] + self.possible_agents = ["agent_" + str(r) for r in range(num_agents)] # init list of agent self.agents = self.possible_agents[:] @@ -120,11 +120,11 @@ def __init__( for i in self.agents } - self.action_spaces = dict(zip(self.agents, [Discrete(len(self.game.masked_action))] * num_players)) + self.action_spaces = dict(zip(self.agents, [Discrete(len(self.game.masked_action))] * num_agents)) # The reward after one move is the difference between the previous and current score. self.reward_spaces = dict( - zip(self.agents, [Box(0, self.game.limitation_score, shape=(self.num_colors,))] * num_players) + zip(self.agents, [Box(0, self.game.limitation_score, shape=(self.num_colors,))] * num_agents) ) @functools.lru_cache(maxsize=None) diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index b3add364..b7eb4313 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -100,30 +100,30 @@ def generate_board(board_size): class IngeniousBase: """Base class for Ingenious environment.""" - def __init__(self, num_players=2, init_draw=6, num_colors=6, board_size=6, limitation_score=18): + def __init__(self, num_agents=2, rack_size=6, num_colors=6, board_size=6, max_score=18): """Initialize the Ingenious environment. Args: - num_players (int): Number of players in the game. - init_draw (int): Number of tiles to draw at the beginning of the game. - num_colors (int): Number of colors in the game. - board_size (int): Size of the board. - limitation_score(int): Limitation to refresh the score board for any color. Default: 20 + num_agents (int): The number of agents (between 2 and 6). Default is 2. + rack_size (int): The number of tiles each player keeps in their rack (between 2 and 6). Default is 6. + num_colors (int): The number of colors (objectives) in the game (between 2 and 6). Default is 6. + board_size (int): The size of one side of the hexagonal board (between 3 and 10). Default is 6. + max_score(int): Maximal score possible for any given color/objective. Default: 18 """ - assert 2 <= num_players <= 6, "Number of players must be between 2 and 6." + assert 2 <= num_agents <= 6, "Number of players must be between 2 and 6." assert 2 <= num_colors <= 6, "Number of colors must be between 2 and 6." - assert 2 <= init_draw <= 6, "Number of tiles in hand must be between 2 and 6." + assert 2 <= rack_size <= 6, "Rack size must be between 2 and 6." assert 3 <= board_size <= 10, "Board size must be between 3 and 10." - assert num_players <= num_colors, "Number of players should be smaller than number of colors. " + assert num_agents <= num_colors, "Number of agents cannot be larger than number of colors. 
" self.board_size = board_size - self.num_player = num_players + self.num_player = num_agents self.agents = [f"agent_{i}" for i in range(self.num_player)] self.agent_selector = 0 - self.limitation_score = limitation_score + self.limitation_score = max_score self.colors = num_colors self.corner_color = ALL_COLORS - self.init_draw = init_draw + self.init_draw = rack_size self.board_array = np.zeros([2 * self.board_size - 1, 2 * self.board_size - 1]) self.board_hex = generate_board(self.board_size) # original full board self.action_map = {} diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index e35eb721..c97be489 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -4,8 +4,11 @@ import gymnasium import numpy as np -from ingenious import MOIngenious -from ingenious_base import Hex2ArrayLocation + +from momaland.envs.ingenious.ingenious import MOIngenious + +# from ingenious import MOIngenious +from momaland.envs.ingenious.ingenious_base import Hex2ArrayLocation def train(ig_env): @@ -60,7 +63,7 @@ def test_move(): Returns: True or False """ - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) + ig_env = MOIngenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ig_env.reset() # print(ig_env.game.board_array, "nweowjrowhafhif!!!!!!!!!") @@ -88,7 +91,7 @@ def test_step(): Returns: True or False """ - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) + ig_env = MOIngenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ig_env.reset() flag = True @@ -154,7 +157,7 @@ def test_reset(): Returns: True or False """ - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=4) + ig_env = MOIngenious(num_agents=2, rack_size=2, num_colors=2, board_size=4) ig_env.reset(105) train(ig_env) ig_env.reset(110) @@ -183,7 +186,7 @@ def test_reset(): def test_ingenious_rule(): """Ingenious rule test in a small case setting; when game end successfully, no agent should successively play 3 times.""" - ig_env = MOIngenious(num_players=2, init_draw=2, num_colors=2, board_size=8) + ig_env = MOIngenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ag = -1 sum = 0 ig_env.reset() @@ -320,7 +323,7 @@ def check_fully_observable(): def check_teammate(): """Test teammate(reward sharing) in ingenous.py.""" - ig_env = MOIngenious(num_players=4, teammate_mode=True) + ig_env = MOIngenious(num_agents=4, reward_mode=True) ig_env.reset() ag = ig_env.agent_selection obs = ig_env.observe(ag) @@ -372,11 +375,11 @@ def check_parameter_range(): try: ig_env = MOIngenious( - num_players=n_player, - init_draw=draw, + num_agents=n_player, + rack_size=draw, num_colors=color, board_size=bs, - teammate_mode=teammate, + reward_mode=teammate, fully_obs=fully_obs, render_mode=None, ) diff --git a/momaland/envs/ingenious/ingenious_seedtest.py b/momaland/envs/ingenious/ingenious_seedtest.py index a9215f36..acfa41c9 100644 --- a/momaland/envs/ingenious/ingenious_seedtest.py +++ b/momaland/envs/ingenious/ingenious_seedtest.py @@ -165,9 +165,9 @@ def data_equivalence(data_1, data_2) -> bool: if __name__ == "__main__": - ig_env = MOIngenious(num_players=4, init_draw=4, num_colors=4, board_size=8) + ig_env = MOIngenious(num_agents=4, rack_size=4, num_colors=4, board_size=8) - ig_env2 = MOIngenious(num_players=4, init_draw=4, num_colors=4, board_size=8) + ig_env2 = MOIngenious(num_agents=4, rack_size=4, num_colors=4, board_size=8) env1 = ig_env env2 = 
ig_env2 From 2677d7cda603daf99c2673dae11f0cd392134d69 Mon Sep 17 00:00:00 2001 From: penggao00 Date: Tue, 19 Mar 2024 20:01:35 +0100 Subject: [PATCH 09/18] implement collaborative and two_team --- momaland/envs/ingenious/ingenious.py | 34 +++++++++++---- momaland/envs/ingenious/ingenious_check.py | 49 ++++++++++++++++++---- 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index d4dd0a0e..f753c839 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -50,7 +50,7 @@ def __init__( rack_size: int = 6, num_colors: int = 6, board_size: int = None, - reward_mode: str = "competitive", # TODO needs implementation + reward_mode: str = "competitive", fully_obs: bool = False, render_mode: bool = None, ): @@ -69,13 +69,21 @@ def __init__( self.init_draw = rack_size self.num_players = num_agents self.limitation_score = 18 # max score in score board for one certain color. + assert reward_mode in { + "competitive", + "collaborative", + "two_teams", + }, "reward_mode has to be one element in {'competitive','collaborative','two_teams'}" self.teammate_mode = reward_mode - if self.teammate_mode is True: - assert num_agents % 2 == 0, "Number of players must be even if teammate_mode is on." + + if self.teammate_mode == "two_teams": + assert num_agents % 2 == 0, "Number of players must be even if teammate_mode is two_teams." self.limitation_score = self.limitation_score * (num_agents / 2) + elif self.teammate_mode == "collaborative": + self.limitation_score = self.limitation_score * num_agents self.fully_obs = fully_obs - if board_size == 0: + if board_size is None: self.board_size = {2: 6, 3: 7, 4: 8, 5: 9, 6: 10}.get(self.num_players) else: self.board_size = board_size @@ -202,13 +210,21 @@ def step(self, action): self.terminations = {agent: True for agent in self.agents} # update teammate score(copy current agent score to the teammate) - if self.teammate_mode is True: + if self.teammate_mode != "competitive": index_current_agent = self.agents.index(current_agent) for i in range(0, self.num_players): - if i != index_current_agent and i % 2 == index_current_agent % 2: - agent = self.agents[i] - self.game.score[agent] = self.game.score[current_agent] - self.rewards[agent] = self.rewards[current_agent] + if self.teammate_mode == "two_teams": + # two team mode, players who is teammates of the current agent has the same reward and score + if i != index_current_agent and i % 2 == index_current_agent % 2: + agent = self.agents[i] + self.game.score[agent] = self.game.score[current_agent] + self.rewards[agent] = self.rewards[current_agent] + elif self.teammate_mode == "collaborative": + # collabarotive mode, every player has the same reward and score + if i != index_current_agent: + agent = self.agents[i] + self.game.score[agent] = self.game.score[current_agent] + self.rewards[agent] = self.rewards[current_agent] # update accumulate_rewards self._accumulate_rewards() diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index c97be489..1e8e3076 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -319,16 +319,47 @@ def check_fully_observable(): ag = ig_env.agent_selection obs = ig_env.observe(ag) print("Observation", obs) + print("Fully Observable: Pass") -def check_teammate(): +def check_two_team(): """Test teammate(reward sharing) in ingenous.py.""" - ig_env = MOIngenious(num_agents=4, 
reward_mode=True) + ig_env = MOIngenious(num_agents=4, reward_mode="two_teams") ig_env.reset() ag = ig_env.agent_selection obs = ig_env.observe(ag) - index = random_index_of_one(ig_env.game.return_action_list()) - ig_env.step(index) + # index = random_index_of_one(ig_env.game.return_action_list()) + # ig_env.step(index) + print("Start check_two_team") + print("Observation", obs) + done = False + while not done: + ag = ig_env.agent_selection + print("Agent: ", ag) + obs = ig_env.observe(ag) + masked_act_list = obs["action_mask"] + action = random_index_of_one(masked_act_list) + print("Action: ", action) + ig_env.step(action) + observation, reward, termination, truncation, _ = ig_env.last() + print("Observations: ", observation["observation"]) + print("Rewards: ", reward, "_accumulate_reward(from gymnasium code)", ig_env._cumulative_rewards) + print("Truncation: ", truncation) + print("Termination: ", termination) + done = truncation or termination + print(ig_env.game.score) + print("Stop check_two_team") + + +def check_collaborative(): + """Test teammate(reward sharing) in ingenous.py.""" + ig_env = MOIngenious(num_agents=4, reward_mode="collaborative") + ig_env.reset() + ag = ig_env.agent_selection + obs = ig_env.observe(ag) + # index = random_index_of_one(ig_env.game.return_action_list()) + # ig_env.step(index) + print("Start check_collaborative") print("Observation", obs) done = False while not done: @@ -346,6 +377,7 @@ def check_teammate(): print("Termination: ", termination) done = truncation or termination print(ig_env.game.score) + print("Stop check_collaborative") def check_parameter_range(): @@ -354,7 +386,7 @@ def check_parameter_range(): for draw in range(2, 7): for color in range(n_player, 7): for bs in range(0, 10): - for teammate in [True, False]: + for teammate in ["competitive", "collaborative", "two_teams"]: for fully_obs in [True, False]: print( "num_players=", @@ -403,8 +435,11 @@ def check_parameter_range(): # run this function, you could always find opponents' tiles in observation space check_fully_observable() - # check teammate_mode through simulation, it could be found that teammates always share the same score in score board. - check_teammate() + # check two_team mode through simulation, it could be found that teammates always share the same score in score board. + check_two_team() + + # check collaborative mode through simulation, it could be found that every players always share the same score in score board. + check_collaborative() # check parameter range by ramdom choose. check_parameter_range() From 5d0085d3c826971e840d91f650422f064b5fb062 Mon Sep 17 00:00:00 2001 From: threepwoody Date: Tue, 19 Mar 2024 21:29:52 +0100 Subject: [PATCH 10/18] Improved some comments and variable naming in Ingenious --- momaland/envs/ingenious/ingenious.py | 79 ++++++------ momaland/envs/ingenious/ingenious_base.py | 116 ++++++++---------- momaland/envs/ingenious/ingenious_check.py | 22 ++-- momaland/envs/ingenious/ingenious_seedtest.py | 6 +- 4 files changed, 103 insertions(+), 120 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index f753c839..d42b2f87 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -1,6 +1,16 @@ -"""Multi-objective Ingenious environment for MOMAland. +"""Ingenious environment. 
+ +|--------------------|--------------------------------------------------| +| Actions | Discrete | +| Parallel API | No | +| Manual Control | No | +| Agents | 2 | +| Action Shape | (1,) | +| Action Values | Discrete(board_width=8 * board_height=8 * 3) | +| Observation Shape | (board_height=8, board_width=8, 2) | +| Observation Values | [0,1] | +| Reward Shape | (num_objectives=4,) | -To Write. """ import functools @@ -19,13 +29,13 @@ def env(**kwargs): - """Autowrapper for multi-objective Ingenious game. + """Returns the wrapped Ingenious environment in `AEC` format. Args: - **kwargs: keyword args to forward to the parallel_env function + **kwargs: keyword args to forward to the raw_env function Returns: - A fully wrapped env + A fully wrapped AEC env """ env = raw_env(**kwargs) @@ -35,12 +45,12 @@ def env(**kwargs): def raw_env(**kwargs): - """Env factory function for multi-objective Ingenious game.""" - return MOIngenious(**kwargs) + """Env factory function for the Ingenious environment.""" + return Ingenious(**kwargs) -class MOIngenious(MOAECEnv): - """Environment for the multi-objective Ingenious game.""" +class Ingenious(MOAECEnv): + """Environment for the Ingenious board game.""" metadata = {"render_modes": ["human"], "name": "moingenious_v0", "is_parallelizable": False} @@ -67,29 +77,28 @@ def __init__( """ self.num_colors = num_colors self.init_draw = rack_size - self.num_players = num_agents self.limitation_score = 18 # max score in score board for one certain color. assert reward_mode in { "competitive", "collaborative", "two_teams", }, "reward_mode has to be one element in {'competitive','collaborative','two_teams'}" - self.teammate_mode = reward_mode + self.reward_mode = reward_mode + self.fully_obs = fully_obs - if self.teammate_mode == "two_teams": - assert num_agents % 2 == 0, "Number of players must be even if teammate_mode is two_teams." + if self.reward_mode == "two_teams": + assert num_agents % 2 == 0, "Number of players must be even if reward_mode is two_teams." 
self.limitation_score = self.limitation_score * (num_agents / 2) - elif self.teammate_mode == "collaborative": + elif self.reward_mode == "collaborative": self.limitation_score = self.limitation_score * num_agents - self.fully_obs = fully_obs if board_size is None: - self.board_size = {2: 6, 3: 7, 4: 8, 5: 9, 6: 10}.get(self.num_players) + self.board_size = {2: 6, 3: 7, 4: 8, 5: 9, 6: 10}.get(self.num_agents) else: self.board_size = board_size self.game = IngeniousBase( - num_agents=self.num_players, + num_agents=self.num_agents, rack_size=self.init_draw, num_colors=self.num_colors, board_size=self.board_size, @@ -97,9 +106,7 @@ def __init__( ) self.possible_agents = ["agent_" + str(r) for r in range(num_agents)] - # init list of agent self.agents = self.possible_agents[:] - self.terminations = {agent: False for agent in self.agents} self.truncations = {agent: False for agent in self.agents} self.infos = {agent: {} for agent in self.agents} @@ -108,8 +115,6 @@ def __init__( self.refresh_cumulative_reward = True self.render_mode = render_mode - # Observation space is a dict of 2 elements: actions mask and game state (board, agent own tile bag, - # agent score) self.observation_spaces = { i: Dict( { @@ -119,7 +124,7 @@ def __init__( 0, len(ALL_COLORS), shape=(2 * self.board_size - 1, 2 * self.board_size - 1), dtype=np.float32 ), "tiles": Box(0, self.num_colors, shape=(self.init_draw,), dtype=np.int32), - "scores": Box(0, self.game.limitation_score, shape=(self.num_colors,), dtype=np.int32), + "scores": Box(0, self.game.max_score, shape=(self.num_colors,), dtype=np.int32), } ), "action_mask": Box(low=0, high=1, shape=(len(self.game.masked_action),), dtype=np.int8), @@ -130,10 +135,8 @@ def __init__( self.action_spaces = dict(zip(self.agents, [Discrete(len(self.game.masked_action))] * num_agents)) - # The reward after one move is the difference between the previous and current score. - self.reward_spaces = dict( - zip(self.agents, [Box(0, self.game.limitation_score, shape=(self.num_colors,))] * num_agents) - ) + # The reward for each move is the difference between the previous and current score. + self.reward_spaces = dict(zip(self.agents, [Box(0, self.game.max_score, shape=(self.num_colors,))] * num_agents)) @functools.lru_cache(maxsize=None) @override @@ -148,7 +151,6 @@ def action_space(self, agent): @override def reward_space(self, agent): - """Returns the reward space for the given agent.""" return self.reward_spaces[agent] @override @@ -165,8 +167,7 @@ def render(self): @override def reset(self, seed=None, options=None): """Reset needs to initialize the `agents` attribute and must set up the environment so that render(), - and step() can be called without issues. 
- """ + and step() can be called without issues.""" if seed is not None: np.random.seed(seed) random.seed(seed) @@ -190,7 +191,6 @@ def step(self, action): Args: action: action of the active agent """ - current_agent = self.agent_selection if self.terminations[current_agent] or self.truncations[current_agent]: @@ -209,24 +209,23 @@ def step(self, action): if self.game.end_flag: self.terminations = {agent: True for agent in self.agents} - # update teammate score(copy current agent score to the teammate) - if self.teammate_mode != "competitive": + # update teammate score (copy current agent's score to teammates) + if self.reward_mode != "competitive": index_current_agent = self.agents.index(current_agent) - for i in range(0, self.num_players): - if self.teammate_mode == "two_teams": - # two team mode, players who is teammates of the current agent has the same reward and score + for i in range(0, self.num_agents): + if self.reward_mode == "two_teams": + # in two_team mode, players who are teammates of the current agent get the same reward and score if i != index_current_agent and i % 2 == index_current_agent % 2: agent = self.agents[i] self.game.score[agent] = self.game.score[current_agent] self.rewards[agent] = self.rewards[current_agent] - elif self.teammate_mode == "collaborative": - # collabarotive mode, every player has the same reward and score + elif self.reward_mode == "collaborative": + # in collaborative mode, every player gets the same reward and score if i != index_current_agent: agent = self.agents[i] self.game.score[agent] = self.game.score[current_agent] self.rewards[agent] = self.rewards[current_agent] - # update accumulate_rewards self._accumulate_rewards() # update to next agent @@ -243,15 +242,11 @@ def observe(self, agent): if self.fully_obs: p_tiles = np.array([item for item in self.game.p_tiles.values()], dtype=np.int32) else: - # print(self.game.p_tiles[agent]) p_tiles = np.array(self.game.p_tiles[agent], dtype=np.int32) - # p_score = np.array(list(self.game.score[agent].values()), dtype=np.int32) - # show all score board tmp = [] for agent_score in self.game.score.values(): tmp.append([score for score in agent_score.values()]) p_score = np.array(tmp, dtype=np.int32) - observation = {"board": board_vals, "tiles": p_tiles, "scores": p_score} action_mask = np.array(self.game.return_action_list(), dtype=np.int8) return {"observation": observation, "action_mask": action_mask} diff --git a/momaland/envs/ingenious/ingenious_base.py b/momaland/envs/ingenious/ingenious_base.py index b7eb4313..317da72f 100644 --- a/momaland/envs/ingenious/ingenious_base.py +++ b/momaland/envs/ingenious/ingenious_base.py @@ -1,6 +1,6 @@ -"""Base class for Ingenious environment. +"""Base class for the Ingenious environment. -This class is not meant to be instantiated directly. This class supports the MOIngenious environment and provides the +This class is not meant to be instantiated directly. This class supports the Ingenious environment and provides the board and rules. 
""" @@ -11,12 +11,6 @@ import numpy as np -# red 12-pointed star -# green circle -# blue 6-pointed star -# orange hexagon -# yellow 24-pointed star -# purple ring RED = 1 GREEN = 2 BLUE = 3 @@ -25,13 +19,12 @@ PURPLE = 6 ALL_COLORS = [RED, GREEN, BLUE, ORANGE, YELLOW, PURPLE] COLOR_NAMES = ["red", "green", "blue", "orange", "yellow", "purple"] - NUM_TILES = 120 Hex = collections.namedtuple("Hex", ["q", "r", "s"]) def hex_coord(q, r, s): - """Create a cube-based coordinates.""" + """Create a cube-based coordinate.""" assert not (round(q + r + s) != 0), "q + r + s must be 0" return Hex(q, r, s) @@ -98,7 +91,7 @@ def generate_board(board_size): class IngeniousBase: - """Base class for Ingenious environment.""" + """Base class for the Ingenious environment.""" def __init__(self, num_agents=2, rack_size=6, num_colors=6, board_size=6, max_score=18): """Initialize the Ingenious environment. @@ -117,13 +110,13 @@ def __init__(self, num_agents=2, rack_size=6, num_colors=6, board_size=6, max_sc assert num_agents <= num_colors, "Number of agents cannot be larger than number of colors. " self.board_size = board_size - self.num_player = num_agents - self.agents = [f"agent_{i}" for i in range(self.num_player)] + self.num_agents = num_agents + self.agents = [f"agent_{i}" for i in range(self.num_agents)] self.agent_selector = 0 - self.limitation_score = max_score + self.max_score = max_score self.colors = num_colors self.corner_color = ALL_COLORS - self.init_draw = rack_size + self.rack_size = rack_size self.board_array = np.zeros([2 * self.board_size - 1, 2 * self.board_size - 1]) self.board_hex = generate_board(self.board_size) # original full board self.action_map = {} @@ -145,7 +138,7 @@ def __init__(self, num_agents=2, rack_size=6, num_colors=6, board_size=6, max_sc neighbour = hex_neighbor(loc, direct) if neighbour not in self.board_hex: continue - for i in range(0, self.init_draw): + for i in range(0, self.rack_size): if (loc, neighbour, i) not in self.action_map: self.action_map[(loc, neighbour, i)] = self.action_size self.action_index_map[self.action_size] = (loc, neighbour, i) @@ -174,7 +167,7 @@ def reset_game(self, seed=None): neighbour = hex_neighbor(loc, direct) if neighbour not in self.board_hex: continue - for i in range(0, self.init_draw): + for i in range(0, self.rack_size): if (loc, neighbour, i) not in self.action_map: self.action_map[(loc, neighbour, i)] = self.action_size self.action_index_map[self.action_size] = (loc, neighbour, i) @@ -192,17 +185,17 @@ def reset_game(self, seed=None): self.score = {agent: {ALL_COLORS[i]: 0 for i in range(0, self.colors)} for agent in self.agents} def draw_tiles_fill(self): - """Draw tiles for single player with amount(self.init_draw) of tiles.""" - return [self.tiles_bag.pop(self.random.randrange(len(self.tiles_bag))) for _ in range(self.init_draw)] + """Draw rack_size tiles for single player.""" + return [self.tiles_bag.pop(self.random.randrange(len(self.tiles_bag))) for _ in range(self.rack_size)] def get_tile(self, a): """Draw tiles for a specific player.""" - while len(self.p_tiles[a]) < self.init_draw: + while len(self.p_tiles[a]) < self.rack_size: self.p_tiles[a].append(self.tiles_bag.pop(self.random.randrange(len(self.tiles_bag)))) return def initial_corner(self): - """Initialise the corner of the board with the 6 colors.""" + """Initialise the corners of the board with the 6 colors.""" for i in range(0, 6): a = hex_scale(hex_directions[i], self.board_size - 1) x, y = Hex2ArrayLocation(a, self.board_size) @@ -217,7 +210,7 @@ def 
initial_corner(self): hx2 = hex_neighbor(hx1, j) if (hx2 not in self.board_hex) or (hx1 not in self.board_hex) or (hx2 == a): continue - for card in range(0, self.init_draw): + for card in range(0, self.rack_size): c1 = self.action_map[(hx1, hx2, card)] c2 = self.action_map[(hx2, hx1, card)] self.first_round_pos.add(c1) @@ -239,7 +232,6 @@ def tiles_bag_reset(self): self.tiles_bag = int(NUM_TILES / len(diff_color_combinations + same_color_combinations)) * ( diff_color_combinations + same_color_combinations ) - # print(len(self.tiles_bag)) if self.board_size in [9, 10]: # cannot fill the board for 9 or 10(complement rule) self.tiles_bag *= 2 @@ -249,19 +241,18 @@ def tiles_bag_reset(self): def set_action_index(self, index): """Apply the corresponding action for the given index on the board.""" - """If selected actions is not a legal move, return False""" assert self.masked_action[index] == 1, "Illegal move, choose a valid action." if self.first_round: assert index in self.first_round_pos, ( "Illegal move, in the first round tiles can only be placed next to " "corners." ) - """Hex Coordinate: h1,h2 ; Tile to play: card""" - h1, h2, card = self.action_index_map[index] + # Hex Coordinate: h1,h2 ; Tile to play: tile + h1, h2, tile = self.action_index_map[index] agent_i = self.agent_selector agent = self.agents[agent_i] - assert card < len(self.p_tiles[agent]), "Illegal move: choosing tile out of hand(happening after ingenious)" - """Extract the certain tile (color1 , color2) as (c1,c2)""" - c1, c2 = self.p_tiles[agent][card] + assert tile < len(self.p_tiles[agent]), "Illegal move: choosing tile out of rack" + # Extract the tile (color1 , color2) as (c1,c2) + c1, c2 = self.p_tiles[agent][tile] # Translate Hex Coordinate to Offset Coordinate(x,y) x1, y1 = Hex2ArrayLocation(h1, self.board_size) x2, y2 = Hex2ArrayLocation(h2, self.board_size) @@ -275,61 +266,60 @@ def set_action_index(self, index): self.p_tiles[agent].remove(item) flag = True break - assert flag, "Illegal move: set the tile to the coordinate unsuccessfully" - """Update the mask_action list after the action""" + assert flag, "Illegal move: unsuccessfully setting the tile to the coordinate" + # Update the mask_action list after the action self.legal_move.remove(index) self.board_array[x1][y1] = c1 self.board_array[x2][y2] = c2 self.exclude_action(h1) self.exclude_action(h2) if self.first_round: - # if first round, each player should take different corner - # print('first round', h1,h2) + # In the first round, every player must start in a different corner self.exclude_position_first_round(h1) self.exclude_position_first_round(h2) - """Flag to signal if ingenious is called """ + # Flag to signal if ingenious is called skip_flag = False - """flags to avoid calling ingenious on colour that was already maxed out """ + # flags to avoid calling ingenious on colour that was already maxed out ingenious_possible = [True, True] - if self.score[agent][c1] == self.limitation_score: + if self.score[agent][c1] == self.max_score: ingenious_possible[0] = False - if self.score[agent][c2] == self.limitation_score: + if self.score[agent][c2] == self.max_score: ingenious_possible[1] = False - """Update score through checking 5 neighboring directions for h1 and h2 independently""" + # Update score through checking 5 neighboring directions for h1 and h2 independently self.score[agent][c1] += self.calculate_score_for_piece(h1, h2, c1) self.score[agent][c2] += self.calculate_score_for_piece(h2, h1, c2) - if self.score[agent][c1] > self.limitation_score and 
ingenious_possible[0]: + if self.score[agent][c1] > self.max_score and ingenious_possible[0]: skip_flag = True - self.score[agent][c1] = self.limitation_score - if self.score[agent][c2] > self.limitation_score and ingenious_possible[1]: + self.score[agent][c1] = self.max_score + if self.score[agent][c2] > self.max_score and ingenious_possible[1]: skip_flag = True - self.score[agent][c2] = self.limitation_score + self.score[agent][c2] = self.max_score - """End game if no more legal actions.""" + # End game if no more legal actions. if len(self.legal_move) == 0: self.end_flag = True - # Preserve the number of tiles in hand for each player to comply with observation dimensions - while len(self.p_tiles[agent]) < self.init_draw: + # Preserve the number of tiles in rack for each player to comply with observation dimensions + while len(self.p_tiles[agent]) < self.rack_size: self.p_tiles[agent].append((0, 0)) return True - """All tiles in hand has been played""" + # All tiles in rack have been played if len(self.p_tiles[agent]) == 0: - self.end_flag = True # The player should win instantly if he plays out all the tiles in hand. - # Preserve the number of tiles in hand for each player to comply with observation dimensions - while len(self.p_tiles[agent]) < self.init_draw: + self.end_flag = True # The player should win instantly if he plays out all the tiles in rack. + # Preserve the number of tiles in rack for each player to comply with observation dimensions + while len(self.p_tiles[agent]) < self.rack_size: self.p_tiles[agent].append((0, 0)) return True - """In the original rules of the game, when a player calls ingenious, they can play a bonus round without - replenishing tiles in hand. However, due to implementation constraints in our case the player replenishes its - hand in all cases (ingenious or not)""" + # In the original rules of the game, when a player calls ingenious, they can play a bonus round without + # replenishing the tiles in the rack. However, in our implementation the player replenishes their rack in all + # cases, ingenious or not. self.get_tile(agent) # Rule that says if you have no tiles of a color, you can swap your tiles with the lowest score. 
- self.refresh_hand(agent) + self.refresh_rack(agent) # Pass turn to next player if ingenious was not called if not skip_flag: self.next_turn() @@ -356,7 +346,7 @@ def exclude_action(self, hx): hx2 = hex_neighbor(hx, i) if hx2 not in self.board_hex: continue - for card in range(0, self.init_draw): + for card in range(0, self.rack_size): x = self.action_map[(hx, hx2, card)] self.masked_action[x] = 0 if x in self.legal_move: @@ -368,25 +358,24 @@ def exclude_action(self, hx): def next_turn(self): """Move to the next turn.""" - self.agent_selector = (self.agent_selector + 1) % self.num_player + self.agent_selector = (self.agent_selector + 1) % self.num_agents if self.agent_selector == 0 and self.first_round: self.first_round = False return self.agent_selector - def refresh_hand(self, player): - """Additional rule to refresh hand-held tiles.""" - """find the color for which the player has the lowest score""" + def refresh_rack(self, player): + """Additional rule to refresh rack tiles.""" + # find the color for which the player has the lowest score minval = min(self.score[player].values()) flag_lowest_score = False for item in self.p_tiles[player]: for col in item: - # print(player,self.p_tiles[player],item, col, self.score[player]) if self.score[player][col] == minval: flag_lowest_score = True if flag_lowest_score: break if not flag_lowest_score: - """no lowest score color""" + # no lowest score color # save current unused tiles to add them back to the tiles bag back_up = self.p_tiles[player].copy() # clear the player's tiles @@ -408,27 +397,26 @@ def return_action_list(self): def log(self): """Print the current status of the game.""" - print({"board_size": self.board_size, "num_players": self.num_player}) + print({"board_size": self.board_size, "num_players": self.num_agents}) print("selector", self.agent_selector) print(self.board_array) print(self.score) print(self.p_tiles) def exclude_position_first_round(self, pos): - """Exclude available position in self.first_round_pos to ensure that each player begins with a different corner ( each corner is taken once).""" + """Ensure that each player begins with a different corner.""" for i in range(0, 6): neighbor_hex = hex_neighbor(pos, i) if hex_scale(neighbor_hex, 1.0 / (self.board_size - 1)) in hex_directions: # neighbor_hex is corner a = neighbor_hex - # print("find the corner to remove in first round",a,pos) for k in range(0, 6): hx1 = hex_neighbor(a, k) for j in range(0, 6): hx2 = hex_neighbor(hx1, j) if (hx2 not in self.board_hex) or (hx1 not in self.board_hex) or (hx2 == a): continue - for card in range(0, self.init_draw): + for card in range(0, self.rack_size): c1 = self.action_map[(hx1, hx2, card)] c2 = self.action_map[(hx2, hx1, card)] if c1 in self.first_round_pos: diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index 1e8e3076..cfa92f49 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -5,7 +5,7 @@ import gymnasium import numpy as np -from momaland.envs.ingenious.ingenious import MOIngenious +from momaland.envs.ingenious.ingenious import Ingenious # from ingenious import MOIngenious from momaland.envs.ingenious.ingenious_base import Hex2ArrayLocation @@ -63,7 +63,7 @@ def test_move(): Returns: True or False """ - ig_env = MOIngenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) + ig_env = Ingenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ig_env.reset() # print(ig_env.game.board_array, 
"nweowjrowhafhif!!!!!!!!!") @@ -91,7 +91,7 @@ def test_step(): Returns: True or False """ - ig_env = MOIngenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) + ig_env = Ingenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ig_env.reset() flag = True @@ -157,7 +157,7 @@ def test_reset(): Returns: True or False """ - ig_env = MOIngenious(num_agents=2, rack_size=2, num_colors=2, board_size=4) + ig_env = Ingenious(num_agents=2, rack_size=2, num_colors=2, board_size=4) ig_env.reset(105) train(ig_env) ig_env.reset(110) @@ -186,11 +186,11 @@ def test_reset(): def test_ingenious_rule(): """Ingenious rule test in a small case setting; when game end successfully, no agent should successively play 3 times.""" - ig_env = MOIngenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) + ig_env = Ingenious(num_agents=2, rack_size=2, num_colors=2, board_size=8) ag = -1 sum = 0 ig_env.reset() - ig_env.game.limitation_score = 5 + ig_env.game.max_score = 5 done = False if_exeed = True @@ -222,7 +222,7 @@ def test_ingenious_rule(): def test_API(): """Test observe interface in ingenous.py.""" - ig_env = MOIngenious() + ig_env = Ingenious() ig_env.limitation_score = 10000 # ag = ig_env.agent_selection # obs = ig_env.observe(ag) @@ -314,7 +314,7 @@ def test_API(): def check_fully_observable(): """Test observable trigger in ingenous.py.""" - ig_env = MOIngenious(fully_obs=True) + ig_env = Ingenious(fully_obs=True) ig_env.reset() ag = ig_env.agent_selection obs = ig_env.observe(ag) @@ -324,7 +324,7 @@ def check_fully_observable(): def check_two_team(): """Test teammate(reward sharing) in ingenous.py.""" - ig_env = MOIngenious(num_agents=4, reward_mode="two_teams") + ig_env = Ingenious(num_agents=4, reward_mode="two_teams") ig_env.reset() ag = ig_env.agent_selection obs = ig_env.observe(ag) @@ -353,7 +353,7 @@ def check_two_team(): def check_collaborative(): """Test teammate(reward sharing) in ingenous.py.""" - ig_env = MOIngenious(num_agents=4, reward_mode="collaborative") + ig_env = Ingenious(num_agents=4, reward_mode="collaborative") ig_env.reset() ag = ig_env.agent_selection obs = ig_env.observe(ag) @@ -406,7 +406,7 @@ def check_parameter_range(): ) try: - ig_env = MOIngenious( + ig_env = Ingenious( num_agents=n_player, rack_size=draw, num_colors=color, diff --git a/momaland/envs/ingenious/ingenious_seedtest.py b/momaland/envs/ingenious/ingenious_seedtest.py index acfa41c9..17abcb2b 100644 --- a/momaland/envs/ingenious/ingenious_seedtest.py +++ b/momaland/envs/ingenious/ingenious_seedtest.py @@ -3,7 +3,7 @@ # import random import numpy as np -from ingenious import MOIngenious +from ingenious import Ingenious # from ingenious_base import Hex2ArrayLocation @@ -165,9 +165,9 @@ def data_equivalence(data_1, data_2) -> bool: if __name__ == "__main__": - ig_env = MOIngenious(num_agents=4, rack_size=4, num_colors=4, board_size=8) + ig_env = Ingenious(num_agents=4, rack_size=4, num_colors=4, board_size=8) - ig_env2 = MOIngenious(num_agents=4, rack_size=4, num_colors=4, board_size=8) + ig_env2 = Ingenious(num_agents=4, rack_size=4, num_colors=4, board_size=8) env1 = ig_env env2 = ig_env2 From 34093b4baa12ca114274c17a49e4d9eab08556c0 Mon Sep 17 00:00:00 2001 From: threepwoody Date: Tue, 19 Mar 2024 22:21:08 +0100 Subject: [PATCH 11/18] Docstring for Ingenious --- momaland/envs/ingenious/ingenious.py | 90 ++++++++++++++++++---- momaland/envs/ingenious/ingenious_check.py | 2 +- 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py 
b/momaland/envs/ingenious/ingenious.py index d42b2f87..2ee64329 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -1,15 +1,65 @@ """Ingenious environment. -|--------------------|--------------------------------------------------| -| Actions | Discrete | -| Parallel API | No | -| Manual Control | No | -| Agents | 2 | -| Action Shape | (1,) | -| Action Values | Discrete(board_width=8 * board_height=8 * 3) | -| Observation Shape | (board_height=8, board_width=8, 2) | -| Observation Values | [0,1] | -| Reward Shape | (num_objectives=4,) | +|--------------------|--------------------------------------------------------------| +| Actions | Discrete | +| Parallel API | No | +| Manual Control | No | +| Agents | num_agents=2 | +| Action Shape | (1,) | +| Action Values | Discrete(size depends on board size and rack size: there | +| | is one integer encoding the placement of each rack tile | +| | on each board hex in each possible direction.) | +| Observations | Observations are dicts with three entries: | +| | "board": array with size (2*board_size-1, 2*board_size-1) | +| | containing values from 0 to num_colors; | +| | "racks": for each observable agent, an array of length | +| | rack_size containing pairs of values from 0 to num_colors; | +| | "scores": for all agents, their scores in all num_colors | +| | objectives as values from 0 to max_score. | +| Reward Shape | (num_colors=6,) | + +This environment is based on the Ingenious game: https://boardgamegeek.com/boardgame/9674/ingenious + +The game's original rules support multiple players collecting scores in multiple colors, which we define as the +objectives of the game: for example (red=5, green=2, blue=9). The goal in the original game is to maximize the +minimum score over all colors (2 in the example above), however we leave the utility wrapper up to the users and only +return the vectorial score on each color dimension (5,2,9). + + +### Observation Space + +The observation is a dictionary which contains an 'observation' element which is the usual RL observation, +and an 'action_mask' which holds the legal moves, described in the Legal Actions Mask section below. + +The 'observation' element itself is a dictionary with three entries: 'board' is representing the hexagonal board as +an array of size (2*board_size-1, 2*board_size-1) with integer entries from 0 (empty hex) to num_colors (tiles of +different colors). 'racks' represents for each observable agent - by default only the acting agent, if fully_obs=True +all agents - their tiles rack as an array of size rack_size containing pairs of integers (each pair is a tile) from 0 +to num_colors. 'scores' represents for all agents their current scores in all num_colors objectives, as integers from +0 to max_score. + + +#### Legal Actions Mask + +The legal moves available to the current agent are found in the 'action_mask' element of the dictionary observation. +The 'action_mask' is a binary vector where each index of the vector represents whether the represented action is legal +or not; the action encoding is described in the Action Space section below. +The 'action_mask' will be all zeros for any agent except the one whose turn it is. + + +### Action Space + +The action space depends on board size and rack size: It contains one integer for each possible placement of any of +the player's rack tiles (rack_size parameter) on any board hex (board_size parameter) in every possible direction. 
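
To make this flat encoding concrete, the sketch below decodes an action index back into the two adjacent hexes a tile covers and the rack slot it is played from. It is an illustration only, not part of any patch above, and it relies on the internal `action_index_map` dictionary that `IngeniousBase.__init__` builds in the earlier diffs, so the `env.game` attribute access is for inspection rather than public API use:

    from momaland.envs.ingenious.ingenious import Ingenious

    env = Ingenious(num_agents=2, rack_size=6, num_colors=6)  # board_size defaults from num_agents
    env.reset(seed=42)

    agent = env.agent_selection
    mask = env.observe(agent)["action_mask"]   # binary vector over the flat action space
    action = int(mask.argmax())                # index of the first legal action
    # internal lookup shown in ingenious_base.py: (hex_1, hex_2, rack_slot)
    hex_1, hex_2, rack_slot = env.game.action_index_map[action]
    print(hex_1, hex_2, rack_slot)
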
+ + +### Rewards + +The agents can collect a separate score in each available color. These scores are the num_colors different reward +dimensions. + + +### Version History """ @@ -70,14 +120,20 @@ def __init__( num_agents (int): The number of agents (between 2 and 6). Default is 2. rack_size (int): The number of tiles each player keeps in their rack (between 2 and 6). Default is 6. num_colors (int): The number of colors (objectives) in the game (between 2 and 6). Default is 6. - board_size (int): The size of one side of the hexagonal board (between 3 and 10). By default the size is set to n+4 where n is the number of agents. - reward_mode (str): Can be set to "competitive" (individual rewards for all agents), "collaborative" (shared rewards for all agents), or "two_teams" (rewards shared within two opposing teams; num_agents needs to be even). Default is "competitive". + + board_size (int): The size of one side of the hexagonal board (between 3 and 10). By default the size is set + to n+4 where n is the number of agents. + + reward_mode (str): Can be set to "competitive" (individual rewards for all agents), "collaborative" (shared + rewards for all agents), or "two_teams" (rewards shared within two opposing teams; num_agents needs to be + even). Default is "competitive". + fully_obs (bool): Fully observable game mode, i.e. the racks of all players are visible. Default is False. render_mode (str): The rendering mode. Default: None """ self.num_colors = num_colors self.init_draw = rack_size - self.limitation_score = 18 # max score in score board for one certain color. + self.max_score = 18 # max score in score board for one certain color. assert reward_mode in { "competitive", "collaborative", @@ -88,9 +144,9 @@ def __init__( if self.reward_mode == "two_teams": assert num_agents % 2 == 0, "Number of players must be even if reward_mode is two_teams." 
- self.limitation_score = self.limitation_score * (num_agents / 2) + self.max_score = self.max_score * (num_agents / 2) elif self.reward_mode == "collaborative": - self.limitation_score = self.limitation_score * num_agents + self.max_score = self.max_score * num_agents if board_size is None: self.board_size = {2: 6, 3: 7, 4: 8, 5: 9, 6: 10}.get(self.num_agents) @@ -102,7 +158,7 @@ def __init__( rack_size=self.init_draw, num_colors=self.num_colors, board_size=self.board_size, - max_score=self.limitation_score, + max_score=self.max_score, ) self.possible_agents = ["agent_" + str(r) for r in range(num_agents)] @@ -247,6 +303,6 @@ def observe(self, agent): for agent_score in self.game.score.values(): tmp.append([score for score in agent_score.values()]) p_score = np.array(tmp, dtype=np.int32) - observation = {"board": board_vals, "tiles": p_tiles, "scores": p_score} + observation = {"board": board_vals, "racks": p_tiles, "scores": p_score} action_mask = np.array(self.game.return_action_list(), dtype=np.int8) return {"observation": observation, "action_mask": action_mask} diff --git a/momaland/envs/ingenious/ingenious_check.py b/momaland/envs/ingenious/ingenious_check.py index cfa92f49..6295917a 100644 --- a/momaland/envs/ingenious/ingenious_check.py +++ b/momaland/envs/ingenious/ingenious_check.py @@ -223,7 +223,7 @@ def test_ingenious_rule(): def test_API(): """Test observe interface in ingenous.py.""" ig_env = Ingenious() - ig_env.limitation_score = 10000 + ig_env.max_score = 10000 # ag = ig_env.agent_selection # obs = ig_env.observe(ag) # print(sum(masked_act_list)) From 10a4066c32cc419983c629fb48cca651d98f7a69 Mon Sep 17 00:00:00 2001 From: threepwoody Date: Wed, 20 Mar 2024 20:58:52 +0100 Subject: [PATCH 12/18] Fixed num_agents --- momaland/envs/ingenious/ingenious.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 2ee64329..4fb40a06 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -149,12 +149,12 @@ def __init__( self.max_score = self.max_score * num_agents if board_size is None: - self.board_size = {2: 6, 3: 7, 4: 8, 5: 9, 6: 10}.get(self.num_agents) + self.board_size = {2: 6, 3: 7, 4: 8, 5: 9, 6: 10}.get(num_agents) else: self.board_size = board_size self.game = IngeniousBase( - num_agents=self.num_agents, + num_agents=num_agents, rack_size=self.init_draw, num_colors=self.num_colors, board_size=self.board_size, From bedeb1e92a2c3955842fd9f2ffcad074c176d280 Mon Sep 17 00:00:00 2001 From: threepwoody Date: Wed, 20 Mar 2024 21:53:12 +0100 Subject: [PATCH 13/18] fixed observation naming --- momaland/envs/ingenious/ingenious.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 4fb40a06..76590d15 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -179,7 +179,7 @@ def __init__( "board": Box( 0, len(ALL_COLORS), shape=(2 * self.board_size - 1, 2 * self.board_size - 1), dtype=np.float32 ), - "tiles": Box(0, self.num_colors, shape=(self.init_draw,), dtype=np.int32), + "racks": Box(0, self.num_colors, shape=(self.init_draw,), dtype=np.int32), "scores": Box(0, self.game.max_score, shape=(self.num_colors,), dtype=np.int32), } ), From a9511e482bc958802d94ec53c9b0e2471830d926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roxana=20R=C4=83dulescu?= <8026679+rradules@users.noreply.github.com> Date: Wed, 20 Mar 2024 
22:27:55 +0100 Subject: [PATCH 14/18] update obs space for competitive mode - scores component --- momaland/envs/ingenious/ingenious.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 76590d15..35550e49 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -179,8 +179,8 @@ def __init__( "board": Box( 0, len(ALL_COLORS), shape=(2 * self.board_size - 1, 2 * self.board_size - 1), dtype=np.float32 ), - "racks": Box(0, self.num_colors, shape=(self.init_draw,), dtype=np.int32), - "scores": Box(0, self.game.max_score, shape=(self.num_colors,), dtype=np.int32), + "racks": Box(0, self.num_colors, shape=(self.init_draw, 2), dtype=np.int32), + "scores": Box(0, self.game.max_score, shape=(num_agents, self.num_colors), dtype=np.int32), } ), "action_mask": Box(low=0, high=1, shape=(len(self.game.masked_action),), dtype=np.int8), From ef7c3726673d169ef8b2678f5cb4ba7fbd5f886c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roxana=20R=C4=83dulescu?= <8026679+rradules@users.noreply.github.com> Date: Wed, 20 Mar 2024 22:38:20 +0100 Subject: [PATCH 15/18] adjust obs space for the fully observable case - racks component --- momaland/envs/ingenious/ingenious.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 35550e49..474ad6b2 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -179,7 +179,9 @@ def __init__( "board": Box( 0, len(ALL_COLORS), shape=(2 * self.board_size - 1, 2 * self.board_size - 1), dtype=np.float32 ), - "racks": Box(0, self.num_colors, shape=(self.init_draw, 2), dtype=np.int32), + "racks": Box(0, self.num_colors, shape=(num_agents, self.init_draw, 2), dtype=np.int32) + if self.fully_obs + else Box(0, self.num_colors, shape=(self.init_draw, 2), dtype=np.int32), "scores": Box(0, self.game.max_score, shape=(num_agents, self.num_colors), dtype=np.int32), } ), From c03571dd51cbeea744e3e1de7e248ee3e9b61cbd Mon Sep 17 00:00:00 2001 From: threepwoody Date: Tue, 2 Apr 2024 20:37:33 +0200 Subject: [PATCH 16/18] Ingenious doc fix --- momaland/envs/ingenious/ingenious.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 474ad6b2..066e2175 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -44,7 +44,7 @@ The legal moves available to the current agent are found in the 'action_mask' element of the dictionary observation. The 'action_mask' is a binary vector where each index of the vector represents whether the represented action is legal or not; the action encoding is described in the Action Space section below. -The 'action_mask' will be all zeros for any agent except the one whose turn it is. +The 'action_mask' shows only the current agent's legal moves. 
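
For reference, a minimal random-rollout loop in the style of the helper loops in ingenious_check.py shows how this mask is meant to be consumed each turn. This is a sketch only, not part of the patch; it samples any index whose mask entry is 1 and steps the environment with it:

    import numpy as np

    from momaland.envs.ingenious.ingenious import Ingenious

    env = Ingenious(num_agents=2)
    env.reset(seed=0)
    done = False
    while not done:
        agent = env.agent_selection
        mask = env.observe(agent)["action_mask"]
        action = int(np.random.choice(np.flatnonzero(mask)))  # any index with mask == 1 is legal
        env.step(action)
        _obs, _reward, termination, truncation, _info = env.last()
        done = termination or truncation
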
### Action Space From d030f84cc3d118dbae507ce2f7724c2956793694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roxana=20R=C4=83dulescu?= <8026679+rradules@users.noreply.github.com> Date: Mon, 8 Apr 2024 15:55:39 +0200 Subject: [PATCH 17/18] fix merge --- momaland/envs/ingenious/ingenious.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/momaland/envs/ingenious/ingenious.py b/momaland/envs/ingenious/ingenious.py index 066e2175..578e9483 100644 --- a/momaland/envs/ingenious/ingenious.py +++ b/momaland/envs/ingenious/ingenious.py @@ -120,14 +120,11 @@ def __init__( num_agents (int): The number of agents (between 2 and 6). Default is 2. rack_size (int): The number of tiles each player keeps in their rack (between 2 and 6). Default is 6. num_colors (int): The number of colors (objectives) in the game (between 2 and 6). Default is 6. - board_size (int): The size of one side of the hexagonal board (between 3 and 10). By default the size is set to n+4 where n is the number of agents. - reward_mode (str): Can be set to "competitive" (individual rewards for all agents), "collaborative" (shared rewards for all agents), or "two_teams" (rewards shared within two opposing teams; num_agents needs to be even). Default is "competitive". - fully_obs (bool): Fully observable game mode, i.e. the racks of all players are visible. Default is False. render_mode (str): The rendering mode. Default: None """ From 00216cab6447ab297923c0a35eb4be4508cb92b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roxana=20R=C4=83dulescu?= <8026679+rradules@users.noreply.github.com> Date: Mon, 8 Apr 2024 16:57:39 +0200 Subject: [PATCH 18/18] avoid PZ test breaking --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a9ee011e..aeec166f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,7 +23,7 @@ jobs: pip install pytest sudo apt-get update sudo apt-get install libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig - pip install git+https://github.com/Farama-Foundation/PettingZoo.git + # pip install git+https://github.com/Farama-Foundation/PettingZoo.git pip install -e .[all] - name: Full Python tests run: |