From a83f53d490eb45d075d0d17576d5d4551c7d6766 Mon Sep 17 00:00:00 2001 From: daphnecor Date: Sun, 12 Jan 2025 16:56:51 -0500 Subject: [PATCH 01/10] ignore .sh files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index f42d4370..f25b820f 100644 --- a/.gitignore +++ b/.gitignore @@ -242,6 +242,9 @@ pyrightconfig.json # To be manually created using .env.template .env +# Sbatch scripts +*.sh + # Logs examples/experiments/scripts/logs/* From fb1659a40bcd9e63b1da41b40ba07bd9e6acb49c Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Mon, 13 Jan 2025 23:21:20 +0000 Subject: [PATCH 02/10] WIP. Not ready yet --- baselines/ippo/ippo_pufferlib.py | 5 +- .../experiments/ippo_ff_p1_self_play.yaml | 12 +- integrations/rl/puffer/puffer_env.py | 46 ++-- networks/late_fusion.py | 2 +- pygpudrive/datatypes/observation.py | 40 ++-- pygpudrive/datatypes/roadgraph.py | 19 +- pygpudrive/env/base_env.py | 2 +- pygpudrive/env/env_torch.py | 208 ++++++++---------- 8 files changed, 154 insertions(+), 180 deletions(-) diff --git a/baselines/ippo/ippo_pufferlib.py b/baselines/ippo/ippo_pufferlib.py index d7f9905d..42c39188 100644 --- a/baselines/ippo/ippo_pufferlib.py +++ b/baselines/ippo/ippo_pufferlib.py @@ -285,5 +285,6 @@ def run( if __name__ == "__main__": - - app() + import cProfile + cProfile.run('app()', 'profiled') + #app() diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index 5a05d992..c2422a15 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -2,11 +2,11 @@ mode: "train" use_rnn: false eval_model_path: null baseline: false -data_dir: "data/processed/training" +data_dir: "data/processed/examples" environment: # Overrides default environment configs (see pygpudrive/env/config.py) name: "gpudrive" - num_worlds: 100 # Number of parallel environments + num_worlds: 200 # Number of parallel environments k_unique_scenes: 100 # Number of unique scenes to sample from max_controlled_agents: 128 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp ego_state: true @@ -29,7 +29,7 @@ wandb: entity: "" project: "paper_1_self_play" group: "verify" - mode: "online" # Options: online, offline, disabled + mode: "disabled" # Options: online, offline, disabled tags: ["ppo", "ff"] ## NOTES @@ -56,14 +56,14 @@ train: # # # PPO # # # torch_deterministic: false - total_timesteps: 1_000_000_000 + total_timesteps: 1_000_000 batch_size: 131_072 - minibatch_size: 8192 + minibatch_size: 16_384 learning_rate: 3e-4 anneal_lr: false gamma: 0.99 gae_lambda: 0.95 - update_epochs: 5 + update_epochs: 1 norm_adv: true clip_coef: 0.2 clip_vloss: false diff --git a/integrations/rl/puffer/puffer_env.py b/integrations/rl/puffer/puffer_env.py index f6dc375e..edf9b48c 100644 --- a/integrations/rl/puffer/puffer_env.py +++ b/integrations/rl/puffer/puffer_env.py @@ -94,7 +94,7 @@ def __init__( self.num_agents = self.controlled_agent_mask.sum().item() # Reset the environment and get the initial observations - self.observations = self.env.reset()[self.controlled_agent_mask] + self.observations = self.env.reset(self.controlled_agent_mask) # This assigns a bunch of buffers to self. 
# You can't use them because you want torch, not numpy @@ -215,9 +215,8 @@ def step(self, action): ) == self.controlled_agent_mask.sum(dim=1) )[0] - .cpu() - .numpy() ) + done_worlds_cpu = done_worlds.cpu().numpy() # Add rewards for living agents self.agent_episode_returns[self.live_agent_mask] += reward[ @@ -309,14 +308,18 @@ def step(self, action): controlled_mask ] + ''' ego_state = LocalEgoState.from_tensor( self_obs_tensor=self.env.sim.self_observation_tensor(), backend="torch", device=self.device, + mask=controlled_mask ) - agent_speeds = ( - ego_state.speed[done_worlds][controlled_mask].cpu().numpy() - ) + ''' + agent_speeds = 0 #( + # TODO: What are you logging here? Final speed of last agents in finished worlds? + # ego_state.speed[done_worlds][controlled_mask].cpu().numpy() + #) if num_finished_agents > 0: # fmt: off @@ -328,27 +331,28 @@ def step(self, action): "perc_veh_collisions": collision_rate.item(), "total_controlled_agents": self.num_agents, "control_density": self.num_agents / self.controlled_agent_mask.numel(), - "mean_agent_speed": agent_speeds.mean().item(), + #"mean_agent_speed": agent_speeds.mean().item(), + "mean_agent_speed": 0, "episode_length": self.episode_lengths[done_worlds, :].mean().item(), } ) # fmt: on - # Asynchronously reset the done worlds and empty storage - for idx in done_worlds: - self.env.sim.reset([idx]) - self.episode_returns[idx] = 0 - self.agent_episode_returns[idx, :] = 0 - self.episode_lengths[idx, :] = 0 - # Reset the live agent mask so that the next alive mask will mark - # all agents as alive for the next step - self.live_agent_mask[idx] = self.controlled_agent_mask[idx] - self.offroad_in_episode[idx, :] = 0 - self.collided_in_episode[idx, :] = 0 + # Asynchronously reset the done worlds and empty storage + self.env.sim.reset(done_worlds_cpu) + self.episode_returns[done_worlds] = 0 + self.agent_episode_returns[done_worlds, :] = 0 + self.episode_lengths[done_worlds, :] = 0 + # Reset the live agent mask so that the next alive mask will mark + # all agents as alive for the next step + self.live_agent_mask[done_worlds] = self.controlled_agent_mask[done_worlds] + self.offroad_in_episode[done_worlds, :] = 0 + self.collided_in_episode[done_worlds, :] = 0 + # (6) Get the next observations. 
Note that we do this after resetting # the worlds so that we always return a fresh observation - next_obs = self.env.get_obs()[self.controlled_agent_mask] - + #next_obs = self.env.get_obs()[self.controlled_agent_mask] + next_obs = self.env.get_obs(self.controlled_agent_mask) self.observations = next_obs self.rewards = reward_controlled self.terminals = terminal @@ -416,7 +420,7 @@ def render_agent_observations(self, env_idx): def resample_scenario_batch(self): """Sample and set new batch of WOMD scenarios.""" - + return # Swap the data batch self.env.swap_data_batch() diff --git a/networks/late_fusion.py b/networks/late_fusion.py index bbc5998b..42dac161 100644 --- a/networks/late_fusion.py +++ b/networks/late_fusion.py @@ -227,4 +227,4 @@ def _build_network(self, input_dim, net_arch, network_type): ) ) - return nn.Sequential(*layers) \ No newline at end of file + return nn.Sequential(*layers) diff --git a/pygpudrive/datatypes/observation.py b/pygpudrive/datatypes/observation.py index 93787420..a0aea26b 100644 --- a/pygpudrive/datatypes/observation.py +++ b/pygpudrive/datatypes/observation.py @@ -23,14 +23,14 @@ class LocalEgoState: def __init__(self, self_obs_tensor: torch.Tensor): """Initializes the ego state with an observation tensor.""" - self.speed = self_obs_tensor[:, :, 0] - self.vehicle_length = self_obs_tensor[:, :, 1] - self.vehicle_width = self_obs_tensor[:, :, 2] - self.vehicle_height = self_obs_tensor[:, :, 3] - self.rel_goal_x = self_obs_tensor[:, :, 4] - self.rel_goal_y = self_obs_tensor[:, :, 5] - self.is_collided = self_obs_tensor[:, :, 6] - self.id = self_obs_tensor[:, :, 7] + self.speed = self_obs_tensor[:, 0] + self.vehicle_length = self_obs_tensor[:, 1] + self.vehicle_width = self_obs_tensor[:, 2] + self.vehicle_height = self_obs_tensor[:, 3] + self.rel_goal_x = self_obs_tensor[:, 4] + self.rel_goal_y = self_obs_tensor[:, 5] + self.is_collided = self_obs_tensor[:, 6] + self.id = self_obs_tensor[:, 7] @classmethod def from_tensor( @@ -38,10 +38,11 @@ def from_tensor( self_obs_tensor: gpudrive.madrona.Tensor, backend="torch", device="cuda", + mask=None ): """Creates an LocalEgoState from the agent_observation_tensor.""" if backend == "torch": - return cls(self_obs_tensor.to_torch().clone().to(device)) + return cls(self_obs_tensor.to_torch()[mask].to(device)) elif backend == "jax": raise NotImplementedError("JAX backend not implemented yet.") @@ -148,15 +149,15 @@ class PartnerObs: def __init__(self, partner_obs_tensor: torch.Tensor): """Initializes the partner observation from a tensor.""" - self.speed = partner_obs_tensor[:, :, :, 0].unsqueeze(-1) - self.rel_pos_x = partner_obs_tensor[:, :, :, 1].unsqueeze(-1) - self.rel_pos_y = partner_obs_tensor[:, :, :, 2].unsqueeze(-1) - self.orientation = partner_obs_tensor[:, :, :, 3].unsqueeze(-1) - self.vehicle_length = partner_obs_tensor[:, :, :, 4].unsqueeze(-1) - self.vehicle_width = partner_obs_tensor[:, :, :, 5].unsqueeze(-1) - self.vehicle_height = partner_obs_tensor[:, :, :, 6].unsqueeze(-1) - self.agent_type = partner_obs_tensor[:, :, :, 7].unsqueeze(-1) - self.ids = partner_obs_tensor[:, :, :, 8].unsqueeze(-1) + self.speed = partner_obs_tensor[:, :, 0].unsqueeze(-1) + self.rel_pos_x = partner_obs_tensor[:, :, 1].unsqueeze(-1) + self.rel_pos_y = partner_obs_tensor[:, :, 2].unsqueeze(-1) + self.orientation = partner_obs_tensor[:, :, 3].unsqueeze(-1) + self.vehicle_length = partner_obs_tensor[:, :, 4].unsqueeze(-1) + self.vehicle_width = partner_obs_tensor[:, :, 5].unsqueeze(-1) + self.vehicle_height = 
partner_obs_tensor[:, :, 6].unsqueeze(-1) + self.agent_type = partner_obs_tensor[:, :, 7].unsqueeze(-1) + self.ids = partner_obs_tensor[:, :, 8].unsqueeze(-1) @classmethod def from_tensor( @@ -164,10 +165,11 @@ def from_tensor( partner_obs_tensor: gpudrive.madrona.Tensor, backend="torch", device="cuda", + mask=None, ): """Creates an PartnerObs from a tensor.""" if backend == "torch": - return cls(partner_obs_tensor.to_torch().clone().to(device)) + return cls(partner_obs_tensor.to_torch()[mask].to(device)) elif backend == "jax": raise NotImplementedError("JAX backend not implemented yet.") diff --git a/pygpudrive/datatypes/roadgraph.py b/pygpudrive/datatypes/roadgraph.py index 8f900daf..2d8e5fba 100644 --- a/pygpudrive/datatypes/roadgraph.py +++ b/pygpudrive/datatypes/roadgraph.py @@ -129,26 +129,27 @@ class LocalRoadGraphPoints: def __init__(self, local_roadgraph_tensor: torch.Tensor): """Initializes the global road graph points with a tensor.""" - self.x = local_roadgraph_tensor[:, :, :, 0] - self.y = local_roadgraph_tensor[:, :, :, 1] - self.segment_length = local_roadgraph_tensor[:, :, :, 2] - self.segment_width = local_roadgraph_tensor[:, :, :, 3] - self.segment_height = local_roadgraph_tensor[:, :, :, 4] - self.orientation = local_roadgraph_tensor[:, :, :, 5] - self.id = local_roadgraph_tensor[:, :, :, 7] + self.x = local_roadgraph_tensor[:, :, 0] + self.y = local_roadgraph_tensor[:, :, 1] + self.segment_length = local_roadgraph_tensor[:, :, 2] + self.segment_width = local_roadgraph_tensor[:, :, 3] + self.segment_height = local_roadgraph_tensor[:, :, 4] + self.orientation = local_roadgraph_tensor[:, :, 5] + self.id = local_roadgraph_tensor[:, :, 7] # TODO(dc): Use map type instead of enum (8 instead of 6) - self.type = local_roadgraph_tensor[:, :, :, 6].long() + self.type = local_roadgraph_tensor[:, :, 6].long() @classmethod def from_tensor( cls, local_roadgraph_tensor: gpudrive.madrona.Tensor, backend="torch", + mask=None, device="cuda", ): """Creates a GlobalRoadGraphPoints instance from a tensor.""" if backend == "torch": - return cls(local_roadgraph_tensor.to_torch().clone().to(device)) + return cls(local_roadgraph_tensor.to_torch().to(device)[mask]) elif backend == "jax": raise NotImplementedError("JAX backend not implemented yet.") diff --git a/pygpudrive/env/base_env.py b/pygpudrive/env/base_env.py index 1721aff1..f5047798 100755 --- a/pygpudrive/env/base_env.py +++ b/pygpudrive/env/base_env.py @@ -7,7 +7,7 @@ import abc import gpudrive import torch -import jax.numpy as jnp +#import jax.numpy as jnp class GPUDriveGymEnv(gym.Env, metaclass=abc.ABCMeta): diff --git a/pygpudrive/env/env_torch.py b/pygpudrive/env/env_torch.py index 2d1e3f2c..c27e24da 100755 --- a/pygpudrive/env/env_torch.py +++ b/pygpudrive/env/env_torch.py @@ -65,7 +65,7 @@ def __init__( # Setup action and observation spaces self.observation_space = Box( - low=-np.inf, high=np.inf, shape=(self.get_obs().shape[-1],) + low=-np.inf, high=np.inf, shape=(self.get_obs(self.cont_agent_mask).shape[-1],) ) # self.single_observation_space = Box( # low=-np.inf, high=np.inf, shape=(self.observation_space.shape[-1],), dtype=np.float32 @@ -99,10 +99,10 @@ def __init__( env_config=self.config, ) - def reset(self): + def reset(self, mask=None): """Reset the worlds and return the initial observations.""" self.sim.reset(list(range(self.num_worlds))) - return self.get_obs() + return self.get_obs(mask) def get_dones(self): return ( @@ -355,149 +355,115 @@ def _set_continuous_action_space(self) -> None: ) return action_space - def 
_get_ego_state(self) -> torch.Tensor: + def _get_ego_state(self, mask) -> torch.Tensor: """Get the ego state. Returns: Shape: (num_worlds, max_agents, num_features) """ - if self.config.ego_state: - ego_state = LocalEgoState.from_tensor( - self_obs_tensor=self.sim.self_observation_tensor(), - backend=self.backend, - ) - if self.config.norm_obs: - ego_state.normalize() - - return ( - torch.stack( - [ - ego_state.speed, - ego_state.vehicle_length, - ego_state.vehicle_width, - ego_state.rel_goal_x, - ego_state.rel_goal_y, - ego_state.is_collided, - ] - ) - .permute(1, 2, 0) - .to(self.device) - ) - else: - return torch.Tensor().to(self.device) + if not self.config.ego_state: + return [] - def _get_partner_obs(self): + ego_state = LocalEgoState.from_tensor( + self_obs_tensor=self.sim.self_observation_tensor(), + backend=self.backend, + mask=mask + ) + if self.config.norm_obs: + ego_state.normalize() + + # TODO: I deleted this permute. Was it needed? + #.permute(1, 2, 0) + return [ + ego_state.speed.unsqueeze(-1), + ego_state.vehicle_length.unsqueeze(-1), + ego_state.vehicle_width.unsqueeze(-1), + ego_state.rel_goal_x.unsqueeze(-1), + ego_state.rel_goal_y.unsqueeze(-1), + ego_state.is_collided.unsqueeze(-1), + ] + + def _get_partner_obs(self, mask): """Get partner observations.""" - if self.config.partner_obs: - partner_obs = PartnerObs.from_tensor( - partner_obs_tensor=self.sim.partner_observations_tensor(), - backend=self.backend, - ) + if not self.config.partner_obs: + return [] - if self.config.norm_obs: - partner_obs.normalize() - partner_obs.one_hot_encode_agent_types() - - return ( - torch.concat( - [ - partner_obs.speed, - partner_obs.rel_pos_x, - partner_obs.rel_pos_y, - partner_obs.orientation, - partner_obs.vehicle_length, - partner_obs.vehicle_width, - # partner_obs.agent_type, - ], - dim=-1, - ) - .flatten(start_dim=2) - .to(self.device) - ) + partner_obs = PartnerObs.from_tensor( + partner_obs_tensor=self.sim.partner_observations_tensor(), + backend=self.backend, + mask=mask + ) - else: - return torch.Tensor().to(self.device) + if self.config.norm_obs: + partner_obs.normalize() + partner_obs.one_hot_encode_agent_types() - def _get_road_map_obs(self): - """Get road map observations.""" - if self.config.road_map_obs: - roadgraph = LocalRoadGraphPoints.from_tensor( - local_roadgraph_tensor=self.sim.agent_roadmap_tensor(), - backend=self.backend, - ) + return [ + partner_obs.speed.squeeze(-1), + partner_obs.rel_pos_x.squeeze(-1), + partner_obs.rel_pos_y.squeeze(-1), + partner_obs.orientation.squeeze(-1), + partner_obs.vehicle_length.squeeze(-1), + partner_obs.vehicle_width.squeeze(-1), + ] - if self.config.norm_obs: - roadgraph.normalize() - roadgraph.one_hot_encode_road_point_types() - - return ( - torch.cat( - [ - roadgraph.x.unsqueeze(-1), - roadgraph.y.unsqueeze(-1), - roadgraph.segment_length.unsqueeze(-1), - roadgraph.segment_width.unsqueeze(-1), - roadgraph.segment_height.unsqueeze(-1), - roadgraph.orientation.unsqueeze(-1), - roadgraph.type, - ], - dim=-1, - ) - .flatten(start_dim=2) - .to(self.device) - ) + def _get_road_map_obs(self, mask): + """Get road map observations.""" + if not self.config.road_map_obs: + return [] - else: - return torch.Tensor().to(self.device) + roadgraph = LocalRoadGraphPoints.from_tensor( + local_roadgraph_tensor=self.sim.agent_roadmap_tensor(), + backend=self.backend, + mask=mask + ) - def _get_lidar_obs(self): + if self.config.norm_obs: + roadgraph.normalize() + roadgraph.one_hot_encode_road_point_types() + + return [ + roadgraph.x, + 
roadgraph.y, + roadgraph.segment_length, + roadgraph.segment_width, + roadgraph.segment_height, + roadgraph.orientation, + roadgraph.type.flatten(start_dim=1), + ] + + def _get_lidar_obs(self, mask): """Get lidar observations.""" - if self.config.lidar_obs: - lidar = LidarObs.from_tensor( - lidar_tensor=self.sim.lidar_tensor(), - backend=self.backend, - ) + if not self.config.lidar_obs: + return [] - return ( - torch.cat( - [ - lidar.agent_samples, - lidar.road_edge_samples, - lidar.road_line_samples, - ], - dim=-1, - ) - .flatten(start_dim=2) - .to(self.device) - ) - else: - return torch.Tensor().to(self.device) + lidar = LidarObs.from_tensor( + lidar_tensor=self.sim.lidar_tensor(), + backend=self.backend, + ) - def get_obs(self): + return [ + lidar.agent_samples[mask], + lidar.road_edge_samples[mask], + lidar.road_line_samples[mask], + ] + + def get_obs(self, mask=None): """Get observation: Combine different types of environment information into a single tensor. Returns: torch.Tensor: (num_worlds, max_agent_count, num_features) """ - ego_states = self._get_ego_state() - - partner_observations = self._get_partner_obs() + ego_states = self._get_ego_state(mask) - road_map_observations = self._get_road_map_obs() + partner_observations = self._get_partner_obs(mask) - lidar_obs = self._get_lidar_obs() + road_map_observations = self._get_road_map_obs(mask) - obs_filtered = torch.cat( - ( - ego_states, - partner_observations, - road_map_observations, - lidar_obs, - ), - dim=-1, - ) + lidar_obs = self._get_lidar_obs(mask) - return obs_filtered + obs = ego_states + partner_observations + road_map_observations + lidar_obs + return torch.cat(obs, dim=-1) def get_controlled_agents_mask(self): """Get the control mask.""" From 8fe608e3e6f0ac526af820498c59b63bc99ed6f5 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 14 Jan 2025 01:33:29 +0000 Subject: [PATCH 03/10] 250k sps training, doesn't learn --- .../experiments/ippo_ff_p1_self_play.yaml | 14 ++--- integrations/rl/puffer/puffer_env.py | 3 +- integrations/rl/puffer/utils.py | 1 - networks/late_fusion.py | 24 ++++---- pygpudrive/datatypes/observation.py | 55 ++++++++++++++++--- pygpudrive/datatypes/roadgraph.py | 11 ++-- pygpudrive/env/env_torch.py | 30 +++++----- pygpudrive/utils/geometry.py | 12 ++++ src/consts.hpp | 2 +- 9 files changed, 103 insertions(+), 49 deletions(-) diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index c2422a15..ae81da40 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -6,9 +6,9 @@ data_dir: "data/processed/examples" environment: # Overrides default environment configs (see pygpudrive/env/config.py) name: "gpudrive" - num_worlds: 200 # Number of parallel environments + num_worlds: 300 # Number of parallel environments k_unique_scenes: 100 # Number of unique scenes to sample from - max_controlled_agents: 128 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp + max_controlled_agents: 64 # Maximum number of agents controlled by the model. 
Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp ego_state: true road_map_obs: true partner_obs: true @@ -29,7 +29,7 @@ wandb: entity: "" project: "paper_1_self_play" group: "verify" - mode: "disabled" # Options: online, offline, disabled + mode: "online" # Options: online, offline, disabled tags: ["ppo", "ff"] ## NOTES @@ -49,16 +49,16 @@ train: resample_scenes: true resample_criterion: "global_step" resample_dataset_size: 500 # Number of unique scenes to sample from - resample_interval: 1_000_000 + resample_interval: 20_000_000 resample_limit: 1000 # Resample until the limit is reached; set to a large number to continue resampling indefinitely sample_with_replacement: true shuffle_dataset: false # # # PPO # # # torch_deterministic: false - total_timesteps: 1_000_000 + total_timesteps: 1_000_000_000 batch_size: 131_072 - minibatch_size: 16_384 + minibatch_size: 16384 learning_rate: 3e-4 anneal_lr: false gamma: 0.99 @@ -76,7 +76,7 @@ train: # # # Network # # # network: input_dim: 64 # Embedding of the input features - hidden_dim: 128 # Latent dimension + hidden_dim: 192 # Latent dimension pred_heads_arch: [64] # Arch of the prediction heads (actor and critic) num_transformer_layers: 0 # Number of transformer layers dropout: 0.01 diff --git a/integrations/rl/puffer/puffer_env.py b/integrations/rl/puffer/puffer_env.py index edf9b48c..a602fb2a 100644 --- a/integrations/rl/puffer/puffer_env.py +++ b/integrations/rl/puffer/puffer_env.py @@ -420,7 +420,6 @@ def render_agent_observations(self, env_idx): def resample_scenario_batch(self): """Sample and set new batch of WOMD scenarios.""" - return # Swap the data batch self.env.swap_data_batch() @@ -432,7 +431,7 @@ def resample_scenario_batch(self): self.reset() # Reset storage # Get info from new worlds - self.observations = self.env.reset()[self.controlled_agent_mask] + self.observations = self.env.reset(self.controlled_agent_mask) self.log_data_coverage() diff --git a/integrations/rl/puffer/utils.py b/integrations/rl/puffer/utils.py index 1b76feeb..9659c9ba 100644 --- a/integrations/rl/puffer/utils.py +++ b/integrations/rl/puffer/utils.py @@ -96,7 +96,6 @@ def encode_observations(self, observations): observations, self.env ) ego_embed = self.ego_embed(ego_state) - partner_embed, _ = self.partner_embed(road_objects).max(dim=1) road_map_embed, _ = self.road_map_embed(road_graph).max(dim=1) embed = torch.cat([ego_embed, partner_embed, road_map_embed], dim=1) diff --git a/networks/late_fusion.py b/networks/late_fusion.py index 42dac161..2e282828 100644 --- a/networks/late_fusion.py +++ b/networks/late_fusion.py @@ -109,11 +109,11 @@ def __init__( self, action_dim, input_dim=64, - hidden_dim=128, + hidden_dim=192, pred_heads_arch=[128], num_transformer_layers=0, dropout=0.00, - act_func="tanh", + act_func="relu", ): super().__init__() self.input_dim = input_dim @@ -129,9 +129,9 @@ def __init__( pufferlib.pytorch.layer_init( nn.Linear(constants.EGO_FEAT_DIM, input_dim) ), - nn.LayerNorm(input_dim), + #nn.LayerNorm(input_dim), self.act_func, - nn.Dropout(self.dropout), + #nn.Dropout(self.dropout), pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), ) @@ -139,9 +139,9 @@ def __init__( pufferlib.pytorch.layer_init( nn.Linear(constants.PARTNER_FEAT_DIM, input_dim) ), - nn.LayerNorm(input_dim), + #nn.LayerNorm(input_dim), self.act_func, - nn.Dropout(self.dropout), + #nn.Dropout(self.dropout), pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), ) @@ -149,16 +149,19 @@ def __init__( pufferlib.pytorch.layer_init( 
nn.Linear(constants.ROAD_GRAPH_FEAT_DIM, input_dim) ), - nn.LayerNorm(input_dim), + #nn.LayerNorm(input_dim), self.act_func, - nn.Dropout(self.dropout), + #nn.Dropout(self.dropout), pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), ) + ''' self.shared_embed = nn.Sequential( nn.Linear(self.input_dim * self.num_modes, self.hidden_dim), - nn.Dropout(self.dropout) + #nn.Dropout(self.dropout) + self.act_func, ) + ''' if self.num_transformer_layers > 0: self.transformer_layers = nn.Sequential( @@ -190,7 +193,8 @@ def encode_observations(self, observation): if self.num_transformer_layers > 0: embed = self.transformer_layers(embed) - return self.shared_embed(embed) + return embed + #return self.shared_embed(embed) def forward(self, obs, action=None, deterministic=False): diff --git a/pygpudrive/datatypes/observation.py b/pygpudrive/datatypes/observation.py index a0aea26b..70e5861a 100644 --- a/pygpudrive/datatypes/observation.py +++ b/pygpudrive/datatypes/observation.py @@ -1,10 +1,9 @@ import torch from dataclasses import dataclass from pygpudrive.env import constants -from pygpudrive.utils.geometry import normalize_min_max +from pygpudrive.utils.geometry import normalize_min_max, normalize_min_max_inplace import gpudrive - class LocalEgoState: """A class to represent the ego state of the agent in relative coordinates. Initialized from self_observation_tensor (src/bindings). For details, see @@ -23,6 +22,8 @@ class LocalEgoState: def __init__(self, self_obs_tensor: torch.Tensor): """Initializes the ego state with an observation tensor.""" + self.data = self_obs_tensor[:, :7] + ''' self.speed = self_obs_tensor[:, 0] self.vehicle_length = self_obs_tensor[:, 1] self.vehicle_width = self_obs_tensor[:, 2] @@ -31,6 +32,7 @@ def __init__(self, self_obs_tensor: torch.Tensor): self.rel_goal_y = self_obs_tensor[:, 5] self.is_collided = self_obs_tensor[:, 6] self.id = self_obs_tensor[:, 7] + ''' @classmethod def from_tensor( @@ -42,12 +44,32 @@ def from_tensor( ): """Creates an LocalEgoState from the agent_observation_tensor.""" if backend == "torch": - return cls(self_obs_tensor.to_torch()[mask].to(device)) + obj = cls(self_obs_tensor.to_torch()[mask].to(device)) + obj.norm = torch.Tensor([ + constants.MAX_SPEED, + constants.MAX_VEH_LEN, + constants.MAX_VEH_WIDTH, + constants.MAX_VEH_HEIGHT + ]).to(device) + return obj elif backend == "jax": raise NotImplementedError("JAX backend not implemented yet.") def normalize(self): """Normalizes the ego state to be between -1 and 1.""" + self.data[:, :4] /= self.norm + normalize_min_max_inplace( + self.data[:, 4], + min_val=constants.MIN_REL_GOAL_COORD, + max_val=constants.MAX_REL_GOAL_COORD, + ) + normalize_min_max_inplace( + self.data[:, 5], + min_val=constants.MIN_REL_GOAL_COORD, + max_val=constants.MAX_REL_GOAL_COORD, + ) + + ''' self.speed = self.speed / constants.MAX_SPEED self.vehicle_length = self.vehicle_length / constants.MAX_VEH_LEN self.vehicle_width = self.vehicle_width / constants.MAX_VEH_WIDTH @@ -64,6 +86,7 @@ def normalize(self): ) self.is_collided = self.is_collided self.id = self.id + ''' @property def shape(self) -> tuple[int, ...]: @@ -149,6 +172,8 @@ class PartnerObs: def __init__(self, partner_obs_tensor: torch.Tensor): """Initializes the partner observation from a tensor.""" + self.data = partner_obs_tensor + ''' self.speed = partner_obs_tensor[:, :, 0].unsqueeze(-1) self.rel_pos_x = partner_obs_tensor[:, :, 1].unsqueeze(-1) self.rel_pos_y = partner_obs_tensor[:, :, 2].unsqueeze(-1) @@ -158,6 +183,7 @@ def __init__(self, 
partner_obs_tensor: torch.Tensor): self.vehicle_height = partner_obs_tensor[:, :, 6].unsqueeze(-1) self.agent_type = partner_obs_tensor[:, :, 7].unsqueeze(-1) self.ids = partner_obs_tensor[:, :, 8].unsqueeze(-1) + ''' @classmethod def from_tensor( @@ -169,29 +195,40 @@ def from_tensor( ): """Creates an PartnerObs from a tensor.""" if backend == "torch": - return cls(partner_obs_tensor.to_torch()[mask].to(device)) + obj = cls(partner_obs_tensor.to_torch()[mask].to(device)) + obj.norm = torch.Tensor([ + constants.MAX_ORIENTATION_RAD, + constants.MAX_VEH_LEN, + constants.MAX_VEH_WIDTH, + constants.MAX_VEH_HEIGHT + ]).to(device) + return obj + elif backend == "jax": raise NotImplementedError("JAX backend not implemented yet.") def normalize(self): """Normalizes the partner observation.""" - self.speed = self.speed / constants.MAX_SPEED - self.rel_pos_x = normalize_min_max( - tensor=self.rel_pos_x, + self.data[:, :, 0] /= constants.MAX_SPEED + normalize_min_max_inplace( + tensor=self.data[:, :, 1], min_val=constants.MIN_REL_GOAL_COORD, max_val=constants.MAX_REL_GOAL_COORD, ) - self.rel_pos_y = normalize_min_max( - tensor=self.rel_pos_y, + normalize_min_max_inplace( + tensor=self.data[:, :, 2], min_val=constants.MIN_REL_GOAL_COORD, max_val=constants.MAX_REL_GOAL_COORD, ) + self.data[:, :, 3:7] /= self.norm + ''' self.orientation = self.orientation / constants.MAX_ORIENTATION_RAD self.vehicle_length = self.vehicle_length / constants.MAX_VEH_LEN self.vehicle_width = self.vehicle_width / constants.MAX_VEH_WIDTH self.vehicle_height = self.vehicle_height / constants.MAX_VEH_HEIGHT self.agent_type = self.agent_type.long() self.ids = self.ids + ''' def one_hot_encode_agent_types(self): """One-hot encodes the agent types. This operation increases the diff --git a/pygpudrive/datatypes/roadgraph.py b/pygpudrive/datatypes/roadgraph.py index 2d8e5fba..7bcfdb0f 100644 --- a/pygpudrive/datatypes/roadgraph.py +++ b/pygpudrive/datatypes/roadgraph.py @@ -2,7 +2,7 @@ import torch import enum import gpudrive -from pygpudrive.utils.geometry import normalize_min_max +from pygpudrive.utils.geometry import normalize_min_max, normalize_min_max_inplace from pygpudrive.env import constants @@ -135,8 +135,9 @@ def __init__(self, local_roadgraph_tensor: torch.Tensor): self.segment_width = local_roadgraph_tensor[:, :, 3] self.segment_height = local_roadgraph_tensor[:, :, 4] self.orientation = local_roadgraph_tensor[:, :, 5] - self.id = local_roadgraph_tensor[:, :, 7] + #self.id = local_roadgraph_tensor[:, :, 7] # TODO(dc): Use map type instead of enum (8 instead of 6) + self.data = local_roadgraph_tensor[:, :, :6] self.type = local_roadgraph_tensor[:, :, 6].long() @classmethod @@ -155,12 +156,12 @@ def from_tensor( def normalize(self): """Normalizes the road graph points to [-1, 1].""" - self.x = normalize_min_max( + normalize_min_max_inplace( self.x, min_val=constants.MIN_RG_COORD, max_val=constants.MAX_RG_COORD, ) - self.y = normalize_min_max( + normalize_min_max_inplace( self.y, min_val=constants.MIN_RG_COORD, max_val=constants.MAX_RG_COORD, @@ -171,7 +172,7 @@ def normalize(self): self.segment_width = self.segment_width / constants.MAX_ROAD_SCALE self.segment_height = self.segment_height # / constants.MAX_ROAD_SCALE self.orientation = self.orientation / constants.MAX_ORIENTATION_RAD - self.id = self.id + #self.id = self.id def one_hot_encode_road_point_types(self): """One-hot encodes the type of road point.""" diff --git a/pygpudrive/env/env_torch.py b/pygpudrive/env/env_torch.py index c27e24da..9955b9aa 100755 --- 
a/pygpudrive/env/env_torch.py +++ b/pygpudrive/env/env_torch.py @@ -373,7 +373,9 @@ def _get_ego_state(self, mask) -> torch.Tensor: # TODO: I deleted this permute. Was it needed? #.permute(1, 2, 0) - return [ + return [ego_state.data] + ''' + [ ego_state.speed.unsqueeze(-1), ego_state.vehicle_length.unsqueeze(-1), ego_state.vehicle_width.unsqueeze(-1), @@ -381,6 +383,7 @@ def _get_ego_state(self, mask) -> torch.Tensor: ego_state.rel_goal_y.unsqueeze(-1), ego_state.is_collided.unsqueeze(-1), ] + ''' def _get_partner_obs(self, mask): """Get partner observations.""" @@ -395,9 +398,11 @@ def _get_partner_obs(self, mask): if self.config.norm_obs: partner_obs.normalize() - partner_obs.one_hot_encode_agent_types() + #partner_obs.one_hot_encode_agent_types() - return [ + return [partner_obs.data.flatten(start_dim=1)] + ''' + [ partner_obs.speed.squeeze(-1), partner_obs.rel_pos_x.squeeze(-1), partner_obs.rel_pos_y.squeeze(-1), @@ -405,6 +410,7 @@ def _get_partner_obs(self, mask): partner_obs.vehicle_length.squeeze(-1), partner_obs.vehicle_width.squeeze(-1), ] + ''' def _get_road_map_obs(self, mask): """Get road map observations.""" @@ -422,12 +428,13 @@ def _get_road_map_obs(self, mask): roadgraph.one_hot_encode_road_point_types() return [ - roadgraph.x, - roadgraph.y, - roadgraph.segment_length, - roadgraph.segment_width, - roadgraph.segment_height, - roadgraph.orientation, + #roadgraph.x, + #roadgraph.y, + #roadgraph.segment_length, + #roadgraph.segment_width, + #roadgraph.segment_height, + #roadgraph.orientation, + roadgraph.data.flatten(start_dim=1), roadgraph.type.flatten(start_dim=1), ] @@ -453,15 +460,10 @@ def get_obs(self, mask=None): Returns: torch.Tensor: (num_worlds, max_agent_count, num_features) """ - ego_states = self._get_ego_state(mask) - partner_observations = self._get_partner_obs(mask) - road_map_observations = self._get_road_map_obs(mask) - lidar_obs = self._get_lidar_obs(mask) - obs = ego_states + partner_observations + road_map_observations + lidar_obs return torch.cat(obs, dim=-1) diff --git a/pygpudrive/utils/geometry.py b/pygpudrive/utils/geometry.py index 5b102e7b..84b26dee 100644 --- a/pygpudrive/utils/geometry.py +++ b/pygpudrive/utils/geometry.py @@ -24,3 +24,15 @@ def normalize_min_max(tensor, min_val, max_val): np.array: Normalized array of values. """ return 2 * ((tensor - min_val) / (max_val - min_val)) - 1 + +def normalize_min_max_inplace(tensor, min_val, max_val): + """Normalizes an array of values to the range [-1, 1]. + + Args: + x (np.array): Array of values to normalize. + min_val (float): Minimum value for normalization. + max_val (float): Maximum value for normalization. 
+ """ + tensor[:] = 2 * ((tensor - min_val) / (max_val - min_val)) - 1 + + diff --git a/src/consts.hpp b/src/consts.hpp index 98822e5a..90be2638 100644 --- a/src/consts.hpp +++ b/src/consts.hpp @@ -8,7 +8,7 @@ namespace gpudrive { namespace consts { -inline constexpr madrona::CountT kMaxAgentCount = 128; +inline constexpr madrona::CountT kMaxAgentCount = 64; inline constexpr madrona::CountT kMaxRoadEntityCount = 10000; inline constexpr madrona::CountT kMaxAgentMapObservationsCount = 200; From ba4420a5d143480f2d16403bb2333365054f75f3 Mon Sep 17 00:00:00 2001 From: Daphne Cornelisse Date: Tue, 14 Jan 2025 10:17:04 -0500 Subject: [PATCH 04/10] Tiny param updates to ensure fair benchmarking --- examples/experiments/ippo_ff_p1_self_play.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index ae81da40..d0e70be9 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -7,7 +7,7 @@ data_dir: "data/processed/examples" environment: # Overrides default environment configs (see pygpudrive/env/config.py) name: "gpudrive" num_worlds: 300 # Number of parallel environments - k_unique_scenes: 100 # Number of unique scenes to sample from + k_unique_scenes: 3 # Number of unique scenes to sample from max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp ego_state: true road_map_obs: true @@ -46,7 +46,7 @@ train: compile_mode: "reduce-overhead" # # # Data sampling # # # - resample_scenes: true + resample_scenes: false resample_criterion: "global_step" resample_dataset_size: 500 # Number of unique scenes to sample from resample_interval: 20_000_000 @@ -63,7 +63,7 @@ train: anneal_lr: false gamma: 0.99 gae_lambda: 0.95 - update_epochs: 1 + update_epochs: 5 norm_adv: true clip_coef: 0.2 clip_vloss: false From 895d2dc0c4e61881e8d0454dab94413ca425727f Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 14 Jan 2025 16:31:18 +0000 Subject: [PATCH 05/10] Fixed training. 
Just had some bad obs norms --- examples/experiments/ippo_ff_p1_self_play.yaml | 4 ++-- pygpudrive/datatypes/observation.py | 2 +- pygpudrive/datatypes/roadgraph.py | 13 ++++++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index d0e70be9..7391c7da 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -46,7 +46,7 @@ train: compile_mode: "reduce-overhead" # # # Data sampling # # # - resample_scenes: false + resample_scenes: False resample_criterion: "global_step" resample_dataset_size: 500 # Number of unique scenes to sample from resample_interval: 20_000_000 @@ -63,7 +63,7 @@ train: anneal_lr: false gamma: 0.99 gae_lambda: 0.95 - update_epochs: 5 + update_epochs: 1 norm_adv: true clip_coef: 0.2 clip_vloss: false diff --git a/pygpudrive/datatypes/observation.py b/pygpudrive/datatypes/observation.py index 70e5861a..d8a0e68f 100644 --- a/pygpudrive/datatypes/observation.py +++ b/pygpudrive/datatypes/observation.py @@ -172,7 +172,7 @@ class PartnerObs: def __init__(self, partner_obs_tensor: torch.Tensor): """Initializes the partner observation from a tensor.""" - self.data = partner_obs_tensor + self.data = partner_obs_tensor[:, :, :7] ''' self.speed = partner_obs_tensor[:, :, 0].unsqueeze(-1) self.rel_pos_x = partner_obs_tensor[:, :, 1].unsqueeze(-1) diff --git a/pygpudrive/datatypes/roadgraph.py b/pygpudrive/datatypes/roadgraph.py index 7bcfdb0f..0bc0c582 100644 --- a/pygpudrive/datatypes/roadgraph.py +++ b/pygpudrive/datatypes/roadgraph.py @@ -150,7 +150,15 @@ def from_tensor( ): """Creates a GlobalRoadGraphPoints instance from a tensor.""" if backend == "torch": - return cls(local_roadgraph_tensor.to_torch().to(device)[mask]) + obj = cls(local_roadgraph_tensor.to_torch().to(device)[mask]) + obj.norm = torch.Tensor([ + constants.MAX_ROAD_LINE_SEGMENT_LEN, + constants.MAX_ROAD_SCALE, + constants.MAX_ROAD_SCALE, + constants.MAX_ORIENTATION_RAD + ]).to(device) + return obj + elif backend == "jax": raise NotImplementedError("JAX backend not implemented yet.") @@ -166,12 +174,15 @@ def normalize(self): min_val=constants.MIN_RG_COORD, max_val=constants.MAX_RG_COORD, ) + self.data[:, :, 2:6] /= self.norm + ''' self.segment_length = ( self.segment_length / constants.MAX_ROAD_LINE_SEGMENT_LEN ) self.segment_width = self.segment_width / constants.MAX_ROAD_SCALE self.segment_height = self.segment_height # / constants.MAX_ROAD_SCALE self.orientation = self.orientation / constants.MAX_ORIENTATION_RAD + ''' #self.id = self.id def one_hot_encode_road_point_types(self): From c790929b13c8f6a6d354d685ed8777b28d87490b Mon Sep 17 00:00:00 2001 From: Daphne Cornelisse Date: Tue, 14 Jan 2025 12:09:53 -0500 Subject: [PATCH 06/10] Update vehicle length and width constants for obs normalization. 
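For context, a minimal sketch of how these bounds are consumed by the in-place scaling introduced earlier in this series (LocalEgoState.normalize / PartnerObs.normalize divide the raw features by a norm vector built from these constants); the agent values below are made up purely for illustration, not taken from WOMD data:

    import torch

    # Bounds as defined in pygpudrive/env/constants.py after this change
    MAX_SPEED, MAX_VEH_LEN, MAX_VEH_WIDTH, MAX_VEH_HEIGHT = 100.0, 30.0, 15.0, 5.0

    # Hypothetical ego features: [speed, length, width, height] for two agents
    ego = torch.tensor([[12.0,  5.1,  2.3, 1.8],
                        [ 3.5, 14.0, 12.0, 4.1]])   # second row: an unusually wide/tall vehicle

    norm = torch.tensor([MAX_SPEED, MAX_VEH_LEN, MAX_VEH_WIDTH, MAX_VEH_HEIGHT])
    ego /= norm                      # same elementwise scaling as self.data[:, :4] /= self.norm
    assert ego.abs().max() <= 1.0    # a 12 m width gave 1.2 (> 1) under the old MAX_VEH_WIDTH = 10

Raising MAX_VEH_WIDTH (10 -> 15) and MAX_VEH_HEIGHT (3 -> 5) keeps such outliers inside the expected [0, 1] range instead of letting them leak past the normalization bound.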
--- pygpudrive/env/constants.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygpudrive/env/constants.py b/pygpudrive/env/constants.py index 45a87aac..4dc963a8 100644 --- a/pygpudrive/env/constants.py +++ b/pygpudrive/env/constants.py @@ -5,8 +5,8 @@ # Observation space constants MAX_SPEED = 100 MAX_VEH_LEN = 30 -MAX_VEH_WIDTH = 10 -MAX_VEH_HEIGHT = 3 +MAX_VEH_WIDTH = 15 +MAX_VEH_HEIGHT = 5 MIN_REL_GOAL_COORD = -1000 MAX_REL_GOAL_COORD = 1000 MIN_REL_AGENT_POS = -1000 @@ -21,5 +21,5 @@ # Feature shape constants EGO_FEAT_DIM = 6 -PARTNER_FEAT_DIM = 6 # 10 +PARTNER_FEAT_DIM = 6 ROAD_GRAPH_FEAT_DIM = 13 From 020d7b00f701d5b78f7dce63f9fb95e2953ce02f Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 14 Jan 2025 17:50:55 +0000 Subject: [PATCH 07/10] Sweeps integration. Set tag and run from as many machines as you like --- baselines/ippo/ippo_pufferlib.py | 144 +++++++++++------- .../experiments/ippo_ff_p1_self_play.yaml | 34 ++++- integrations/rl/puffer/ppo.py | 2 +- 3 files changed, 123 insertions(+), 57 deletions(-) diff --git a/baselines/ippo/ippo_pufferlib.py b/baselines/ippo/ippo_pufferlib.py index 42c39188..a583609f 100644 --- a/baselines/ippo/ippo_pufferlib.py +++ b/baselines/ippo/ippo_pufferlib.py @@ -15,6 +15,8 @@ import numpy as np import wandb from box import Box +import time +import random from integrations.rl.puffer import ppo from integrations.rl.puffer.puffer_env import env_creator @@ -32,6 +34,73 @@ app = Typer() +def log_normal(mean, scale, clip): + '''Samples normally spaced points on a log 10 scale. + mean: Your center sample point + scale: standard deviation in base 10 orders of magnitude + clip: maximum standard deviations + + Example: mean=0.001, scale=1, clip=2 will produce data from + 0.1 to 0.00001 with most of it between 0.01 and 0.0001 + ''' + return 10**np.clip( + np.random.normal( + np.log10(mean), + scale, + ), + a_min = np.log10(mean) - clip, + a_max = np.log10(mean) + clip, + ) + +def logit_normal(mean, scale, clip): + '''log normal but for logit data like gamma and gae_lambda''' + return 1 - log_normal(1 - mean, scale, clip) + +def uniform_pow2(min, max): + '''Uniform distribution over powers of 2 between min and max inclusive''' + min_base = np.log2(min) + max_base = np.log2(max) + return 2**np.random.randint(min_base, max_base+1) + +def uniform(min, max): + '''Uniform distribution between min and max inclusive''' + return np.random.uniform(min, max) + +def int_uniform(min, max): + '''Uniform distribution between min and max inclusive''' + return np.random.randint(min, max+1) + +def sample_hyperparameters(sweep_config): + samples = {} + for name, param in sweep_config.items(): + if name in ('method', 'name', 'metric'): + continue + + assert isinstance(param, dict) + if any(isinstance(param[k], dict) for k in param): + samples[name] = sample_hyperparameters(param) + elif 'values' in param: + assert 'distribution' not in param + samples[name] = random.choice(param['values']) + elif 'distribution' in param: + if param['distribution'] == 'uniform': + samples[name] = uniform(param['min'], param['max']) + elif param['distribution'] == 'int_uniform': + samples[name] = int_uniform(param['min'], param['max']) + elif param['distribution'] == 'uniform_pow2': + samples[name] = uniform_pow2(param['min'], param['max']) + elif param['distribution'] == 'log_normal': + samples[name] = log_normal( + param['mean'], param['scale'], param['clip']) + elif param['distribution'] == 'logit_normal': + samples[name] = logit_normal( + param['mean'], 
param['scale'], param['clip']) + else: + raise ValueError(f'Invalid distribution: {param["distribution"]}') + else: + raise ValueError('Must specify either values or distribution') + + return samples def get_model_parameters(policy): """Helper function to count the number of trainable parameters.""" @@ -56,31 +125,9 @@ def make_policy(env, config): dropout=config.train.network.dropout, ).to(config.train.device) - -def train(args, make_env): +def train(args, vecenv): """Main training loop for the PPO agent.""" - backend_mapping = { - # Note: Only native backend is currently supported with GPUDrive - "native": pufferlib.vector.Native, - "serial": pufferlib.vector.Serial, - "multiprocessing": pufferlib.vector.Multiprocessing, - "ray": pufferlib.vector.Ray, - } - - backend = backend_mapping.get(args.vec.backend) - if not backend: - raise ValueError("Invalid --vec.backend.") - - vecenv = pufferlib.vector.make( - make_env, - num_envs=1, # GPUDrive is already batched - num_workers=args.vec.num_workers, - batch_size=args.vec.env_batch_size, - zero_copy=args.vec.zero_copy, - backend=backend, - ) - policy = make_policy(env=vecenv.driver_env, config=args).to( args.train.device ) @@ -88,10 +135,10 @@ def train(args, make_env): args.train.network.num_parameters = get_model_parameters(policy) args.train.env = args.environment.name - args.wandb = init_wandb(args, args.train.exp_id, id=args.train.exp_id) - args.train.__dict__.update(dict(args.wandb.config.train)) + wandb_run = init_wandb(args, args.train.exp_id, id=args.train.exp_id) + args.train.update(dict(wandb_run.config.train)) - data = ppo.create(args.train, vecenv, policy, wandb=args.wandb) + data = ppo.create(args.train, vecenv, policy, wandb=wandb_run) while data.global_step < args.train.total_timesteps: try: ppo.evaluate(data) # Rollout @@ -107,8 +154,7 @@ def train(args, make_env): ppo.evaluate(data) ppo.close(data) - -def init_wandb(args, name, id=None, resume=True): +def init_wandb(args, name, id=None, resume=True, tag=None): wandb.init( id=id or wandb.util.generate_id(), project=args.wandb.project, @@ -128,29 +174,6 @@ def init_wandb(args, name, id=None, resume=True): return wandb - -def sweep(args, project="PPO", sweep_name="my_sweep"): - """Initialize a WandB sweep with hyperparameters.""" - sweep_id = wandb.sweep( - sweep=dict( - method="random", - name=sweep_name, - metric={"goal": "maximize", "name": "environment/episode_return"}, - parameters={ - "learning_rate": { - "distribution": "log_uniform_values", - "min": 1e-4, - "max": 1e-1, - }, - "batch_size": {"values": [512, 1024, 2048]}, - "minibatch_size": {"values": [128, 256, 512]}, - }, - ), - project=project, - ) - wandb.agent(sweep_id, lambda: train(args), count=100) - - @app.command() def run( config_path: Annotated[ @@ -185,6 +208,7 @@ def run( project: Annotated[Optional[str], typer.Option(help="WandB project name")] = None, entity: Annotated[Optional[str], typer.Option(help="WandB entity name")] = None, group: Annotated[Optional[str], typer.Option(help="WandB group name")] = None, + max_runs: Annotated[Optional[int], typer.Option(help="Maximum number of sweep runs")] = 100, render: Annotated[Optional[int], typer.Option(help="Whether to render the environment; 0 or 1")] = None, ): """Run PPO training with the given configuration.""" @@ -279,9 +303,25 @@ def run( train_config=config.train, device=config.train.device, ) + vecenv = pufferlib.vector.make( + make_env, + num_envs=1, # GPUDrive is already batched + num_workers=config.vec.num_workers, + 
batch_size=config.vec.env_batch_size, + zero_copy=config.vec.zero_copy, + backend=pufferlib.vector.Native, + ) if config.mode == "train": - train(config, make_env) + train(config, vecenv) + elif config.mode == "sweep": + for i in range(max_runs): + np.random.seed(int(time.time())) + random.seed(int(time.time())) + hypers = sample_hyperparameters(config.sweep) + config.train.update(hypers['train']) + train(config, vecenv) + if __name__ == "__main__": diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index 7391c7da..90745133 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -1,4 +1,4 @@ -mode: "train" +mode: "sweep" use_rnn: false eval_model_path: null baseline: false @@ -26,11 +26,11 @@ environment: # Overrides default environment configs (see pygpudrive/env/config. sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random obs_radius: 50.0 # Visibility radius of the agents wandb: - entity: "" + entity: "jsuarez" project: "paper_1_self_play" group: "verify" mode: "online" # Options: online, offline, disabled - tags: ["ppo", "ff"] + tags: ["ppo", "ff", "basic-sweep"] ## NOTES ## Good batch size: 128 * number of controlled agents (e.g. 2**18) @@ -56,7 +56,7 @@ train: # # # PPO # # # torch_deterministic: false - total_timesteps: 1_000_000_000 + total_timesteps: 50_000_000 batch_size: 131_072 minibatch_size: 16384 learning_rate: 3e-4 @@ -96,6 +96,32 @@ train: render_format: "mp4" # Options: gif, mp4 render_fps: 15 # Frames per second +sweep: + train: + learning_rate: + distribution: "log_normal" + mean: 0.005 + scale: 1.0 + clip: 2.0 + + ent_coef: + distribution: "log_normal" + mean: 0.005 + scale: 0.5 + clip: 1.0 + + gamma: + distribution: "logit_normal" + mean: 0.98 + scale: 0.5 + clip: 1.0 + + gae_lambda: + distribution: "logit_normal" + mean: 0.95 + scale: 0.5 + clip: 1.0 + vec: backend: "native" # Only native is currently supported num_workers: 1 diff --git a/integrations/rl/puffer/ppo.py b/integrations/rl/puffer/ppo.py index 4a82a513..e5191a31 100644 --- a/integrations/rl/puffer/ppo.py +++ b/integrations/rl/puffer/ppo.py @@ -391,7 +391,7 @@ def train(data): def close(data): - data.vecenv.close() + #data.vecenv.close() data.utilization.stop() config = data.config if data.wandb is not None: From 48d2387d77d7fc79e8f34006d55f581916b4dcaf Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 14 Jan 2025 18:41:48 +0000 Subject: [PATCH 08/10] Sweeps fixed --- baselines/ippo/ippo_pufferlib.py | 37 ++++++++++--------- .../experiments/ippo_ff_p1_self_play.yaml | 16 ++++++++ 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/baselines/ippo/ippo_pufferlib.py b/baselines/ippo/ippo_pufferlib.py index a583609f..0c1abe62 100644 --- a/baselines/ippo/ippo_pufferlib.py +++ b/baselines/ippo/ippo_pufferlib.py @@ -154,6 +154,23 @@ def train(args, vecenv): ppo.evaluate(data) ppo.close(data) +def set_experiment_metadata(config): + datetime_ = datetime.now().strftime("%m_%d_%H_%M_%S_%f")[:-3] + if config["train"]["resample_scenes"]: + if config["train"]["resample_scenes"]: + dataset_size = config["train"]["resample_dataset_size"] + config["train"][ + "exp_id" + ] = f'PPO_R_{dataset_size}__{datetime_}' + else: + dataset_size = str(config["environment"]["k_unique_scenes"]) + config["train"][ + "exp_id" + ] = f'PPO_S_{dataset_size}__{datetime_}' + + config["environment"]["dataset_size"] = dataset_size + + def 
init_wandb(args, name, id=None, resume=True, tag=None): wandb.init( id=id or wandb.util.generate_id(), @@ -264,21 +281,6 @@ def run( {k: v for k, v in wandb_config.items() if v is not None} ) - datetime_ = datetime.now().strftime("%m_%d_%H_%M_%S_%f")[:-3] - - if config["train"]["resample_scenes"]: - if config["train"]["resample_scenes"]: - dataset_size = config["train"]["resample_dataset_size"] - config["train"][ - "exp_id" - ] = f'{config["train"]["exp_id"]}__R_{dataset_size}__{datetime_}' - else: - dataset_size = str(config["environment"]["k_unique_scenes"]) - config["train"][ - "exp_id" - ] = f'{config["train"]["exp_id"]}__S_{dataset_size}__{datetime_}' - - config["environment"]["dataset_size"] = dataset_size config["train"]["device"] = config["train"].get( "device", "cpu" ) # Default to 'cpu' if not set @@ -313,17 +315,18 @@ def run( ) if config.mode == "train": + set_experiment_metadata(config) train(config, vecenv) elif config.mode == "sweep": for i in range(max_runs): np.random.seed(int(time.time())) random.seed(int(time.time())) + set_experiment_metadata(config) hypers = sample_hyperparameters(config.sweep) config.train.update(hypers['train']) + config.environment.update(hypers['environment']) train(config, vecenv) - - if __name__ == "__main__": import cProfile cProfile.run('app()', 'profiled') diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index 90745133..9a597870 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -122,6 +122,22 @@ sweep: scale: 0.5 clip: 1.0 + environment: + collision_weight: + distribution: "uniform" + min: -1.0 + max: 0.0 + + off_road_weight: + distribution: "uniform" + min: -1.0 + max: 0.0 + + goal_achieved_weight: + distribution: "uniform" + min: 0.0 + max: 1.0 + vec: backend: "native" # Only native is currently supported num_workers: 1 From e56e42696e310502668daa0c774ea132e3ed9e6d Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 21 Jan 2025 19:08:50 +0000 Subject: [PATCH 09/10] Match baseline --- baselines/ippo/ippo_pufferlib.py | 4 ++-- .../experiments/ippo_ff_p1_self_play.yaml | 14 +++++------ networks/late_fusion.py | 24 ++++++++----------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/baselines/ippo/ippo_pufferlib.py b/baselines/ippo/ippo_pufferlib.py index 2efdcdbc..35cdb93f 100644 --- a/baselines/ippo/ippo_pufferlib.py +++ b/baselines/ippo/ippo_pufferlib.py @@ -333,5 +333,5 @@ def run( if __name__ == "__main__": import cProfile - cProfile.run('app()', 'profiled') - #app() + #cProfile.run('app()', 'profiled') + app() diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index 56b4726d..56d5b2c0 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -27,10 +27,10 @@ environment: # Overrides default environment configs (see pygpudrive/env/config. 
obs_radius: 50.0 # Visibility radius of the agents wandb: entity: "jsuarez" - project: "self_play_rl_safe" + project: "puffer" group: "small_experiments" mode: "online" # Options: online, offline, disabled - tags: ["ppo", "ff", "basic-sweep"] + tags: ["ppo", "ff"] train: exp_id: PPO # Set dynamically in the script if needed @@ -45,27 +45,25 @@ train: resample_scenes: false resample_dataset_size: 500 # Number of unique scenes to sample from resample_interval: 1_000_000 - resample_criterion: "global_step" - resample_limit: 1000 # Resample until the limit is reached; set to a large number to continue resampling indefinitely sample_with_replacement: true shuffle_dataset: false # # # PPO # # # torch_deterministic: false - total_timesteps: 50_000_000 + total_timesteps: 200_000_000 batch_size: 131_072 - minibatch_size: 16384 + minibatch_size: 8192 learning_rate: 3e-4 anneal_lr: false gamma: 0.99 gae_lambda: 0.95 - update_epochs: 1 + update_epochs: 2 norm_adv: true clip_coef: 0.2 clip_vloss: false vf_clip_coef: 0.2 ent_coef: 0.0001 - vf_coef: 0.5 + vf_coef: 0.4 max_grad_norm: 0.5 target_kl: null log_window: 500 diff --git a/networks/late_fusion.py b/networks/late_fusion.py index 2e282828..42dac161 100644 --- a/networks/late_fusion.py +++ b/networks/late_fusion.py @@ -109,11 +109,11 @@ def __init__( self, action_dim, input_dim=64, - hidden_dim=192, + hidden_dim=128, pred_heads_arch=[128], num_transformer_layers=0, dropout=0.00, - act_func="relu", + act_func="tanh", ): super().__init__() self.input_dim = input_dim @@ -129,9 +129,9 @@ def __init__( pufferlib.pytorch.layer_init( nn.Linear(constants.EGO_FEAT_DIM, input_dim) ), - #nn.LayerNorm(input_dim), + nn.LayerNorm(input_dim), self.act_func, - #nn.Dropout(self.dropout), + nn.Dropout(self.dropout), pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), ) @@ -139,9 +139,9 @@ def __init__( pufferlib.pytorch.layer_init( nn.Linear(constants.PARTNER_FEAT_DIM, input_dim) ), - #nn.LayerNorm(input_dim), + nn.LayerNorm(input_dim), self.act_func, - #nn.Dropout(self.dropout), + nn.Dropout(self.dropout), pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), ) @@ -149,19 +149,16 @@ def __init__( pufferlib.pytorch.layer_init( nn.Linear(constants.ROAD_GRAPH_FEAT_DIM, input_dim) ), - #nn.LayerNorm(input_dim), + nn.LayerNorm(input_dim), self.act_func, - #nn.Dropout(self.dropout), + nn.Dropout(self.dropout), pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), ) - ''' self.shared_embed = nn.Sequential( nn.Linear(self.input_dim * self.num_modes, self.hidden_dim), - #nn.Dropout(self.dropout) - self.act_func, + nn.Dropout(self.dropout) ) - ''' if self.num_transformer_layers > 0: self.transformer_layers = nn.Sequential( @@ -193,8 +190,7 @@ def encode_observations(self, observation): if self.num_transformer_layers > 0: embed = self.transformer_layers(embed) - return embed - #return self.shared_embed(embed) + return self.shared_embed(embed) def forward(self, obs, action=None, deterministic=False): From db8d06babfbf4354a51b11356752a0efb863f33f Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 22 Jan 2025 01:19:43 +0000 Subject: [PATCH 10/10] Current working model : --- .../experiments/eval/config/eval_config.yaml | 8 +++---- .../experiments/eval/config/model_config.yaml | 13 ++++------- examples/experiments/eval/evaluate.py | 22 +++++++++++-------- .../experiments/ippo_ff_p1_self_play.yaml | 16 +++++++------- 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/examples/experiments/eval/config/eval_config.yaml 
b/examples/experiments/eval/config/eval_config.yaml index d8a6acb8..5506f79d 100644 --- a/examples/experiments/eval/config/eval_config.yaml +++ b/examples/experiments/eval/config/eval_config.yaml @@ -2,11 +2,11 @@ res_path: examples/experiments/eval/dataframes/0120 # Store dataframes here test_dataset_size: 10_000 # Number of test scenarios to evaluate on # Environment settings -train_dir: "/scratch/kj2676/gpudrive/data/processed/training" -test_dir: "/scratch/kj2676/gpudrive/data/processed/validation" +train_dir: "data/processed/training" +test_dir: "data/processed/validation" num_worlds: 200 # Number of parallel environments for evaluation -max_controlled_agents: 128 # Maximum number of agents controlled by the model. +max_controlled_agents: 64 # Maximum number of agents controlled by the model. ego_state: true road_map_obs: true partner_obs: true @@ -24,4 +24,4 @@ polyline_reduction_threshold: 0.1 # Rate at which to sample points from the poly sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random obs_radius: 50.0 # Visibility radius of the agents -device: "cuda" # Options: "cpu", "cuda" \ No newline at end of file +device: "cuda" # Options: "cpu", "cuda" diff --git a/examples/experiments/eval/config/model_config.yaml b/examples/experiments/eval/config/model_config.yaml index 509c0507..9515f629 100644 --- a/examples/experiments/eval/config/model_config.yaml +++ b/examples/experiments/eval/config/model_config.yaml @@ -1,15 +1,10 @@ -models_path: examples/experiments/eval/models/0120 +models_path: wandb/run-20250121_225758-PPO_R_1000__01_21_22_57_53_461/files/runs/PPO_R_1000__01_21_22_57_53_461 models: - - name: random_baseline - train_dataset_size: null + - name: model_PPO_R_1000__01_21_22_57_53_461_001520 + train_dataset_size: 1000 wandb: null trained_on: null - - - name: model_PPO__R_1000__01_19_11_15_25_854_002500 - train_dataset_size: 1000 - wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_1000__01_19_11_15_25_854?nw=nwuserdaphnecor - trained_on: cluster # - name: model_PPO__R_1000__01_10_17_06_33_697_003500 # train_dataset_size: 1000 @@ -39,4 +34,4 @@ models: # # - name: model_PPO__R_100000__01_06_11_29_36_390_012000 # # train_dataset_size: 100_000 # # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_100000__01_06_11_29_36_390?nw=nwuserdaphnecor - # # trained_on: cluster \ No newline at end of file + # # trained_on: cluster diff --git a/examples/experiments/eval/evaluate.py b/examples/experiments/eval/evaluate.py index 700e3803..1714ec14 100644 --- a/examples/experiments/eval/evaluate.py +++ b/examples/experiments/eval/evaluate.py @@ -101,7 +101,7 @@ def rollout( episode_len = env.config.episode_len # Reset episode - next_obs = env.reset() + next_obs = env.reset(env.cont_agent_mask) # Storage goal_achieved = torch.zeros((num_worlds, max_agent_count), device=device) @@ -123,7 +123,6 @@ def rollout( ) controlled_agent_mask = env.cont_agent_mask.clone() & ~bugged_agent_mask - live_agent_mask = controlled_agent_mask.clone() for time_step in range(episode_len): @@ -132,14 +131,14 @@ def rollout( # Get actions for active agents if live_agent_mask.any(): action, _, _, _ = policy( - next_obs[live_agent_mask], deterministic=deterministic + next_obs, deterministic=deterministic ) # Insert actions into a template action_template = torch.zeros( (num_worlds, max_agent_count), dtype=torch.int64, device=device ) - action_template[live_agent_mask] = action.to(device) + 
action_template[env.cont_agent_mask] = action.to(device) # Step the environment env.step_dynamics(action_template) @@ -166,7 +165,7 @@ def rollout( ) # Update observations, dones, and infos - next_obs = env.get_obs() + next_obs = env.get_obs(env.cont_agent_mask) dones = env.get_dones().bool() infos = env.get_infos() @@ -191,8 +190,8 @@ def rollout( for world in done_worlds: if world in active_worlds: - active_worlds.remove(world) logging.debug(f"World {world} done at time step {time_step}") + active_worlds.remove(world) if not active_worlds: # Exit early if all worlds are done break @@ -374,9 +373,10 @@ def make_env(config, train_loader): train_loader = SceneDataLoader( root=eval_config.train_dir, batch_size=eval_config.num_worlds, - dataset_size=model.train_dataset_size - if model.name != "random_baseline" - else 1000, + dataset_size=1000, #Below didn't work. Was None + #model.train_dataset_size + #if model.name != "random_baseline" + #else 1000, sample_with_replacement=False, shuffle=False, ) @@ -405,6 +405,10 @@ def make_env(config, train_loader): render_sim_state=False, ) + result = df_res_train.groupby('dataset')[['goal_achieved', 'collided', 'off_road', 'not_goal_nor_crashed']].agg(['mean', 'std']) + print('Result: ', result) + breakpoint() + df_res_test = evaluate_policy( env=env, policy=policy, diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index 56d5b2c0..b453569d 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -1,13 +1,13 @@ -mode: "sweep" +mode: "train" use_rnn: false eval_model_path: null baseline: false -data_dir: "data/processed/examples" +data_dir: "data/processed/training" environment: # Overrides default environment configs (see pygpudrive/env/config.py) name: "gpudrive" - num_worlds: 100 # Number of parallel environments - k_unique_scenes: 2 # Number of unique scenes to sample from + num_worlds: 200 # Number of parallel environments + k_unique_scenes: 1000 # Number of unique scenes to sample from max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp ego_state: true road_map_obs: true @@ -42,9 +42,9 @@ train: compile_mode: "reduce-overhead" # # # Data sampling # # # - resample_scenes: false - resample_dataset_size: 500 # Number of unique scenes to sample from - resample_interval: 1_000_000 + resample_scenes: True + resample_dataset_size: 1000 # Number of unique scenes to sample from + resample_interval: 5_000_000 sample_with_replacement: true shuffle_dataset: false @@ -71,7 +71,7 @@ train: # # # Network # # # network: input_dim: 64 # Embedding of the input features - hidden_dim: 192 # Latent dimension + hidden_dim: 128 # Latent dimension pred_heads_arch: [64] # Arch of the prediction heads (actor and critic) num_transformer_layers: 0 # Number of transformer layers dropout: 0.01