Emerge-Lab · daphne-cornelisse · Dec 27, 2024 · Dec 26, 2024 · Dec 26, 2024 · Dec 26, 2024
diff --git a/.env.template b/.env.template
@@ -0,0 +1,13 @@
+# .env template
+
+# Path for logs
+LOG_FOLDER=
+
+# Your HPC account code
+NYU_HPC_ACCOUNT=
+
+# Your NYU ID
+USERNAME=
+
+SINGULARITY_IMAGE=
+OVERLAY_FILE=
diff --git a/.gitignore b/.gitignore
@@ -189,7 +189,6 @@ celerybeat.pid
 *.sage.py
 
 # Environments
-.env
 .venv
 venv/
 ENV/
@@ -239,4 +238,11 @@ pyrightconfig.json
 
 *~
 
-# End of https://www.toptal.com/developers/gitignore/api/python,c++
+# Environment variables
+# To be manually created using .env.template
+.env
+
+# Logs
+examples/experiments/scripts/logs/*
+
+# End of https://www.toptal.com/developers/gitignore/api/python,c++
diff --git a/baselines/ippo/config/ippo_ff_puffer.yaml b/baselines/ippo/config/ippo_ff_puffer.yaml
@@ -7,8 +7,8 @@ data_dir: "data/processed/examples"
 environment: # Overrides default environment configs (see pygpudrive/env/config.py)
   name: "gpudrive"
   num_worlds: 100 # Number of parallel environments
-  k_unique_scenes: 100 # Number of unique scenes to sample from
-  max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
+  k_unique_scenes: 3 # Number of unique scenes to sample from
+  max_controlled_agents: 128 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
   ego_state: true
   road_map_obs: true
   partner_obs: true

diff --git a/baselines/ippo/ippo_pufferlib.py b/baselines/ippo/ippo_pufferlib.py
@@ -7,20 +7,26 @@
 """
 
 import os
+from typing import Optional
+from typing_extensions import Annotated
 import yaml
 from datetime import datetime
 import torch
 import wandb
 from box import Box
 from integrations.rl.puffer import ppo
 from integrations.rl.puffer.puffer_env import env_creator
-from integrations.rl.puffer.utils import Policy, LiDARPolicy
+from integrations.rl.puffer.utils import Policy
 
 import pufferlib
 import pufferlib.vector
 import pufferlib.frameworks.cleanrl
 from rich.console import Console
 
+import typer
+from typer import Typer
+
+app = Typer()
 
 def load_config(config_path):
     """Load the configuration file."""
@@ -42,7 +48,7 @@ def make_policy(env):
     return pufferlib.frameworks.cleanrl.Policy(Policy(env))
 
 
-def train(args):
+def train(args, make_env):
     """Main training loop for the PPO agent."""
     args.wandb = init_wandb(args, args.train.exp_id, id=args.train.exp_id)
     args.train.__dict__.update(dict(args.wandb.config.train))
@@ -131,9 +137,66 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
     wandb.agent(sweep_id, lambda: train(args), count=100)
 
 
-if __name__ == "__main__":
-
-    config = load_config("baselines/ippo/config/ippo_ff_puffer.yaml")
+@app.command()
+def run(
+    config_path: Annotated[
+        str, typer.Argument(help="The path to the default configuration file")
+    ] = "baselines/ippo/config/ippo_ff_puffer.yaml",
+    *,
+    #fmt: off
+    # Environment options
+    num_worlds: Annotated[Optional[int], typer.Option(help="Number of parallel envs")] = None,
+    k_unique_scenes: Annotated[Optional[int], typer.Option(help="The number of unique scenes to sample")] = None,
+    collision_weight: Annotated[Optional[float], typer.Option(help="The weight for collision penalty")] = None,
+    off_road_weight: Annotated[Optional[float], typer.Option(help="The weight for off-road penalty")] = None,
+    goal_achieved_weight: Annotated[Optional[float], typer.Option(help="The weight for goal-achieved reward")] = None,
+    dist_to_goal_threshold: Annotated[Optional[float], typer.Option(help="The distance threshold for goal-achieved")] = None,
+    sampling_seed: Annotated[Optional[int], typer.Option(help="The seed for sampling scenes")] = None,
+    obs_radius: Annotated[Optional[float], typer.Option(help="The radius for the observation")] = None,
+    # Train options
+    learning_rate: Annotated[Optional[float], typer.Option(help="The learning rate for training")] = None,
+    resample_scenes: Annotated[Optional[int], typer.Option(help="Whether to resample scenes during training; 0 or 1")] = None,
+    resample_interval: Annotated[Optional[int], typer.Option(help="The interval for resampling scenes")] = None,
+    total_timesteps: Annotated[Optional[int], typer.Option(help="The total number of training steps")] = None,
+    ent_coef: Annotated[Optional[float], typer.Option(help="Entropy coefficient")] = None,
+    # Wandb logging options
+    project: Annotated[Optional[str], typer.Option(help="WandB project name")] = None,
+    entity: Annotated[Optional[str], typer.Option(help="WandB entity name")] = None,
+    group: Annotated[Optional[str], typer.Option(help="WandB group name")] = None,
+):
+    """Run PPO training with the given configuration."""
+    #fmt: on
+
+    # Load default configs
+    config = load_config(config_path)
+
+    # Override configs with command-line arguments
+    env_config = {
+        "num_worlds": num_worlds,
+        "k_unique_scenes": k_unique_scenes,
+        "collision_weight": collision_weight,
+        "off_road_weight": off_road_weight,
+        "goal_achieved_weight": goal_achieved_weight,
+        "dist_to_goal_threshold": dist_to_goal_threshold,
+        "sampling_seed": sampling_seed,
+        "obs_radius": obs_radius,
+    }
+    config.environment.update({k: v for k, v in env_config.items() if v is not None})
+    train_config = {
+        "learning_rate": learning_rate,
+        "resample_scenes": None if resample_scenes is None else bool(resample_scenes),
+        "resample_interval": resample_interval,
+        "total_timesteps": total_timesteps,
+        "ent_coef": ent_coef,
+    }
+    config.train.update({k: v for k, v in train_config.items() if v is not None})
+
+    wandb_config = {
+        "project": project,
+        "entity": entity,
+        "group": group,
+    }
+    config.wandb.update({k: v for k, v in wandb_config.items() if v is not None})
 
     make_env = env_creator(
         data_dir=config.data_dir,
@@ -143,4 +206,7 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
     )
 
     if config.mode == "train":
-        train(config)
+        train(config, make_env)
+
+if __name__ == "__main__":
+    app()
diff --git a/baselines/scripts/bash_exec_paper_fig.sh b/baselines/scripts/bash_exec_paper_fig.sh
diff --git a/baselines/scripts/bash_exec_solve_n_scenes.sh b/baselines/scripts/bash_exec_solve_n_scenes.sh
diff --git a/baselines/scripts/sbatch_ippo.sh b/baselines/scripts/sbatch_ippo.sh
diff --git a/baselines/scripts/sbatch_paper_fig.sh b/baselines/scripts/sbatch_paper_fig.sh
diff --git a/baselines/scripts/sbatch_solve_n_scenes.sh b/baselines/scripts/sbatch_solve_n_scenes.sh
diff --git a/data/processed/debug/tfrecord-00005-of-00150_2bd577a009790706.json b/data/processed/debug/tfrecord-00005-of-00150_2bd577a009790706.json
diff --git a/data/processed/waymax/scenario_ab2a72c63f8fd589.pkl b/data/processed/waymax/scenario_ab2a72c63f8fd589.pkl
diff --git a/data/processed/waymax/waymax_scenario_11671609ebfa3185.pkl b/data/processed/waymax/waymax_scenario_11671609ebfa3185.pkl
diff --git a/data/processed/waymax/waymax_scenario_ab2a72c63f8fd589.pkl b/data/processed/waymax/waymax_scenario_ab2a72c63f8fd589.pkl
diff --git a/environment.yml b/environment.yml
@@ -62,4 +62,9 @@ dependencies:
       - urllib3==2.2.1
       - virtualenv==20.25.1
       - zipp==3.18.1
-      - huggingface_hub==0.26.5
+      - huggingface_hub==0.26.5
+      - wandb==0.19.1
+      - python-box==7.3.0
+      - python-dotenv==1.0.1
+      - jax==0.4.0
+      - typer==0.9.0