example_ray_team_vs_random.py
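"""Train a single PPO policy controlling one team in the soccer-twos
``team_vs_policy`` environment variation (single-agent interface,
``multiagent=False``) with Ray Tune + RLlib, then report the best trial
and checkpoint by mean episode reward."""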
import ray
from ray import tune
from soccer_twos import EnvType
from utils import create_rllib_env
NUM_ENVS_PER_WORKER = 5
if __name__ == "__main__":
    ray.init()

    # Register the soccer-twos environment factory with RLlib under the name "Soccer".
    tune.registry.register_env("Soccer", create_rllib_env)

    analysis = tune.run(
        "PPO",
        name="PPO_1",
        config={
            # system settings
            "num_gpus": 1,
            "num_workers": 8,
            "num_envs_per_worker": NUM_ENVS_PER_WORKER,
            "log_level": "INFO",
            "framework": "torch",
            # RL setup
            "env": "Soccer",
            "env_config": {
                "num_envs_per_worker": NUM_ENVS_PER_WORKER,
                "variation": EnvType.team_vs_policy,
                "multiagent": False,
            },
            "model": {
                "vf_share_layers": True,
                "fcnet_hiddens": [512, 512],
            },
        },
        stop={
            "timesteps_total": 20000000,  # 20M
            # "time_total_s": 14400,  # 4h
        },
        checkpoint_freq=100,
        checkpoint_at_end=True,
        local_dir="./ray_results",
        # restore="./ray_results/PPO_selfplay_1/PPO_Soccer_ID/checkpoint_00X/checkpoint-X",
    )

    # Get the best trial based on the maximum mean episode reward
    # across all training iterations.
    best_trial = analysis.get_best_trial("episode_reward_mean", mode="max")
    print(best_trial)

    # Get the best checkpoint for that trial by the same metric.
    best_checkpoint = analysis.get_best_checkpoint(
        trial=best_trial, metric="episode_reward_mean", mode="max"
    )
    print(best_checkpoint)

    print("Done training")