# test_ddpg.py

# Seed numpy and tensorflow before any other imports so runs are reproducible.
from numpy.random import seed
seed(28)
from tensorflow import random
random.set_seed(28)
from reinforcement_learning.utils.plotter import plot_running_average
from reinforcement_learning.deep_RL.const import LIBRARY_TF, LIBRARY_KERAS, LIBRARY_TORCH, \
OPTIMIZER_Adam, OPTIMIZER_RMSprop, OPTIMIZER_Adadelta, OPTIMIZER_Adagrad, OPTIMIZER_SGD, \
INPUT_TYPE_OBSERVATION_VECTOR
from reinforcement_learning.deep_RL.utils.utils import get_file_name, run_trained_agent
from reinforcement_learning.deep_RL.utils.devices import set_device
from reinforcement_learning.deep_RL.envs import CartPole, Pendulum, MountainCarContinuous, \
LunarLander, LunarLanderContinuous, BipedalWalker, Breakout, SpaceInvaders
from reinforcement_learning.deep_RL.algorithms.deep_deterministic_policy_gradient import Agent, train_agent


def play_ddpg(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, beta,
              lib_type=LIBRARY_TF, enable_models_saving=False, load_checkpoint=False,
              plot=True, test=False):
    """Train (and optionally test) a DDPG agent on custom_env.

    alpha and beta are the actor's and critic's learning rates, respectively.
    Returns (agent, scores_history, scores_history_test).
    """
    if custom_env.is_discrete_action_space:
        print('\n', "Environment's Action Space should be continuous!", '\n')
        return

    if custom_env.input_type != INPUT_TYPE_OBSERVATION_VECTOR:
        print('\n', 'Algorithm currently works only with INPUT_TYPE_OBSERVATION_VECTOR!', '\n')
        return

    if lib_type == LIBRARY_KERAS:
        print('\n', "Algorithm currently doesn't work with Keras", '\n')
        return

    tau = 0.001  # soft target-network update coefficient

    custom_env.env.seed(28)
    set_device(lib_type, devices_dict=None)

    method_name = 'DDPG'
    base_dir = 'tmp/' + custom_env.file_name + '/' + method_name + '/'

    agent = Agent(custom_env, fc_layers_dims, tau,
                  optimizer_type, alpha, beta,
                  memory_batch_size=custom_env.memory_batch_size, lib_type=lib_type, base_dir=base_dir)

    scores_history = train_agent(custom_env, agent, n_episodes,
                                 enable_models_saving, load_checkpoint)

    if plot:
        plot_running_average(
            custom_env.name, method_name, scores_history,
            # file_name=get_file_name(custom_env.file_name, agent, n_episodes, method_name) + '_train',
            directory=agent.chkpt_dir if enable_models_saving else None
        )

    scores_history_test = None
    if test:
        scores_history_test = run_trained_agent(custom_env, agent, enable_models_saving)
        if plot:
            plot_running_average(
                custom_env.name, method_name, scores_history_test,
                # file_name=get_file_name(custom_env.file_name, agent, n_episodes, method_name) + '_test',
                directory=agent.chkpt_dir if enable_models_saving else None
            )

    return agent, scores_history, scores_history_test
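

# Illustrative sketch (an assumption, not part of the test suite): resume a
# previously saved Pendulum agent and evaluate it, exercising the
# load_checkpoint and test flags that the runners below leave at their
# defaults. Named without the test_ prefix so pytest does not collect it.
def example_resume_and_test_pendulum(lib_type=LIBRARY_TORCH):
    play_ddpg(Pendulum(), n_episodes=2, fc_layers_dims=[800, 600],
              optimizer_type=OPTIMIZER_Adam, alpha=0.00005, beta=0.0005,
              lib_type=lib_type, enable_models_saving=True,
              load_checkpoint=True, test=True)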


#################################
# Continuous action space (AS) environments:

def run_ddpg_pendulum(lib_type):
    custom_env = Pendulum()
    fc_layers_dims = [800, 600]
    optimizer_type = OPTIMIZER_Adam
    alpha = 0.00005
    beta = alpha * 10
    n_episodes = 2  # quick smoke test; full run: 1000
    play_ddpg(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, beta, lib_type)


def run_ddpg_mountain_car_continuous(lib_type):
    custom_env = MountainCarContinuous()
    fc_layers_dims = [400, 300]
    optimizer_type = OPTIMIZER_Adam
    alpha = 0.000025
    beta = alpha * 10
    n_episodes = 2  # quick smoke test; full run: 1000
    play_ddpg(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, beta, lib_type)


def run_ddpg_lunar_lander_continuous(lib_type):
    custom_env = LunarLanderContinuous()
    fc_layers_dims = [400, 300]
    optimizer_type = OPTIMIZER_Adam
    alpha = 0.000025
    beta = alpha * 10
    n_episodes = 2  # quick smoke test; full run: 1000
    play_ddpg(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, beta, lib_type)


def run_ddpg_bipedal_walker(lib_type):
    custom_env = BipedalWalker()
    fc_layers_dims = [400, 300]
    optimizer_type = OPTIMIZER_Adam
    alpha = 0.00005
    beta = alpha * 10
    n_episodes = 2  # quick smoke test; full run: 5000
    play_ddpg(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, beta, lib_type)


#################################
def run_test_OBSVEC_CONTINUOUS(lib_type):
    run_ddpg_pendulum(lib_type)                 # n_actions = 1
    run_ddpg_mountain_car_continuous(lib_type)  # n_actions = 1
    run_ddpg_lunar_lander_continuous(lib_type)  # n_actions = 2
    run_ddpg_bipedal_walker(lib_type)           # n_actions = 4


#################################
def test_OBSVEC_CONTINUOUS_TF():
    run_test_OBSVEC_CONTINUOUS(LIBRARY_TF)


# Keras backend is not supported by this DDPG implementation
# (see the guard in play_ddpg), so its test stays disabled:
# def test_OBSVEC_CONTINUOUS_KERAS():
#     run_test_OBSVEC_CONTINUOUS(LIBRARY_KERAS)


def test_OBSVEC_CONTINUOUS_TORCH():
    run_test_OBSVEC_CONTINUOUS(LIBRARY_TORCH)
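

if __name__ == '__main__':
    # Minimal sketch for running this file directly; pytest discovers the
    # test_* functions on its own. Choosing the Torch backend is an assumption.
    test_OBSVEC_CONTINUOUS_TORCH()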