test_dql.py
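
"""Smoke tests for the deep Q-learning (DQL) agent.

Each test trains a small DQL agent for a few episodes on a discrete-action
environment -- observation-vector inputs (CartPole, LunarLander) or stacked
frames (Breakout, SpaceInvaders) -- once per supported backend library
(TensorFlow, Keras, PyTorch).
"""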
from numpy.random import seed
seed(28)
from tensorflow import random
random.set_seed(28)

from reinforcement_learning.utils.plotter import plot_running_average
from reinforcement_learning.deep_RL.const import LIBRARY_TF, LIBRARY_KERAS, LIBRARY_TORCH, \
    OPTIMIZER_Adam, OPTIMIZER_RMSprop, OPTIMIZER_Adadelta, OPTIMIZER_Adagrad, OPTIMIZER_SGD
from reinforcement_learning.deep_RL.utils.utils import get_file_name, run_trained_agent
from reinforcement_learning.deep_RL.utils.devices import set_device
from reinforcement_learning.deep_RL.envs import CartPole, Pendulum, MountainCarContinuous, \
    LunarLander, LunarLanderContinuous, BipedalWalker, Breakout, SpaceInvaders
from reinforcement_learning.deep_RL.algorithms.deep_q_learning import Agent, train_agent

def play_dql(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, double_dql, tau,
             lib_type=LIBRARY_TF, enable_models_saving=False, load_checkpoint=False,
             perform_random_gameplay=True, rnd_gameplay_episodes=None,
             plot=True, test=False):
    # DQL estimates a Q-value per action, so it requires a discrete action space.
    if not custom_env.is_discrete_action_space:
        print('\n', "Environment's action space must be discrete!", '\n')
        return

    custom_env.env.seed(28)
    set_device(lib_type, devices_dict=None)

    method_name = 'DQL'
    base_dir = 'tmp/' + custom_env.file_name + '/' + method_name + '/'

    agent = Agent(custom_env, fc_layers_dims, n_episodes,
                  alpha, optimizer_type,
                  double_dql=double_dql, tau=tau, lib_type=lib_type, base_dir=base_dir)

    # Train, then optionally plot the running average of the episode scores.
    scores_history = train_agent(custom_env, agent, n_episodes,
                                 enable_models_saving, load_checkpoint,
                                 perform_random_gameplay, rnd_gameplay_episodes)
    if plot:
        plot_running_average(
            custom_env.name, method_name, scores_history,
            # file_name=get_file_name(custom_env.file_name, agent, n_episodes, method_name) + '_train',
            directory=agent.chkpt_dir if enable_models_saving else None
        )

    # Optionally evaluate the trained agent and plot its test scores as well.
    scores_history_test = None
    if test:
        scores_history_test = run_trained_agent(custom_env, agent, enable_models_saving)
        if plot:
            plot_running_average(
                custom_env.name, method_name, scores_history_test,
                # file_name=get_file_name(custom_env.file_name, agent, n_episodes, method_name) + '_test',
                directory=agent.chkpt_dir if enable_models_saving else None
            )

    return agent, scores_history, scores_history_test
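
# A hypothetical direct call (sketch), equivalent to what the run_dql_*
# helpers below do, but with test-time evaluation enabled:
#   agent, train_scores, test_scores = play_dql(
#       CartPole(), n_episodes=5, fc_layers_dims=[256, 256],
#       optimizer_type=OPTIMIZER_Adam, alpha=0.0005, double_dql=False, tau=None,
#       lib_type=LIBRARY_TF, test=True)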

#################################
# Discrete AS (action space):


def run_dql_cartpole(libtype):
    custom_env = CartPole()
    fc_layers_dims = [256, 256]
    optimizer_type = OPTIMIZER_Adam
    alpha = 0.0005  # 0.003 ?
    double_dql = False
    tau = None
    n_episodes = 5  # full run: n_episodes = 500
    play_dql(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, double_dql, tau, libtype,
             rnd_gameplay_episodes=n_episodes)

def run_dql_lunar_lander(libtype):
    custom_env = LunarLander()
    fc_layers_dims = [256, 256]
    optimizer_type = OPTIMIZER_Adam
    alpha = 0.0005  # 0.003 ?
    double_dql = False
    tau = None
    n_episodes = 5  # full run: n_episodes = 500 (~150-200 episodes solve LunarLander)
    play_dql(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, double_dql, tau, libtype,
             rnd_gameplay_episodes=n_episodes)

def run_dql_breakout(libtype):
    custom_env = Breakout()
    fc_layers_dims = [1024]
    optimizer_type = OPTIMIZER_RMSprop  # OPTIMIZER_SGD
    alpha = 0.00025
    double_dql = True
    tau = 10000  # target-network update interval
    n_episodes = 2  # full run: start with n_episodes = 200, then 5000 ?
    play_dql(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, double_dql, tau, libtype,
             rnd_gameplay_episodes=n_episodes)

def run_dql_space_invaders(libtype):
    custom_env = SpaceInvaders()
    fc_layers_dims = [1024]
    optimizer_type = OPTIMIZER_RMSprop  # OPTIMIZER_SGD
    alpha = 0.003
    double_dql = True
    tau = None
    n_episodes = 2  # full run: n_episodes = 50
    play_dql(custom_env, n_episodes, fc_layers_dims, optimizer_type, alpha, double_dql, tau, libtype,
             rnd_gameplay_episodes=n_episodes)

#################################


def run_test_OBSVEC_DISCRETE(lib_type):
    run_dql_cartpole(lib_type)
    run_dql_lunar_lander(lib_type)


def run_test_FRAMES_DISCRETE(lib_type):
    run_dql_breakout(lib_type)
    run_dql_space_invaders(lib_type)

#################################


def test_OBSVEC_TF():
    run_test_OBSVEC_DISCRETE(LIBRARY_TF)


def test_OBSVEC_KERAS():
    run_test_OBSVEC_DISCRETE(LIBRARY_KERAS)


def test_OBSVEC_TORCH():
    run_test_OBSVEC_DISCRETE(LIBRARY_TORCH)


def test_FRAMES_TF():
    run_test_FRAMES_DISCRETE(LIBRARY_TF)


def test_FRAMES_KERAS():
    run_test_FRAMES_DISCRETE(LIBRARY_KERAS)


def test_FRAMES_TORCH():
    run_test_FRAMES_DISCRETE(LIBRARY_TORCH)
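
if __name__ == '__main__':
    # Minimal entry point (sketch): these test_* functions are normally
    # collected by a test runner such as pytest; running the module directly
    # exercises one backend's observation-vector tests.
    test_OBSVEC_TF()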