# RLinterface module
"""
This module provides a standard interface for computational experiments with
reinforcement-learning agents and environments.  The interface is designed to
facilitate comparison of different agent designs and their application to
different problems (environments).  See
http://abee.cs.ualberta.ca:7777/rl-twiki/bin/view/RLAI/RLI5.

Class: RLinterface

    Initialize with:  rli = RLinterface(agentStartFn, agentStepFn, envStartFn, envStepFn)
    where             agentStartFn(s) -> a
                      agentStepFn(s, r) -> a
                      envStartFn() -> s
                      envStepFn(a) -> s, r

Methods:
    step()                                    --> r, s, a
    steps(numSteps)                           --> r, s, a, r, s, a, r, s, a, ...
    episode([maxSteps])                       --> s0, a0, r1, s1, a1, ..., rT, 'terminal'
    episodes(num, maxSteps [,maxStepsTotal])  --> s0, a0, r1, s1, a1, ..., rT, 'terminal', s0, a0, ...
    stepsQ(numSteps)                          like steps but with no returned value (quicker and quieter)
    episodeQ([maxSteps])                      like episode but with no returned value (quicker and quieter)
    episodesQ(num, maxSteps [,maxStepsTotal]) like episodes but with no returned value (quicker and quieter)

A usage sketch with a toy agent and environment appears under __main__ at the
end of this file.
"""
class RLinterface:
    """Object associating a reinforcement learning agent with its environment;
    stores the next action; see http://rlai.cs.ualberta.ca/RLAI/RLinterface.html."""

    def __init__(self, agentStartFn, agentStepFn, envStartFn, envStepFn):
        """Store functions defining agent and environment."""
        self.agentStartFunction = agentStartFn
        self.environmentStartFunction = envStartFn
        self.agentStepFunction = agentStepFn
        self.environmentStepFunction = envStepFn
        self.s = 'terminal'                     # force start of new episode
        self.action = None                      # the action to be used in the next step
    def step(self):
        """Run one step; this is the core function, used by all the others in the RLinterface module."""
        if self.s == 'terminal':                # first step of an episode
            return self.startEpisode()
        else:
            return self.stepnext()

    def stepnext(self):
        """Run one step which is not the first step of an episode."""
        self.s, r = self.environmentStepFunction(self.action)
        self.action = self.agentStepFunction(self.s, r)
        if self.s == 'terminal':                # last step of an episode
            return r, self.s                    # no action, but the agent learned
        else:                                   # regular step
            return r, self.s, self.action       # action and learning
    def steps(self, numSteps):
        """Run for numSteps steps, regardless of episode endings;
        return the sequence of sensations, rewards and actions."""
        oaseq = []
        for step in xrange(numSteps):           # run for numSteps steps
            new = self.step()
            oaseq.extend(new)
        return oaseq

    def startEpisode(self):
        """Call the environment and agent start functions."""
        self.s = self.environmentStartFunction()
        self.action = self.agentStartFunction(self.s)
        return [self.s, self.action]
    def episode(self, maxSteps=1000000):
        """Run for one episode, to a maximum of maxSteps steps, and return the episode."""
        oaseq = self.startEpisode()
        step = 1
        while self.s != 'terminal' and step < maxSteps:     # stop at end of episode or maxSteps
            new = self.stepnext()
            oaseq.extend(new)
            step += 1
        return oaseq
    def episodes(self, numEpisodes, maxSteps=1000000, maxStepsTotal=1000000):
        """Generate numEpisodes episodes, each no more than maxSteps steps,
        with no more than maxStepsTotal steps in total; return the episodes in one sequence."""
        totsteps = 0
        oaseq = []
        episodeNum = 0
        while episodeNum < numEpisodes and totsteps < maxStepsTotal:    # run for numEpisodes episodes
            oaseq.extend(self.startEpisode())   # start a new episode (append rather than overwrite)
            steps = 1
            totsteps += 1
            episodeNum += 1
            while self.s != 'terminal' and \
                  steps < maxSteps and totsteps < maxStepsTotal:        # stop at end or too many steps
                new = self.stepnext()
                oaseq.extend(new)
                totsteps += 1
                steps += 1
        return oaseq
    def stepsQ(self, numSteps):
        """Same as steps but quicker, quieter, and returns nothing."""
        for step in xrange(numSteps):           # run for numSteps steps
            self.step()

    def episodeQ(self, maxSteps=1000000):
        """Same as episode but quicker, quieter, and returns nothing."""
        self.startEpisode()
        step = 1
        while self.s != 'terminal' and step < maxSteps:     # stop at end of episode or maxSteps
            self.stepnext()
            step += 1
    def episodesQ(self, numEpisodes, maxSteps=1000000, maxStepsTotal=1000000):
        """Same as episodes but quicker, quieter, and returns nothing."""
        totsteps = 0
        episodeNum = 0
        while episodeNum < numEpisodes and totsteps < maxStepsTotal:    # run for numEpisodes episodes
            self.startEpisode()                 # start a new episode
            steps = 1
            totsteps += 1
            episodeNum += 1
            while self.s != 'terminal' and \
                  steps < maxSteps and totsteps < maxStepsTotal:        # stop at end or too many steps
                self.stepnext()
                totsteps += 1
                steps += 1
def stepstaken(elist):
    """Return the number of steps given the list of states, actions and rewards."""
    return len(elist) // 3                      # each completed step contributes (r, s, a)
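
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module).  The toy "corridor"
# environment and the random agent below are hypothetical stand-ins, invented
# only to illustrate the calling convention expected by RLinterface:
#     envStartFn() -> s,  envStepFn(a) -> s, r,
#     agentStartFn(s) -> a,  agentStepFn(s, r) -> a,
# with s == 'terminal' marking the end of an episode.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import random

    GOAL = 5                                    # hypothetical goal position for the toy problem

    class Corridor:
        """Toy environment: start at 0, move left or right, reward -1 per step."""
        def __init__(self):
            self.position = 0
        def start(self):                        # plays the role of envStartFn() -> s
            self.position = 0
            return self.position
        def step(self, action):                 # plays the role of envStepFn(a) -> s, r
            self.position = max(0, self.position + action)
            if self.position >= GOAL:
                return 'terminal', -1           # reaching the goal ends the episode
            return self.position, -1

    def agentStart(s):                          # plays the role of agentStartFn(s) -> a
        return random.choice([-1, 1])

    def agentStep(s, r):                        # plays the role of agentStepFn(s, r) -> a
        return random.choice([-1, 1])           # a real agent would learn from s and r here

    env = Corridor()
    rli = RLinterface(agentStart, agentStep, env.start, env.step)
    episodeList = rli.episode(maxSteps=100)
    print('episode: %s' % episodeList)
    print('steps taken: %d' % stepstaken(episodeList))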