# 01_agent_anatomy.py
# 42 lines (29 loc) · 873 Bytes
import random
class Environment:
    """Toy environment that pays a random reward for a limited number of steps.

    The observation and the action set are constant; the only state is the
    number of steps that may still be taken before the episode ends.
    """

    def __init__(self, steps_left=10):
        """Create an environment with a fixed step budget.

        Args:
            steps_left: How many actions may be taken before the episode is
                over. Defaults to 10.
        """
        self.steps_left = steps_left

    def get_observation(self):
        """Return the current observation, always a fresh list of three zeros."""
        return [0.0, 0.0, 0.0]

    def get_actions(self):
        """Return the list of actions the agent may choose from: ``[0, 1]``."""
        return [0, 1]

    def is_done(self):
        """Return True once the step budget is used up."""
        # "<=" rather than "==": a zero or negative budget (from the
        # constructor or from outside mutation) must end the episode instead
        # of letting a `while not env.is_done()` loop run forever.
        return self.steps_left <= 0

    def action(self, action):
        """Consume one step and return its reward.

        Args:
            action: The chosen action. It is accepted but does not influence
                the reward.

        Returns:
            A float drawn from ``random.random()``, i.e. in ``[0.0, 1.0)``.

        Raises:
            RuntimeError: If the episode is already over.
        """
        if self.is_done():
            # RuntimeError subclasses Exception, so callers that catch
            # Exception keep working.
            raise RuntimeError("Game is over")
        self.steps_left -= 1
        return random.random()
class Agent:
    """Agent that acts at random and keeps a running sum of its rewards."""

    def __init__(self):
        """Start with no reward collected."""
        self.total_reward = 0.0

    def step(self, env):
        """Take one random action in ``env`` and add its reward to the total.

        Args:
            env: Object providing ``get_observation()``, ``get_actions()``
                and ``action(action)``.
        """
        # The observation is requested but not used when picking the action.
        _observation = env.get_observation()
        chosen = random.choice(env.get_actions())
        gained = env.action(chosen)
        self.total_reward += gained
if __name__ == "__main__":
    # Play a single episode: let the agent act until the environment reports
    # that it is finished, then print the reward accumulated by the agent.
    environment = Environment()
    player = Agent()

    while True:
        if environment.is_done():
            break
        player.step(environment)

    print("Total reward got: %.4f" % player.total_reward)