We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 3394e44 commit 21d9136Copy full SHA for 21d9136
1 file changed
rl.py
@@ -52,3 +52,32 @@ def __call__(self, percept):
52
53
def update_state(self, percept):
    """Update the agent's internal state from ``percept``.

    This implementation does no work and always raises; presumably
    concrete subclasses are expected to override it (the enclosing class
    is not visible here -- confirm against the class definition).

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
55
+
56
57
def run_single_trial(agent_program, mdp):
    """Execute a single trial of ``agent_program`` in ``mdp``.

    Starting from ``mdp.init``, the agent is repeatedly shown a percept
    ``(state, reward)`` where ``reward`` is ``mdp.R(state)``.  The action it
    returns is applied by sampling a successor state from ``mdp.T``.  The
    trial ends as soon as the agent returns ``None``.

    Args:
        agent_program: callable taking a ``(state, reward)`` percept and
            returning the next action, or ``None`` to stop the trial.
        mdp: object providing ``init``, ``R(state)`` and ``T(state, action)``;
            should be an instance of a subclass of ``mdp.MDP``.

    Returns:
        None.  Any learning happens as a side effect inside ``agent_program``.

    Raises:
        ValueError: if ``mdp.T`` yields no outcomes for a chosen action.
    """

    def take_single_action(mdp, s, a):
        """Sample the outcome of taking action ``a`` in state ``s``.

        Weighted sampling over the ``(probability, state)`` pairs produced
        by ``mdp.T(s, a)``: the first state whose cumulative probability
        exceeds a uniform draw is returned.
        """
        x = random.uniform(0, 1)
        outcomes = list(mdp.T(s, a))
        if not outcomes:
            # Without this guard the loop below never binds `state` and the
            # caller would see an obscure UnboundLocalError.
            raise ValueError(
                'mdp.T returned no outcomes for state %r and action %r'
                % (s, a))
        cumulative_probability = 0.0
        for probability, state in outcomes:
            cumulative_probability += probability
            if x < cumulative_probability:
                break
        # If the probabilities sum to slightly less than the draw (floating
        # point rounding, or x == 1.0), the last listed state is returned.
        return state

    current_state = mdp.init
    while True:
        current_reward = mdp.R(current_state)
        percept = (current_state, current_reward)
        next_action = agent_program(percept)
        if next_action is None:
            # The agent signals the end of the trial by returning no action.
            break
        current_state = take_single_action(mdp, current_state, next_action)
0 commit comments