31 lines
973 B
Python
31 lines
973 B
Python
def evaluate_policy(env, agent, turns=3):
    """Run the agent for ``turns`` evaluation episodes and return the mean score.

    Each episode starts from ``env.reset()`` and repeatedly calls
    ``agent.select_action(state, deterministic=True)`` followed by
    ``env.step(action)`` until either of the two flags returned by the
    environment (``dw`` or ``tr``) is truthy.  After every episode the
    list of actions taken and the final state are printed.

    Args:
        env: Environment exposing ``reset()`` and ``step(action)``;
            ``step`` must return ``(next_state, reward, dw, tr, info)``.
        agent: Object exposing ``select_action(state, deterministic=...)``.
        turns: Number of episodes to average over; must be at least 1.

    Returns:
        The summed reward of all episodes divided by ``turns``, passed
        through ``int()`` (i.e. truncated toward zero).

    Raises:
        ValueError: If ``turns`` is smaller than 1.
    """
    # Fail early with a clear message instead of a ZeroDivisionError at the
    # final average (turns == 0) or a silent 0 with no episode run (turns < 0).
    if turns < 1:
        raise ValueError(f'turns must be a positive integer, got {turns!r}')

    total_scores = 0
    for _ in range(turns):
        # NOTE(review): the reset() result is used directly as the state. If
        # env follows the Gymnasium API, reset() returns (obs, info) and this
        # needs unpacking -- confirm against the environment in use.
        s = env.reset()
        done = False
        action_series = []
        while not done:
            # Take deterministic actions at test time
            a = agent.select_action(s, deterministic=True)
            s_next, r, dw, tr, info = env.step(a)
            # The episode is over as soon as either flag is set.
            done = (dw or tr)

            action_series.append(a)
            total_scores += r
            s = s_next

        # Per-episode debug output: actions taken and the last state reached.
        print('action series: ', action_series)
        print('state: ', s)

    return int(total_scores / turns)
|
|
|
|
|
|
# You can ignore this function; it is not related to RL.
|
|
def str2bool(v):
    """Convert a string to a bool, for use as an argparse ``type=`` callable.

    Args:
        v: Either a bool, which is returned unchanged, or a string.
            Strings are matched case-insensitively against
            'yes'/'true'/'t'/'y'/'1' (True) and
            'no'/'false'/'f'/'n'/'0' (False).

    Returns:
        The bool that ``v`` stands for.

    Raises:
        ValueError: If ``v`` is a string that matches none of the
            accepted spellings.
    """
    if isinstance(v, bool):
        return v

    # Lower-case once; the accepted spellings therefore only need to be
    # listed in lower case.
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False

    # A bare ``raise`` with no active exception only yields an unrelated
    # RuntimeError; raise a descriptive ValueError instead, which argparse
    # reports as an invalid argument value.
    raise ValueError(f'Wrong Input. Expected a boolean value, got {v!r}')