import numpy as np


def evaluate_policy(env, agent, turns=3):
    """Run ``turns`` evaluation episodes and return the mean episode score.

    Each episode starts from ``env.reset()`` and repeatedly feeds the action
    returned by ``agent.select_action(s, deterministic=True)`` to
    ``env.step`` until either of the two termination flags returned by
    ``env.step`` is truthy. After every episode the rounded action sequence
    and the final state are printed.

    Args:
        env: Environment object. ``reset()`` must return the initial state
            and ``step(a)`` must return a 5-tuple
            ``(next_state, reward, dw, tr, info)``.
        agent: Object exposing ``select_action(state, deterministic=True)``.
        turns: Number of episodes to run.

    Returns:
        The summed reward over all episodes divided by ``turns``, truncated
        to ``int``.
    """
    total_scores = 0
    for _ in range(turns):
        # NOTE(review): the return value of reset() is used directly as the
        # state; confirm env does not return an (obs, info) pair here.
        s = env.reset()
        done = False
        action_series = []
        while not done:
            # Take deterministic actions at test time
            a = agent.select_action(s, deterministic=True)
            s_next, r, dw, tr, info = env.step(a)
            # The episode ends when either flag is set (presumably
            # "dead/win" and "truncated" -- verify against the env).
            done = dw or tr

            action_series.append(a)
            total_scores += r
            s = s_next

        # Per-episode diagnostics: the actions taken and the final state.
        print('action series: ', np.round(action_series, 3))
        print('state: ', s)
    return int(total_scores / turns)


# You can ignore this function; it is not related to the RL logic.
def str2bool(v):
    """Convert a command-line string to ``bool`` (for use with argparse).

    Matching is case-insensitive.

    Args:
        v: A ``bool`` (returned unchanged) or an object with a ``lower()``
            method, normally a ``str``.

    Returns:
        ``True`` for ``yes``/``true``/``t``/``y``/``1`` and ``False`` for
        ``no``/``false``/``f``/``n``/``0``.

    Raises:
        RuntimeError: If ``v`` is not one of the recognised spellings.
        AttributeError: If ``v`` is neither a ``bool`` nor has ``lower()``.
    """
    if isinstance(v, bool):
        return v

    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False

    print('Wrong Input.')
    # An explicit RuntimeError keeps the exception type the previous bare
    # ``raise`` produced, but with a message that names the bad value.
    raise RuntimeError(f'str2bool expected a boolean-like value, got {v!r}')