from . import *

# ANSI escape sequence: clear the terminal and move the cursor to the top-left.
_CLEAR_SCREEN = "\x1b[2J\x1b[;H"


def _print_results(completed_episodes, total_epochs, total_rewards):
    """Print the evaluation summary averaged over the completed episodes."""
    print(f"Results after {completed_episodes} episodes:")
    if completed_episodes <= 0:
        # Nothing finished, so there is nothing to average (avoids a
        # ZeroDivisionError when no episode ran to completion).
        print("No episodes were completed; no averages to report.")
        return
    print(f"Average timesteps per episode: {total_epochs / completed_episodes}")
    print(f"Average rewards per episode: {total_rewards / completed_episodes}")


def play(player_episodes=1):
    """Replay the greedy policy of a saved Q-table and print statistics.

    The Q-table is unpickled from ``gym_name + ".dat"``. For every episode
    the environment is reset and stepped with ``argmax`` actions until the
    environment reports ``done`` or the step cap derived from
    ``max_iterations`` is hit. Each step redraws the terminal with the
    current progress and either the ANSI rendering of the environment (when
    ``use_ansi`` is truthy) or the ``info`` value returned by ``env.step``.

    ``pickle``, ``np``, ``sleep``, ``env``, ``gym_name``, ``max_iterations``
    and ``use_ansi`` are expected to be provided by the package star-import.

    Args:
        player_episodes: Number of evaluation episodes to run.

    Raises:
        SystemExit: After printing the partial results when the run is
            interrupted with Ctrl-C.
    """
    # NOTE(security): pickle can execute arbitrary code while loading; only
    # load Q-table files that this project produced itself.
    with open(gym_name + ".dat", "rb") as q_file:
        q_table = pickle.load(q_file)

    episodes = player_episodes
    total_epochs, total_rewards = 0, 0
    completed_episodes = 0

    print("Evaluation: 0%")
    try:
        for ep in range(episodes):
            state = env.reset()
            steps = 0
            episode_reward = 0
            done = False

            while not done:
                action = np.argmax(q_table[state])
                state, reward, done, info = env.step(action)
                # Sum every step's reward so the reported figure is the
                # episode return, not merely the reward of the final step.
                episode_reward += reward

                # The cap is tested against the number of steps taken before
                # this one, exactly as the original counter did.
                if steps > max_iterations:
                    done = True

                frame = env.render(mode="ansi") if use_ansi else str(info)

                print(_CLEAR_SCREEN)
                print(f"Evaluation: {100 * ep / episodes}%")
                print(frame)
                sleep(.1)
                steps += 1

            total_epochs += steps
            total_rewards += episode_reward
            completed_episodes += 1
            sleep(1)
    except KeyboardInterrupt:
        # Only fully finished episodes contribute to the totals, so average
        # over those rather than over the requested episode count.
        _print_results(completed_episodes, total_epochs, total_rewards)
        # SystemExit is raised directly: the exit() helper comes from the
        # site module and is not guaranteed to exist (e.g. under python -S).
        raise SystemExit from None

    print(_CLEAR_SCREEN)
    print("Evaluation: finished.\n")
    _print_results(completed_episodes, total_epochs, total_rewards)