|
- from . import *
-
def _report_results(finished_episodes, total_epochs, total_rewards):
    """Print average timesteps and rewards over the finished episodes."""
    print(f"Results after {finished_episodes} episodes:")
    if finished_episodes == 0:
        # Nothing was played to completion; avoid dividing by zero.
        print("Average timesteps per episode: n/a")
        print("Average rewards per episode: n/a")
        return
    print(f"Average timesteps per episode: {total_epochs / finished_episodes}")
    print(f"Average rewards per episode: {total_rewards / finished_episodes}")


def play(player_episodes=1):
    """Replay the greedy policy from a saved Q-table and print statistics.

    The Q-table is unpickled from ``gym_name + ".dat"``.  For every episode
    the action with the highest Q-value for the current state is taken until
    the environment reports ``done`` or the step counter exceeds
    ``max_iterations``.  After each step the terminal is cleared and the
    progress plus a textual frame are printed.

    ``env``, ``gym_name``, ``max_iterations``, ``use_video``, ``use_ansi``,
    ``np``, ``pickle`` and ``sleep`` are not defined in this block; they are
    expected to come from the package-level star import.

    Args:
        player_episodes: Number of episodes to play.

    Pressing Ctrl+C prints the statistics gathered over the episodes that
    finished so far and then exits the interpreter via ``exit()``.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # Q-table files that this project produced itself.
    with open(gym_name + ".dat", "rb") as q_file:
        q_table = pickle.load(q_file)

    episodes = player_episodes
    total_epochs, total_rewards = 0, 0
    finished = 0  # episodes played to completion; used for the averages
    clear_screen = "{}[2J{}[;H".format(chr(27), chr(27))  # ANSI clear + home

    print("Evaluation: 0%")

    try:
        for ep in range(episodes):
            state = env.reset()
            epochs, episode_reward = 0, 0
            done = False

            while not done:
                action = np.argmax(q_table[state])
                state, reward, done, info = env.step(action)
                # Sum every step's reward so the reported value is the
                # episode return, not just the last step's reward.
                episode_reward += reward

                # Safety stop for policies that never reach a terminal state.
                if epochs > max_iterations:
                    done = True

                ftext = ""
                if use_video:
                    # NOTE(review): return value is discarded here; kept
                    # as in the original in case render has side effects.
                    env.render(mode="ansi")
                    if use_ansi:
                        ftext = env.render(mode="ansi")
                    else:
                        ftext = str(info)

                print(clear_screen)
                print(f"Evaluation: {100 * ep / episodes}%")
                print(f"{ftext}")
                sleep(.1)

                epochs += 1

            total_epochs += epochs
            total_rewards += episode_reward
            finished += 1

        # Leave the last frame visible briefly before the final summary.
        sleep(1)
    except KeyboardInterrupt:
        _report_results(finished, total_epochs, total_rewards)
        exit()

    print(clear_screen)
    print("Evaluation: finished.\n")

    _report_results(finished, total_epochs, total_rewards)
|