My OpenAI playground
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

4 anos atrás
4 anos atrás
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. from . import *
  2. def play(player_episodes=1):
  3. q_table = pickle.load( open( gym_name+".dat", "rb" ))
  4. total_epochs = 0
  5. episodes = player_episodes
  6. print(f"Evaluation: 0%")
  7. total_epochs, total_rewards = 0, 0
  8. try:
  9. for ep in range(episodes):
  10. state = env.reset()
  11. epochs, reward = 0, 0
  12. done = False
  13. i = 0
  14. while not done:
  15. action = np.argmax(q_table[state])
  16. state, reward, done, info = env.step(action)
  17. if i > max_iterations:
  18. done = True
  19. ftext = ""
  20. if use_video:
  21. env.render(mode="ansi")
  22. if use_ansi:
  23. ftext = env.render(mode="ansi")
  24. else:
  25. ftext = str(info)
  26. i += 1
  27. print (u"{}[2J{}[;H".format(chr(27), chr(27)))
  28. print(f"Evaluation: {100 * ep / episodes}%")
  29. print(f"{ftext}")
  30. sleep(.1)
  31. epochs += 1
  32. total_epochs += epochs
  33. total_rewards += reward
  34. sleep(1)
  35. except KeyboardInterrupt:
  36. print(f"Results after {episodes} episodes:")
  37. print(f"Average timesteps per episode: {total_epochs / episodes}")
  38. print(f"Average rewards per episode: {total_rewards / episodes}")
  39. exit()
  40. print (u"{}[2J{}[;H".format(chr(27), chr(27)))
  41. print("Evaluation: finished.\n")
  42. print(f"Results after {episodes} episodes:")
  43. print(f"Average timesteps per episode: {total_epochs / episodes}")
  44. print(f"Average rewards per episode: {total_rewards / episodes}")