My OpenAI playground
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

60 satır
1.6KB

  1. from . import *
  2. def play(player_episodes=1):
  3. q_table = pickle.load( open( gym_name+".dat", "rb" ))
  4. total_epochs = 0
  5. episodes = player_episodes
  6. print(f"Evaluation: 0%")
  7. total_epochs, total_rewards = 0, 0
  8. try:
  9. for ep in range(episodes):
  10. state = env.reset()
  11. epochs, reward = 0, 0
  12. done = False
  13. i = 0
  14. while not done:
  15. action = np.argmax(q_table[state])
  16. state, reward, done, info = env.step(action)
  17. if i > max_iterations:
  18. done = True
  19. ftext = ""
  20. if use_ansi:
  21. ftext = env.render(mode="ansi")
  22. else:
  23. ftext = str(info)
  24. i += 1
  25. print (u"{}[2J{}[;H".format(chr(27), chr(27)))
  26. print(f"Evaluation: {100 * ep / episodes}%")
  27. print(f"{ftext}")
  28. sleep(.1)
  29. epochs += 1
  30. total_epochs += epochs
  31. total_rewards += reward
  32. sleep(1)
  33. except KeyboardInterrupt:
  34. print(f"Results after {episodes} episodes:")
  35. print(f"Average timesteps per episode: {total_epochs / episodes}")
  36. print(f"Average rewards per episode: {total_rewards / episodes}")
  37. exit()
  38. print (u"{}[2J{}[;H".format(chr(27), chr(27)))
  39. print("Evaluation: finished.\n")
  40. print(f"Results after {episodes} episodes:")
  41. print(f"Average timesteps per episode: {total_epochs / episodes}")
  42. print(f"Average rewards per episode: {total_rewards / episodes}")