|
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- from . import *
-
def _clear_screen():
    """Clear the terminal and move the cursor home via ANSI escape codes."""
    print(u"{}[2J{}[;H".format(chr(27), chr(27)))


def _save_q_table(q_table, path):
    """Pickle *q_table* to *path*, closing the file even if dumping fails."""
    with open(path, "wb") as fh:
        pickle.dump(q_table, fh)


def train(training_episodes=10000, resume=True):
    """Run tabular Q-learning on the module-level ``env`` and save the table.

    The Q-table is a 2-D array indexed ``[state, action]``. Actions are
    chosen epsilon-greedily (a random sample with probability ``epsilon``,
    otherwise the argmax of the current row) and each step applies

        Q[s, a] = (1 - alpha) * Q[s, a] + alpha * (reward + gamma * max Q[s'])

    ``env``, ``gym_name``, ``epsilon``, ``alpha``, ``gamma`` and ``use_ansi``
    are read from module scope; they are defined outside this function.
    NOTE(review): ``env`` is assumed to follow the classic Gym API in which
    ``reset()`` returns the state and ``step()`` returns a 4-tuple -- confirm
    against the installed gym version.

    Args:
        training_episodes: Number of episodes to run.
        resume: If true and ``<gym_name>.dat`` exists, continue from the
            pickled Q-table in that file instead of starting from zeros.

    Returns:
        A list of dicts (keys ``frame``, ``state``, ``action``, ``reward``,
        ``session``), one per step of every episode that falls on a whole
        percent of overall progress.

    Raises:
        SystemExit: On ``KeyboardInterrupt``, after the Q-table learned so
            far has been written to ``<gym_name>.dat``.
    """
    table_path = gym_name + ".dat"

    if resume and os.path.exists(table_path):
        # NOTE: unpickling can execute arbitrary code; only resume from
        # .dat files that were produced by this program.
        with open(table_path, "rb") as fh:
            q_table = pickle.load(fh)
    else:
        q_table = np.zeros([env.observation_space.n, env.action_space.n])

    episodes = training_episodes

    frames = []
    suc_cnt = 0

    try:
        for i in range(1, episodes + 1):
            # Integer form of "i is a multiple of episodes / 100". The float
            # modulo it replaces (i % (episodes / 100) == 0) never fires for
            # many episode counts, e.g. 1 % 0.1 != 0 when episodes == 10.
            at_whole_percent = (100 * i) % episodes == 0

            state = env.reset()
            done = False

            while not done:
                # Epsilon-greedy action selection.
                if random.uniform(0, 1) < epsilon:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(q_table[state])

                next_state, reward, done, info = env.step(action)

                old_value = q_table[state, action]
                next_max = np.max(q_table[next_state])
                q_table[state, action] = (
                    (1 - alpha) * old_value
                    + alpha * (reward + gamma * next_max)
                )

                # Every step with a positive reward counts as a success.
                if reward > 0:
                    suc_cnt += 1

                state = next_state

                if at_whole_percent:
                    if use_ansi:
                        ftext = env.render(mode="ansi")
                    else:
                        ftext = str(info)

                    # 'state' is the state reached *after* taking 'action'.
                    frames.append({
                        'frame': ftext,
                        'state': state,
                        'action': action,
                        'reward': reward,
                        'session': i,
                    })

            if at_whole_percent:
                _clear_screen()
                print(f"Training: {100 * i / episodes}%")
                print(f"Successes so far: {suc_cnt}")
                sleep(.1)

        _clear_screen()
        print("Training: finished.\n")
        print(f"Successes totally: {suc_cnt}")
        _save_q_table(q_table, table_path)
        print(f"Q-table saved: {table_path}")

    except KeyboardInterrupt:
        _clear_screen()
        print("Training: stopped.\n")
        print(f"Successes totally: {suc_cnt}")
        _save_q_table(q_table, table_path)
        # Report the path actually written; the old message named a
        # "<gym_name>_stopped.dat" file that was never created.
        print(f"Q-table saved: {table_path}")
        # exit() is a site-module helper meant for interactive sessions and
        # may be missing (python -S, frozen apps); raise SystemExit directly.
        raise SystemExit

    return frames
|