/// <summary>
/// Trains a freshly created <c>QLearner</c> over
/// <c>LearningConstants.LearningRounds</c> rounds.
/// </summary>
/// <remarks>
/// Every round builds a new <c>State</c> from <c>map</c> and a random
/// two-element position, wraps it in a new <c>Agent</c> that shares the one
/// learner, and then lets the agent act at most <c>stepLimit</c> times.
/// The same <c>state</c> reference is handed to the agent on every step of a
/// round. <c>learningDone</c> is set to <c>true</c> the first time
/// <c>Act</c> reports completion; this method never sets it back to false.
/// </remarks>
public void Learn()
{
    Random rng = new Random();
    learner = new QLearner();

    for (int round = 0; round < LearningConstants.LearningRounds; round++)
    {
        // Each coordinate is drawn independently from [0, map.Dimension).
        int firstCoordinate = rng.Next(map.Dimension);
        int secondCoordinate = rng.Next(map.Dimension);

        State state = new State(map, new int[] { firstCoordinate, secondCoordinate });
        agent = new Agent(state, learner);

        for (int step = 0; step < stepLimit; step++)
        {
            var action = agent.ChooseAction(state);

            // Falls linearly from 1 on the first step toward 0 as the step
            // index approaches stepLimit. How Act interprets it is not visible
            // here — presumably an exploration/learning weight; TODO confirm.
            double remainingFraction = 1 - (double)step / stepLimit;

            bool finished = agent.Act(state, action, remainingFraction);
            if (!finished)
            {
                continue;
            }

            // Act reported completion: record it and end this round early.
            learningDone = true;
            break;
        }
    }
}
/// <summary>
/// Creates an agent bound to the given learner and starting state.
/// </summary>
/// <param name="initialState">Stored as the agent's <c>currentState</c>.</param>
/// <param name="qLearner">Stored as the agent's <c>learner</c>; the reference is kept, not copied.</param>
public Agent(State initialState, QLearner qLearner)
{
    learner = qLearner;
    currentState = initialState;
}