// Q-Learning thread
// Runs training episodes until needToStop is set or learningIterations
// episodes have completed, then re-enables the settings controls.
private void QLearningThread()
{
    // the learner's exploration policy is a tabu-search wrapper around an epsilon-greedy policy
    TabuSearchExploration tabu = (TabuSearchExploration)qLearning.ExplorationPolicy;
    EpsilonGreedyExploration greedy = (EpsilonGreedyExploration)tabu.BasePolicy;

    // the episode counter is advanced inside the body, right before it is displayed
    for (int episode = 0; (!needToStop) && (episode < learningIterations); )
    {
        // fraction of the training run already completed;
        // exploration and learning rates both shrink linearly with it
        double progress = (double)episode / learningIterations;
        greedy.Epsilon = explorationRate - progress * explorationRate;
        qLearning.LearningRate = learningRate - progress * learningRate;

        // every episode starts with an empty tabu list and the agent on the start cell
        tabu.ResetTabuList();
        int x = agentStartX;
        int y = agentStartY;

        // number of moves the agent needed in this episode
        int stepCount = 0;

        while ((!needToStop) && !((x == agentStopX) && (y == agentStopY)))
        {
            stepCount++;

            // state before the move and the action picked for it
            int stateBefore = GetStateNumber(x, y);
            int chosen = qLearning.GetAction(stateBefore);

            // perform the move (x and y are updated in place) and collect the reward
            double reward = UpdateAgentPosition(ref x, ref y, chosen);
            int stateAfter = GetStateNumber(x, y);

            // update the Q-function with the observed transition
            qLearning.UpdateState(stateBefore, chosen, reward, stateAfter);

            // forbid action (chosen + 2) % 4 for one step
            // (presumably the reverse of the move just made - confirm against the action numbering)
            tabu.SetTabuAction((chosen + 2) % 4, 1);
        }

        System.Diagnostics.Debug.WriteLine(stepCount);

        // show the number of completed iterations
        episode++;
        SetText(iterationBox, episode.ToString());
    }

    // enable settings controls
    EnableControls(true);
}
/// <summary>
/// Gets the action for the given state from the first learner that is set,
/// checked in the order qLearning, sarsa, doubleQLearning.
/// </summary>
/// <param name="currentState">state number to choose an action for</param>
/// <returns>action number returned by the selected learner</returns>
/// <exception cref="System.InvalidOperationException">
/// Thrown when qLearning, sarsa and doubleQLearning are all null.
/// </exception>
private int GetAction(int currentState)
{
    if (qLearning != null)
    {
        return qLearning.GetAction(currentState);
    }

    if (sarsa != null)
    {
        return sarsa.GetAction(currentState);
    }

    if (doubleQLearning != null)
    {
        return doubleQLearning.GetAction(currentState);
    }

    // Reaching this point means every learner is null. The previous fallback
    // dereferenced doubleQLearning here, which could only ever fail with a
    // NullReferenceException; report the actual problem instead.
    throw new System.InvalidOperationException(
        "No learning algorithm is initialized: qLearning, sarsa and doubleQLearning are all null.");
}
/// <summary>
/// Show solution thread: repeatedly walks the agent from the start cell to the
/// stop cell using the learner's current policy with exploration switched off,
/// redrawing the map after every move, until needToStop is set.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when neither qLearning nor sarsa is set.
/// </exception>
private void ShowSolutionThread()
{
    TabuSearchExploration tabuPolicy;
    if (qLearning != null)
    {
        tabuPolicy = (TabuSearchExploration)qLearning.ExplorationPolicy;
    }
    else if (sarsa != null)
    {
        tabuPolicy = (TabuSearchExploration)sarsa.ExplorationPolicy;
    }
    else
    {
        throw new InvalidOperationException(
            "Cannot show a solution: neither qLearning nor sarsa is initialized.");
    }

    // set exploration rate to 0 and clear the tabu list before replaying;
    // the policies are dereferenced directly because the result is used
    // unconditionally on the next line anyway
    var explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;
    explorationPolicy.Epsilon = 0;
    tabuPolicy.ResetTabuList();

    // current coordinates of the agent
    int agentCurrentX = agentStartX, agentCurrentY = agentStartY;

    // prepare the map to display: value 2 is written to the agent's cell, 3 to the stop cell
    Array.Copy(map, mapToDisplay, mapWidth * mapHeight);
    mapToDisplay[agentStartY, agentStartX] = 2;
    mapToDisplay[agentStopY, agentStopX] = 3;

    while (!needToStop)
    {
        // display the map and pause for 200 ms
        cellWorld.Map = mapToDisplay;
        Thread.Sleep(200);

        // stop cell reached: restore both markers, move the agent back to the
        // start and display that frame before walking again
        if ((agentCurrentX == agentStopX) && (agentCurrentY == agentStopY))
        {
            mapToDisplay[agentStartY, agentStartX] = 2;
            mapToDisplay[agentStopY, agentStopX] = 3;

            agentCurrentX = agentStartX;
            agentCurrentY = agentStartY;

            cellWorld.Map = mapToDisplay;
            Thread.Sleep(200);
        }

        // remove the agent from its current cell
        mapToDisplay[agentCurrentY, agentCurrentX] = 0;

        // ask whichever learner is set for the action in the current state
        int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
        int action = qLearning?.GetAction(currentState) ?? sarsa.GetAction(currentState);

        // move the agent (coordinates are updated in place; the returned value is not needed here)
        UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);

        // put the agent on its new cell
        mapToDisplay[agentCurrentY, agentCurrentX] = 2;
    }

    // enable settings controls
    EnableControls(true);
}
// Q-Learning thread
// Trains the Q-learning agent episode by episode, reporting the iteration
// count after each one, and re-enables the settings controls when finished.
private void QLearningThread()
{
    TabuSearchExploration tabu = (TabuSearchExploration)qLearning.ExplorationPolicy;
    EpsilonGreedyExploration greedy = (EpsilonGreedyExploration)tabu.BasePolicy;

    int completed = 0;
    while (true)
    {
        // leave when a stop was requested or all iterations are done
        if (needToStop || !(completed < learningIterations))
        {
            break;
        }

        // exploration and learning rates are reduced linearly over the iterations
        double fractionDone = (double)completed / learningIterations;
        greedy.Epsilon = explorationRate - fractionDone * explorationRate;
        qLearning.LearningRate = learningRate - fractionDone * learningRate;

        tabu.ResetTabuList();

        // the agent begins each iteration on the start cell
        var posX = agentStartX;
        var posY = agentStartY;
        int moves = 0;

        while (true)
        {
            if (needToStop)
            {
                break;
            }

            bool arrived = (posX == agentStopX) && (posY == agentStopY);
            if (arrived)
            {
                break;
            }

            moves++;

            int fromState = GetStateNumber(posX, posY);
            int move = qLearning.GetAction(fromState);

            // posX and posY are updated in place by the move
            double reward = UpdateAgentPosition(ref posX, ref posY, move);
            int toState = GetStateNumber(posX, posY);

            // learn from the transition, then make action (move + 2) % 4 tabu for one step
            qLearning.UpdateState(fromState, move, reward, toState);
            tabu.SetTabuAction((move + 2) % 4, 1);
        }

        System.Diagnostics.Debug.WriteLine(moves);

        completed++;
        SetText(iterationBox, completed.ToString());
    }

    EnableControls(true);
}
/// <summary>
/// Asks the wrapped Q-learning instance which action to take in the given state.
/// </summary>
/// <param name="state">state number</param>
/// <returns>action number returned by <c>qLearning.GetAction</c></returns>
public int GetAction(int state)
{
    int action = qLearning.GetAction(state);
    return action;
}
public void learn_test()
{
    // Documentation sample and regression test: trains a QLearning agent on a
    // small walled map and checks that training ends with the agent on the goal.

    #region doc_main
    // Fix the random number generator
    Accord.Math.Random.Generator.Seed = 0;

    // In this example, we will be using the QLearning algorithm
    // to make a robot learn how to navigate a map. The map is
    // shown below, where a 1 denotes a wall and 0 denotes areas
    // where the robot can navigate:
    //
    int[,] map =
    {
        { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
        { 1, 1, 0, 0, 0, 0, 0, 0, 1 },
        { 1, 1, 0, 0, 0, 1, 1, 0, 1 },
        { 1, 0, 0, 1, 0, 0, 0, 0, 1 },
        { 1, 0, 0, 1, 1, 1, 1, 0, 1 },
        { 1, 0, 0, 1, 1, 0, 0, 0, 1 },
        { 1, 1, 0, 1, 0, 0, 0, 0, 1 },
        { 1, 1, 0, 1, 0, 1, 1, 0, 1 },
        { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    };

    // Now, we define the initial and target points from which the
    // robot will be spawned and where it should go, respectively:
    int agentStartX = 1;
    int agentStartY = 4;

    int agentStopX = 7;
    int agentStopY = 4;

    // The robot is able to sense the environment through 8 sensors
    // that capture whether the robot is near a wall or not. Based
    // on the robot's current location, the sensors will return an
    // integer number representing which sensors have detected walls
    // (one bit per neighbouring cell, 256 possible states):
    Func<int, int, int> getState = (int x, int y) =>
    {
        int c1 = (map[y - 1, x - 1] != 0) ? 1 : 0;
        int c2 = (map[y - 1, x + 0] != 0) ? 1 : 0;
        int c3 = (map[y - 1, x + 1] != 0) ? 1 : 0;
        int c4 = (map[y + 0, x + 1] != 0) ? 1 : 0;
        int c5 = (map[y + 1, x + 1] != 0) ? 1 : 0;
        int c6 = (map[y + 1, x + 0] != 0) ? 1 : 0;
        int c7 = (map[y + 1, x - 1] != 0) ? 1 : 0;
        int c8 = (map[y + 0, x - 1] != 0) ? 1 : 0;

        return c1 | (c2 << 1) | (c3 << 2) | (c4 << 3) | (c5 << 4) | (c6 << 5) | (c7 << 6) | (c8 << 7);
    };

    // The actions are the possible directions the robot can go:
    //
    //   - case 0: go to north (up)
    //   - case 1: go to east (right)
    //   - case 2: go to south (down)
    //   - case 3: go to west (left)
    //

    int learningIterations = 1000;
    double explorationRate = 0.5;
    double learningRate = 0.5;

    double moveReward = 0;
    double wallReward = -1;
    double goalReward = 1;

    // The function below specifies how the robot should perform an action given its
    // current position and an action number. This will cause the robot to update its
    // current X and Y locations given the direction (above) it was instructed to go:
    Func<int, int, int, Tuple<double, int, int>> doAction = (int currentX, int currentY, int action) =>
    {
        // default reward is equal to moving reward
        double reward = moveReward;

        // moving direction
        int dx = 0, dy = 0;

        switch (action)
        {
            case 0: // go to north (up)
                dy = -1;
                break;
            case 1: // go to east (right)
                dx = 1;
                break;
            case 2: // go to south (down)
                dy = 1;
                break;
            case 3: // go to west (left)
                dx = -1;
                break;
        }

        int newX = currentX + dx;
        int newY = currentY + dy;

        // check new agent's coordinates; the bounds are tested before the map
        // is indexed so that leaving the world yields the wall reward instead
        // of an IndexOutOfRangeException
        if ((newX < 0) || (newX >= map.Columns()) ||
            (newY < 0) || (newY >= map.Rows()) ||
            (map[newY, newX] != 0))
        {
            // we found a wall or got outside of the world
            reward = wallReward;
        }
        else
        {
            currentX = newX;
            currentY = newY;

            // check if we found the goal
            if ((currentX == agentStopX) && (currentY == agentStopY))
            {
                reward = goalReward;
            }
        }

        return Tuple.Create(reward, currentX, currentY);
    };

    // After defining all those functions, we create a new QLearning algorithm
    // (256 states, 4 actions) that explores using tabu search on top of an
    // epsilon-greedy policy:
    var explorationPolicy = new EpsilonGreedyExploration(explorationRate);
    var tabuPolicy = new TabuSearchExploration(4, explorationPolicy);
    var qLearning = new QLearning(256, 4, tabuPolicy);

    // current coordinates of the agent
    int agentCurrentX = -1;
    int agentCurrentY = -1;

    bool needToStop = false;
    int iteration = 0;

    // loop
    while ((!needToStop) && (iteration < learningIterations))
    {
        // set exploration rate for this iteration
        explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;

        // set learning rate for this iteration
        qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;

        // clear tabu list
        tabuPolicy.ResetTabuList();

        // reset agent's coordinates to the starting position
        agentCurrentX = agentStartX;
        agentCurrentY = agentStartY;

        // steps performed by agent to get to the goal
        int steps = 0;

        while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
        {
            steps++;

            // get agent's current state
            int currentState = getState(agentCurrentX, agentCurrentY);

            // get the action for this state
            int action = qLearning.GetAction(currentState);

            // update agent's current position and get his reward
            var r = doAction(agentCurrentX, agentCurrentY, action);
            double reward = r.Item1;
            agentCurrentX = r.Item2;
            agentCurrentY = r.Item3;

            // get agent's next state
            int nextState = getState(agentCurrentX, agentCurrentY);

            // do learning of the agent - update his Q-function
            qLearning.UpdateState(currentState, action, reward, nextState);

            // set tabu action: forbid the opposite direction for one step,
            // so the robot does not immediately walk back
            tabuPolicy.SetTabuAction((action + 2) % 4, 1);
        }

        System.Diagnostics.Debug.WriteLine(steps);

        iteration++;
    }

    // The end position for the robot will be (7, 4):
    int finalPosX = agentCurrentX; // 7
    int finalPosY = agentCurrentY; // 4;
    #endregion

    Assert.AreEqual(7, finalPosX);
    Assert.AreEqual(4, finalPosY);
}
// Per-tick update. Outside show-solution mode it refreshes the iteration
// label and handles a pending request to re-enable the settings controls.
// In show-solution mode it moves the agent one step along the learnt policy
// each time more than 0.25 seconds have passed since the previous move.
void FixedUpdate()
{
    if (!_showSolution)
    {
        // show current iteration unless _needToStop is set
        if (!_needToStop)
        {
            References.CurrentIteration.text = _currentIteration.ToString();
        }

        // one-shot flag: clear it, then enable the settings controls
        if (_enableControls)
        {
            _enableControls = false;
            References.EnableControls(true);
        }

        return;
    }

    // move only when more than 0.25 seconds have elapsed since the last move
    bool stepDue = (_timeStep + 0.25f) < Time.time;
    if (!stepDue)
    {
        return;
    }

    _timeStep = Time.time;

    // agent stands on the stop cell: send it back to the start and finish this tick
    bool atGoal = (_agentCurrX == _agentStopX) && (_agentCurrY == _agentStopY);
    if (atGoal)
    {
        PlayerObject.localPosition = new Vector3(_agentStartX, 0, _agentStartY);
        _agentCurrX = _agentStartX;
        _agentCurrY = _agentStartY;
        return;
    }

    // first step of a replay: switch exploration off and place the agent on the start cell
    if (_initShowSolution)
    {
        _initShowSolution = false;

        // pick the exploration policy of whichever learner is in use
        TabuSearchExploration tabu =
            (_qLearning != null) ? (TabuSearchExploration)_qLearning.ExplorationPolicy
            : (_sarsa != null) ? (TabuSearchExploration)_sarsa.ExplorationPolicy
            : (TabuSearchExploration)_qLearning_FDGS.ExplorationPolicy;

        // exploration rate 0, so the agent uses only what it has learnt
        EpsilonGreedyExploration greedy = (EpsilonGreedyExploration)tabu.BasePolicy;
        greedy.Epsilon = 0;
        tabu.ResetTabuList();

        PlayerObject.localPosition = new Vector3(_agentStartX, 0, _agentStartY);

        // current coordinates of the agent, read back from the player object
        _agentCurrX = (int)PlayerObject.localPosition.x;
        _agentCurrY = (int)PlayerObject.localPosition.z;
    }

    bool hasQLearningOrSarsa = (_qLearning != null) || (_sarsa != null);
    if (hasQLearningOrSarsa)
    {
        // state and action come from Q-learning when present, otherwise from Sarsa
        int currentState = GetStateNumber(_agentCurrX, _agentCurrY);

        int action;
        if (_qLearning != null)
        {
            action = _qLearning.GetAction(currentState);
        }
        else
        {
            action = _sarsa.GetAction(currentState);
        }

        UpdateAgentPosition(action);
    }
    else
    {
        // the FDGS learner maps coordinates to states itself and exposes the learnt action
        int currentState = _qLearning_FDGS.GetStateFromCoordinates(_agentCurrX, _agentCurrY);
        int action = _qLearning_FDGS.GetLearnedAction(currentState);

        UpdateAgentPosition(currentState, action);
    }

    // set player object position to the agent's new coordinates
    PlayerObject.localPosition = new Vector3(_agentCurrX, 0, _agentCurrY);
}