예제 #1
0
        // Q-Learning thread: runs learning episodes on a background thread until the
        // iteration budget is spent or a stop is requested.
        private void QLearningThread()
        {
            int episode = 0;
            // agent's position while it explores the world
            int posX, posY;
            // unwrap the exploration policies around the learner
            TabuSearchExploration    tabuPolicy   = (TabuSearchExploration)qLearning.ExplorationPolicy;
            EpsilonGreedyExploration greedyPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            // one pass per learning episode
            while (!needToStop && episode < learningIterations)
            {
                // linearly anneal exploration and learning rates towards zero
                double progress = (double)episode / learningIterations;
                greedyPolicy.Epsilon   = explorationRate - progress * explorationRate;
                qLearning.LearningRate = learningRate - progress * learningRate;
                tabuPolicy.ResetTabuList();

                // start each episode from the agent's initial cell
                posX = agentStartX;
                posY = agentStartY;

                // number of moves taken to reach the goal this episode
                int steps = 0;

                while (!needToStop && (posX != agentStopX || posY != agentStopY))
                {
                    steps++;
                    // observe the current state
                    int currentState = GetStateNumber(posX, posY);
                    // choose an action according to the exploration policy
                    int action = qLearning.GetAction(currentState);
                    // apply it, moving the agent and collecting the reward
                    double reward = UpdateAgentPosition(ref posX, ref posY, action);
                    // observe the resulting state
                    int nextState = GetStateNumber(posX, posY);
                    // update the Q-function from this transition
                    qLearning.UpdateState(currentState, action, reward, nextState);

                    // forbid the immediate reverse move for one step
                    tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                }

                System.Diagnostics.Debug.WriteLine(steps);

                episode++;

                // show current iteration
                SetText(iterationBox, episode.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }
예제 #2
0
    // Q-Learning thread: trains the agent, logging the step count of each iteration.
    private void QLearningThread()
    {
        _currentIteration = 0;

        // fetch the exploration policies wrapped around the learner
        TabuSearchExploration    tabuPolicy = (TabuSearchExploration)_qLearning.ExplorationPolicy;
        EpsilonGreedyExploration greedy     = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

        // run learning episodes until the budget is spent or a stop is requested
        while (!_needToStop && _currentIteration < learningIterations)
        {
            // anneal exploration and learning rates linearly towards zero
            double fraction = (double)_currentIteration / learningIterations;
            greedy.Epsilon          = explorationRate - fraction * explorationRate;
            _qLearning.LearningRate = learningRate - fraction * learningRate;
            tabuPolicy.ResetTabuList();

            // place the agent back at the start cell
            _agentCurrX = _agentStartX;
            _agentCurrY = _agentStartY;

            // moves taken to reach the goal this episode
            int steps = 0;

            while (!_needToStop && (_agentCurrX != _agentStopX || _agentCurrY != _agentStopY))
            {
                steps++;
                // state before the move
                int stateBefore = GetStateNumber(_agentCurrX, _agentCurrY);
                // action chosen by the exploration policy
                int action = _qLearning.GetAction(stateBefore);
                // move the agent and collect the reward
                double reward = UpdateAgentPosition(action);
                // state after the move
                int stateAfter = GetStateNumber(_agentCurrX, _agentCurrY);
                // update the Q-function from this transition
                _qLearning.UpdateState(stateBefore, action, reward, stateAfter);

                // forbid the immediate reverse move for one step
                tabuPolicy.SetTabuAction((action + 2) % 4, 1);
            }

            _currentIteration++;
            Debug.Log(string.Format("{0} steps needed for iteration {1}.", steps, _currentIteration));
        }

        _enableControls = true;
        Debug.Log("QLearning training finished. Try to execute the solution.");
    }
예제 #3
0
        // Sarsa thread: trains the agent with the on-policy Sarsa algorithm.
        private void SarsaThread()
        {
            TabuSearchExploration    tabu   = (TabuSearchExploration)sarsa.ExplorationPolicy;
            EpsilonGreedyExploration greedy = (EpsilonGreedyExploration)tabu.BasePolicy;

            int epoch = 0;
            // run learning episodes until the budget is spent or a stop is requested
            while (!needToStop && epoch < learningIterations)
            {
                // linearly anneal exploration and learning rates towards zero
                double t = (double)epoch / learningIterations;
                greedy.Epsilon     = explorationRate - t * explorationRate;
                sarsa.LearningRate = learningRate - t * learningRate;
                tabu.ResetTabuList();

                int x = agentStartX;
                int y = agentStartY;
                int steps = 1;
                // Sarsa needs the first state/action pair before entering the loop
                int prevState  = GetStateNumber(x, y);
                int prevAction = sarsa.GetAction(prevState);
                double reward  = UpdateAgentPosition(ref x, ref y, prevAction);

                while (!needToStop && (x != agentStopX || y != agentStopY))
                {
                    steps++;

                    // forbid the immediate reverse of the previous move for one step
                    tabu.SetTabuAction((prevAction + 2) % 4, 1);
                    int nextState  = GetStateNumber(x, y);
                    int nextAction = sarsa.GetAction(nextState);
                    // on-policy update with the action actually chosen next
                    sarsa.UpdateState(prevState, prevAction, reward, nextState, nextAction);
                    reward     = UpdateAgentPosition(ref x, ref y, nextAction);
                    prevState  = nextState;
                    prevAction = nextAction;
                }

                if (!needToStop)
                {
                    // terminal update - the goal state has no successor action
                    sarsa.UpdateState(prevState, prevAction, reward);
                }

                System.Diagnostics.Debug.WriteLine(steps);

                epoch++;

                SetText(iterationBox, epoch.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }
예제 #4
0
        // Q-Learning thread: background training loop for the Q-Learning agent.
        private void QLearningThread()
        {
            TabuSearchExploration    tabu   = (TabuSearchExploration)qLearning.ExplorationPolicy;
            EpsilonGreedyExploration greedy = (EpsilonGreedyExploration)tabu.BasePolicy;

            int iteration = 0;
            while (!needToStop && iteration < learningIterations)
            {
                // anneal exploration and learning rates linearly towards zero
                double t = (double)iteration / learningIterations;
                greedy.Epsilon         = explorationRate - t * explorationRate;
                qLearning.LearningRate = learningRate - t * learningRate;
                tabu.ResetTabuList();

                // start from the agent's initial cell
                int x = agentStartX;
                int y = agentStartY;

                int steps = 0;
                while (!needToStop && (x != agentStopX || y != agentStopY))
                {
                    steps++;
                    int s     = GetStateNumber(x, y);
                    int a     = qLearning.GetAction(s);
                    double r  = UpdateAgentPosition(ref x, ref y, a);
                    int sNext = GetStateNumber(x, y);

                    // learn from the transition, then temporarily forbid the reverse move
                    qLearning.UpdateState(s, a, r, sNext);
                    tabu.SetTabuAction((a + 2) % 4, 1);
                }

                System.Diagnostics.Debug.WriteLine(steps);
                iteration++;

                SetText(iterationBox, iteration.ToString());
            }

            EnableControls(true);
        }
예제 #5
0
        // Show solution thread: animates the learnt policy on the map display,
        // restarting from the initial cell each time the goal is reached.
        private void ShowSolutionThread()
        {
            // set exploration rate to 0, so the agent uses only what it has learnt
            TabuSearchExploration tabuPolicy;

            if (qLearning != null)
            {
                tabuPolicy = (TabuSearchExploration)qLearning.ExplorationPolicy;
            }
            else if (sarsa != null)
            {
                tabuPolicy = (TabuSearchExploration)sarsa.ExplorationPolicy;
            }
            else
            {
                // neither learner was created - this thread was started in an invalid state
                throw new InvalidOperationException("Neither Q-Learning nor Sarsa algorithm is available.");
            }

            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            explorationPolicy.Epsilon = 0;
            tabuPolicy.ResetTabuList();

            // current coordinates of the agent
            int agentCurrentX = agentStartX, agentCurrentY = agentStartY;

            // prepare the map to display: 2 marks the agent/start, 3 marks the goal
            Array.Copy(map, mapToDisplay, mapWidth * mapHeight);
            mapToDisplay[agentStartY, agentStartX] = 2;
            mapToDisplay[agentStopY, agentStopX]   = 3;

            while (!needToStop)
            {
                // display the map
                cellWorld.Map = mapToDisplay;
                // sleep for a while
                Thread.Sleep(200);

                // check if we have reached the end point
                if ((agentCurrentX == agentStopX) && (agentCurrentY == agentStopY))
                {
                    // restore the start/goal markers and restart from the beginning
                    mapToDisplay[agentStartY, agentStartX] = 2;
                    mapToDisplay[agentStopY, agentStopX]   = 3;

                    agentCurrentX = agentStartX;
                    agentCurrentY = agentStartY;

                    cellWorld.Map = mapToDisplay;
                    Thread.Sleep(200);
                }

                // remove agent from current position
                mapToDisplay[agentCurrentY, agentCurrentX] = 0;

                // get agent's current state
                int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                // get the greedy action for this state from whichever learner exists
                int action = (qLearning != null) ? qLearning.GetAction(currentState) : sarsa.GetAction(currentState);
                // update agent's current position and get his reward
                double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);

                // put agent to the new position
                mapToDisplay[agentCurrentY, agentCurrentX] = 2;
            }

            // enable settings controls
            EnableControls(true);
        }
예제 #6
0
        // Unit test / documentation example: trains a QLearning agent to navigate
        // the maze below and asserts it ends on the goal cell (7, 4).
        public void learn_test()
        {
            #region doc_main
            // Fix the random number generator
            Accord.Math.Random.Generator.Seed = 0;

            // In this example, we will be using the QLearning algorithm
            // to make a robot learn how to navigate a map. The map is
            // shown below, where a 1 denotes a wall and 0 denotes areas
            // where the robot can navigate:
            //
            int[,] map =
            {
                { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
                { 1, 1, 0, 0, 0, 0, 0, 0, 1 },
                { 1, 1, 0, 0, 0, 1, 1, 0, 1 },
                { 1, 0, 0, 1, 0, 0, 0, 0, 1 },
                { 1, 0, 0, 1, 1, 1, 1, 0, 1 },
                { 1, 0, 0, 1, 1, 0, 0, 0, 1 },
                { 1, 1, 0, 1, 0, 0, 0, 0, 1 },
                { 1, 1, 0, 1, 0, 1, 1, 0, 1 },
                { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            };

            // Now, we define the initial and target points from which the
            // robot will be spawn and where it should go, respectively:
            int agentStartX = 1;
            int agentStartY = 4;

            int agentStopX = 7;
            int agentStopY = 4;

            // The robot is able to sense the environment though 8 sensors
            // that capture whether the robot is near a wall or not. Based
            // on the robot's current location, the sensors will return an
            // integer number representing which sensors have detected walls

            // Encodes the 8 neighbouring cells (clockwise from top-left) into
            // one bit each, giving a state number in [0, 255].
            Func <int, int, int> getState = (int x, int y) =>
            {
                int c1 = (map[y - 1, x - 1] != 0) ? 1 : 0;
                int c2 = (map[y - 1, x + 0] != 0) ? 1 : 0;
                int c3 = (map[y - 1, x + 1] != 0) ? 1 : 0;
                int c4 = (map[y + 0, x + 1] != 0) ? 1 : 0;
                int c5 = (map[y + 1, x + 1] != 0) ? 1 : 0;
                int c6 = (map[y + 1, x + 0] != 0) ? 1 : 0;
                int c7 = (map[y + 1, x - 1] != 0) ? 1 : 0;
                int c8 = (map[y + 0, x - 1] != 0) ? 1 : 0;

                return(c1 | (c2 << 1) | (c3 << 2) | (c4 << 3) | (c5 << 4) | (c6 << 5) | (c7 << 6) | (c8 << 7));
            };

            // The actions are the possible directions the robot can go:
            //
            //   - case 0: go to north (up)
            //   - case 1: go to east (right)
            //   - case 2: go to south (down)
            //   - case 3: go to west (left)
            //

            int    learningIterations = 1000;
            double explorationRate    = 0.5;
            double learningRate       = 0.5;

            // reward shaping: neutral for a plain move, penalty for hitting
            // a wall, bonus for reaching the goal
            double moveReward = 0;
            double wallReward = -1;
            double goalReward = 1;

            // The function below specifies how the robot should perform an action given its
            // current position and an action number. This will cause the robot to update its
            // current X and Y locations given the direction (above) it was instructed to go:
            Func <int, int, int, Tuple <double, int, int> > doAction = (int currentX, int currentY, int action) =>
            {
                // default reward is equal to moving reward
                double reward = moveReward;

                // moving direction
                int dx = 0, dy = 0;

                switch (action)
                {
                case 0:             // go to north (up)
                    dy = -1;
                    break;

                case 1:             // go to east (right)
                    dx = 1;
                    break;

                case 2:             // go to south (down)
                    dy = 1;
                    break;

                case 3:             // go to west (left)
                    dx = -1;
                    break;
                }

                int newX = currentX + dx;
                int newY = currentY + dy;

                // check new agent's coordinates
                // NOTE(review): map[newY, newX] is evaluated before the range checks, so an
                // out-of-range move would throw IndexOutOfRangeException before the checks
                // ever run. The solid border wall in `map` prevents this in practice —
                // confirm the checks are reordered if the map is ever changed.
                if ((map[newY, newX] != 0) || (newX < 0) || (newX >= map.Columns()) || (newY < 0) || (newY >= map.Rows()))
                {
                    // we found a wall or got outside of the world
                    reward = wallReward;
                }
                else
                {
                    currentX = newX;
                    currentY = newY;

                    // check if we found the goal
                    if ((currentX == agentStopX) && (currentY == agentStopY))
                    {
                        reward = goalReward;
                    }
                }

                return(Tuple.Create(reward, currentX, currentY));
            };


            // After defining all those functions, we create a new Sarsa algorithm:
            var explorationPolicy = new EpsilonGreedyExploration(explorationRate);
            var tabuPolicy        = new TabuSearchExploration(4, explorationPolicy);
            var qLearning         = new QLearning(256, 4, tabuPolicy);

            // curent coordinates of the agent
            int agentCurrentX = -1;
            int agentCurrentY = -1;

            bool needToStop = false;
            int  iteration  = 0;

            // loop
            // NOTE(review): this outer loop appears to be accidental duplication — the inner
            // loop below advances `iteration` all the way to learningIterations, so this
            // outer body executes exactly once, and the episode started right here
            // (previousState/previousAction/doAction) is discarded when the inner loop
            // resets the agent to the start. Confirm before restructuring, as it would
            // change the RNG call sequence of this documented example.
            while ((!needToStop) && (iteration < learningIterations))
            {
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;

                // set learning rate for this iteration
                qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;

                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = agentStartX;
                agentCurrentY = agentStartY;

                // previous state and action
                int previousState  = getState(agentCurrentX, agentCurrentY);
                int previousAction = qLearning.GetAction(previousState);

                // update agent's current position and get his reward
                var    r      = doAction(agentCurrentX, agentCurrentY, previousAction);
                double reward = r.Item1;
                agentCurrentX = r.Item2;
                agentCurrentY = r.Item3;

                // loop over learning episodes
                while ((!needToStop) && (iteration < learningIterations))
                {
                    // set exploration rate for this iteration
                    explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                    // set learning rate for this iteration
                    qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                    // clear tabu list
                    tabuPolicy.ResetTabuList();

                    // reset agent's coordinates to the starting position
                    agentCurrentX = agentStartX;
                    agentCurrentY = agentStartY;

                    // steps performed by agent to get to the goal
                    int steps = 0;

                    while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                    {
                        steps++;
                        // get agent's current state
                        int currentState = getState(agentCurrentX, agentCurrentY);

                        // get the action for this state
                        int action = qLearning.GetAction(currentState);

                        // update agent's current position and get his reward
                        r             = doAction(agentCurrentX, agentCurrentY, action);
                        reward        = r.Item1;
                        agentCurrentX = r.Item2;
                        agentCurrentY = r.Item3;

                        // get agent's next state
                        int nextState = getState(agentCurrentX, agentCurrentY);

                        // do learning of the agent - update his Q-function
                        qLearning.UpdateState(currentState, action, reward, nextState);

                        // set tabu action
                        tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                    }

                    System.Diagnostics.Debug.WriteLine(steps);

                    iteration++;
                }
            }

            // The end position for the robot will be (7, 4):
            int finalPosX = agentCurrentX; // 7
            int finalPosY = agentCurrentY; // 4;
            #endregion

            Assert.AreEqual(7, finalPosX);
            Assert.AreEqual(4, finalPosY);
        }
예제 #7
0
    // Sarsa thread: trains the agent with the on-policy Sarsa algorithm,
    // logging the step count of each iteration.
    private void SarsaThread()
    {
        // unwrap the exploration policies around the learner
        TabuSearchExploration    tabuPolicy = (TabuSearchExploration)_sarsa.ExplorationPolicy;
        EpsilonGreedyExploration greedy     = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

        int iteration = 0;
        while (!_needToStop && iteration < learningIterations)
        {
            // anneal exploration and learning rates linearly towards zero
            double fraction = (double)iteration / learningIterations;
            greedy.Epsilon      = explorationRate - fraction * explorationRate;
            _sarsa.LearningRate = learningRate - fraction * learningRate;
            tabuPolicy.ResetTabuList();

            // start the episode from the initial cell
            _agentCurrX = _agentStartX;
            _agentCurrY = _agentStartY;

            // Sarsa requires the first state/action pair before entering the loop
            int steps = 1;
            int previousState  = GetStateNumber(_agentCurrX, _agentCurrY);
            int previousAction = _sarsa.GetAction(previousState);
            double reward      = UpdateAgentPosition(previousAction);

            while (!_needToStop && (_agentCurrX != _agentStopX || _agentCurrY != _agentStopY))
            {
                steps++;

                // forbid the immediate reverse of the previous move for one step
                tabuPolicy.SetTabuAction((previousAction + 2) % 4, 1);

                int nextState  = GetStateNumber(_agentCurrX, _agentCurrY);
                int nextAction = _sarsa.GetAction(nextState);
                // on-policy update using the action actually selected next
                _sarsa.UpdateState(previousState, previousAction, reward, nextState, nextAction);

                reward = UpdateAgentPosition(nextAction);

                previousState  = nextState;
                previousAction = nextAction;
            }

            if (!_needToStop)
            {
                // terminal update - the goal state has no successor action
                _sarsa.UpdateState(previousState, previousAction, reward);
            }

            iteration++;
            Debug.Log(string.Format("{0} steps needed for iteration {1}.", steps, iteration));
        }

        _enableControls = true;
        Debug.Log("SARSA training finished. Try to execute the solution.");
    }
예제 #8
0
    // Unity fixed-rate callback: either animates the learnt solution (throttled)
    // or refreshes the training UI while a learning thread runs.
    void FixedUpdate()
    {
        if (!_showSolution)
        {
            if (!_needToStop)
            {
                // show current iteration
                References.CurrentIteration.text = _currentIteration.ToString();
            }

            if (_enableControls)
            {
                _enableControls = false;

                // enable settings controls
                References.EnableControls(true);
            }

            return;
        }

        // throttle: advance the animation at most once every 0.25 seconds
        if (Time.time <= _timeStep + 0.25f)
        {
            return;
        }
        _timeStep = Time.time;

        if ((_agentCurrX == _agentStopX) && (_agentCurrY == _agentStopY))
        {
            // goal reached - teleport the agent back to the start cell
            PlayerObject.localPosition = new Vector3(_agentStartX, 0, _agentStartY);

            _agentCurrX = _agentStartX;
            _agentCurrY = _agentStartY;
            return;
        }

        if (_initShowSolution)
        {
            _initShowSolution = false;

            // set exploration rate to 0, so agent uses only what he learnt
            TabuSearchExploration tabuPolicy;

            if (_qLearning != null)
            {
                tabuPolicy = (TabuSearchExploration)_qLearning.ExplorationPolicy;
            }
            else if (_sarsa != null)
            {
                tabuPolicy = (TabuSearchExploration)_sarsa.ExplorationPolicy;
            }
            else
            {
                tabuPolicy = (TabuSearchExploration)_qLearning_FDGS.ExplorationPolicy;
            }

            EpsilonGreedyExploration greedyPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            greedyPolicy.Epsilon = 0;
            tabuPolicy.ResetTabuList();

            PlayerObject.localPosition = new Vector3(_agentStartX, 0, _agentStartY);

            // current coordinates of the agent
            _agentCurrX = (int)PlayerObject.localPosition.x;
            _agentCurrY = (int)PlayerObject.localPosition.z;
        }

        if ((_qLearning != null) || (_sarsa != null))
        {
            // state-sensor based learners: look up the learnt action and apply it
            int currentState = GetStateNumber(_agentCurrX, _agentCurrY);
            int action = (_qLearning != null) ? _qLearning.GetAction(currentState) : _sarsa.GetAction(currentState);
            UpdateAgentPosition(action);
        }
        else
        {
            // coordinate based learner: map the cell to a state and take its learnt action
            int currentState = _qLearning_FDGS.GetStateFromCoordinates(_agentCurrX, _agentCurrY);
            int action = _qLearning_FDGS.GetLearnedAction(currentState);
            UpdateAgentPosition(currentState, action);
        }

        // set player object position
        PlayerObject.localPosition = new Vector3(_agentCurrX, 0, _agentCurrY);
    }
예제 #9
0
        // Sarsa thread: trains the agent with the on-policy Sarsa algorithm.
        private void SarsaThread()
        {
            int run = 0;
            // agent coordinates during the episode
            int x, y;
            // exploration policies wrapped around the learner
            TabuSearchExploration    tabu   = (TabuSearchExploration)sarsa.ExplorationPolicy;
            EpsilonGreedyExploration greedy = (EpsilonGreedyExploration)tabu.BasePolicy;

            // one pass per learning episode
            while (!needToStop && run < learningIterations)
            {
                // linearly anneal exploration and learning rates towards zero
                double progress = (double)run / learningIterations;
                greedy.Epsilon     = explorationRate - progress * explorationRate;
                sarsa.LearningRate = learningRate - progress * learningRate;
                tabu.ResetTabuList();

                // start from the initial cell
                x = _agentStartX;
                y = _agentStartY;

                // Sarsa needs the first state/action pair before entering the loop
                int steps = 1;
                int previousState  = GetStateNumber(x, y);
                int previousAction = sarsa.GetAction(previousState);
                double reward      = UpdateAgentPosition(ref x, ref y, previousAction);

                while (!needToStop && (x != _agentStopX || y != _agentStopY))
                {
                    steps++;

                    // forbid the immediate reverse of the previous move for one step
                    tabu.SetTabuAction((previousAction + 2) % 4, 1);

                    int nextState  = GetStateNumber(x, y);
                    int nextAction = sarsa.GetAction(nextState);
                    // on-policy update using the action actually chosen next
                    sarsa.UpdateState(previousState, previousAction, reward, nextState, nextAction);

                    reward = UpdateAgentPosition(ref x, ref y, nextAction);

                    previousState  = nextState;
                    previousAction = nextAction;
                }

                if (!needToStop)
                {
                    // terminal update - the goal state has no successor action
                    sarsa.UpdateState(previousState, previousAction, reward);
                }

                System.Diagnostics.Debug.WriteLine(steps);

                run++;

                // show current iteration
                SetText(iterationBox, run.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }
예제 #10
0
        // Double Q-Learning thread: trains the agent while recording each episode's
        // transitions (OldAction) and replaying them afterwards; it also keeps the
        // transitions of the shortest episode seen so far (MiniOldAction) and replays
        // those whenever an episode does not improve on the best step count.
        private void DoubleQLearningThread()
        {
            miniSteps = int.MaxValue;
            MiniOldAction.Clear();
            int iteration = 0;
            // current coordinates of the agent
            int agentCurrentX, agentCurrentY;
            // exploration policy
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)doubleQLearning.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                OldAction.Clear();
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                // set learning rate for this iteration
                doubleQLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = _agentStartX;
                agentCurrentY = _agentStartY;

                // steps performed by agent to get to the goal
                int steps = 0;

                while ((!needToStop) && ((agentCurrentX != _agentStopX) || (agentCurrentY != _agentStopY)))
                {
                    steps++;

                    // get agent's current state
                    int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // get the action for this state
                    int action = doubleQLearning.GetAction(currentState);
                    // NOTE(review): resetting the tabu list on every step means the
                    // SetTabuAction call at the bottom of this loop only affects the
                    // single GetAction call above - confirm this weakening of the tabu
                    // mechanism is intentional (other threads in this file reset only
                    // once per episode).
                    tabuPolicy.ResetTabuList();

                    // update agent's current position and get his reward
                    double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);
                    // get agent's next state
                    int nextState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // do learning of the agent - update his Q-function
                    doubleQLearning.UpdateState(currentState, action, reward, nextState);

                    // remember this transition (deduplicated) for replay after the episode
                    var tup = Tuple.Create(currentState, action, reward, nextState);
                    if (OldAction.Contains(tup) == false)
                    {
                        OldAction.Add(tup);
                    }

                    // set tabu action
                    tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                }
                // replay this episode's transitions in reverse order
                for (int i = OldAction.Count - 1; i >= 0; i--)
                {
                    var a = OldAction[i];
                    doubleQLearning.UpdateState(a.Item1, a.Item2, a.Item3, a.Item4);
                }
                if (steps < miniSteps)
                {
                    // new best episode - remember its transitions
                    miniSteps = steps;
                    MiniOldAction.Clear();
                    for (int i = 0; i < OldAction.Count; i++)
                    {
                        MiniOldAction.Add(OldAction[i]);
                    }
                }
                else
                {
                    // no improvement - replay the best episode's transitions in reverse
                    for (int i = MiniOldAction.Count - 1; i >= 0; i--)
                    {
                        var a = MiniOldAction[i];
                        doubleQLearning.UpdateState(a.Item1, a.Item2, a.Item3, a.Item4);
                    }
                }


                System.Diagnostics.Debug.WriteLine(steps);

                iteration++;

                // show current iteration
                SetText(iterationBox, iteration.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }
예제 #11
0
        /// <summary>
        /// Background thread that animates the learnt policy: exploration is
        /// forced to zero so the agent moves greedily from the start cell to the
        /// stop cell, and the map display is refreshed every 200 ms. Runs until
        /// <c>needToStop</c> is set, then re-enables the settings controls.
        /// </summary>
        private void ShowSolutionThread()
        {
            // Pick the exploration policy of whichever learner is in use
            // (Q-learning, then Sarsa, with double Q-learning as the fallback).
            TabuSearchExploration tabuPolicy =
                (qLearning != null) ? (TabuSearchExploration)qLearning.ExplorationPolicy :
                (sarsa != null)     ? (TabuSearchExploration)sarsa.ExplorationPolicy :
                                      (TabuSearchExploration)doubleQLearning.ExplorationPolicy;

            // Disable exploration so the agent uses only what it has learnt.
            EpsilonGreedyExploration greedyPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;
            greedyPolicy.Epsilon = 0;
            tabuPolicy.ResetTabuList();

            // The agent begins at its configured start cell.
            int posX = _agentStartX;
            int posY = _agentStartY;

            // Prepare the map for display: copy the world, then mark the
            // start cell (2) and the goal cell (3).
            Array.Copy(map, mapToDisplay, map.GetLength(0) * map.GetLength(1));
            mapToDisplay[_agentStartX, _agentStartY] = 2;
            mapToDisplay[_agentStopX, _agentStopY] = 3;

            while (!needToStop)
            {
                // Show the current map, then pause so the motion is visible.
                // NOTE(review): this assigns to a UI control from a background
                // thread — confirm CellWorld.Map tolerates cross-thread access.
                this.cellWorld1.Map = mapToDisplay;
                Thread.Sleep(200);

                // If the goal was reached, restore the markers and restart the
                // walk from the beginning.
                if ((posX == _agentStopX) && (posY == _agentStopY))
                {
                    mapToDisplay[_agentStartX, _agentStartY] = 2;
                    mapToDisplay[_agentStopX, _agentStopY] = 3;

                    posX = _agentStartX;
                    posY = _agentStartY;

                    this.cellWorld1.Map = mapToDisplay;
                    Thread.Sleep(200);
                }

                // Erase the agent from its current cell before moving it.
                mapToDisplay[posX, posY] = 0;

                // Ask the learnt policy for the action in the current state,
                // apply it (the reward is not needed here), and redraw the
                // agent at its new position.
                int state = GetStateNumber(posX, posY);
                int action = GetAction(state);
                UpdateAgentPosition(ref posX, ref posY, action);
                mapToDisplay[posX, posY] = 2;
            }

            // Give the settings controls back to the user.
            EnableControls(true);
        }