Ejemplos de QLearning.UpdateState en C# (CSharp)

Lenguaje de programación: C# (CSharp)

Clase / Tipo: QLearning

Método / Función: UpdateState

Ejemplos en hotexamples.com: 6

C# (CSharp) QLearning.UpdateState - 6 ejemplos encontrados. Estos son los ejemplos en C# (CSharp) del mundo real mejor valorados de QLearning.UpdateState extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

GetAction(7)

UpdateState(6)

printQTable(3)

resetMemory(2)

LoadModel(2)

Loop(2)

CreateMap(2)

CreateVisualizer(2)

setController(2)

saveReward(2)

New(2)

printRewardList(2)

Reset(2)

loadQTable(2)

RemakeModel(2)

AIStart(1)

Train(1)

getAction(1)

nextStep(1)

getE(1)

init(1)

Start(1)

setE(1)

updateTable(1)

Step(1)

Restart(1)

ShowPolicy(1)

GetSumQValue(1)

Begin(1)

Execute(1)

GetCurrentCell(1)

GetPath(1)

GetQTable(1)

GetReward(1)

GreedyPolicy(1)

SaveModel(1)

IsTraining(1)

Iterate(1)

Move(1)

PrintQLearningStructure(1)

AddState(1)

RestartEpisode(1)

RunTraining(1)

writeTrainingData(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: MainForm.cs Proyecto: RitterRBC/framework

        // Q-Learning thread
        private void QLearningThread()
        {
            int iteration = 0;
            // current coordinates of the agent
            int agentCurrentX, agentCurrentY;
            // exploration policy
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)qLearning.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                // set learning rate for this iteration
                qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = agentStartX;
                agentCurrentY = agentStartY;

                // steps performed by agent to get to the goal
                int steps = 0;

                while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                {
                    steps++;
                    // get agent's current state
                    int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // get the action for this state
                    int action = qLearning.GetAction(currentState);
                    // update agent's current position and get his reward
                    double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);
                    // get agent's next state
                    int nextState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // do learning of the agent - update his Q-function
                    qLearning.UpdateState(currentState, action, reward, nextState);

                    // set tabu action
                    tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                }

                System.Diagnostics.Debug.WriteLine(steps);

                iteration++;

                // show current iteration
                SetText(iterationBox, iteration.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }

Ejemplo n.º 2

Mostrar archivo

Archivo: Environment.cs Proyecto: RitterRBC/framework

    // Q-Learning thread
    private void QLearningThread()
    {
        _currentIteration = 0;

        // exploration policy
        TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)_qLearning.ExplorationPolicy;
        EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

        // loop
        while ((!_needToStop) && (_currentIteration < learningIterations))
        {
            // set exploration rate for this iteration
            explorationPolicy.Epsilon = explorationRate - ((double)_currentIteration / learningIterations) * explorationRate;
            // set learning rate for this iteration
            _qLearning.LearningRate = learningRate - ((double)_currentIteration / learningIterations) * learningRate;
            // clear tabu list
            tabuPolicy.ResetTabuList();

            // reset agent's coordinates to the starting position
            _agentCurrX = _agentStartX;
            _agentCurrY = _agentStartY;

            // steps performed by agent to get to the goal
            int steps = 0;

            while ((!_needToStop) && ((_agentCurrX != _agentStopX) || (_agentCurrY != _agentStopY)))
            {
                steps++;
                // get agent's current state
                int currentState = GetStateNumber(_agentCurrX, _agentCurrY);
                // get the action for this state
                int action = _qLearning.GetAction(currentState);
                // update agent's current position and get his reward
                double reward = UpdateAgentPosition(action);
                // get agent's next state
                int nextState = GetStateNumber(_agentCurrX, _agentCurrY);
                // do learning of the agent - update his Q-function
                _qLearning.UpdateState(currentState, action, reward, nextState);

                // set tabu action
                tabuPolicy.SetTabuAction((action + 2) % 4, 1);
            }

            _currentIteration++;
            Debug.Log(string.Format("{0} steps needed for iteration {1}.", steps, _currentIteration));
        }

        _enableControls = true;
        Debug.Log("QLearning training finished. Try to execute the solution.");
    }

Ejemplo n.º 3

Mostrar archivo

        // Q-Learning thread
        private void QLearningThread()
        {
            int iteration = 0;
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)qLearning.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            while ((!needToStop) && (iteration < learningIterations))
            {
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                qLearning.LearningRate    = learningRate - ((double)iteration / learningIterations) * learningRate;
                tabuPolicy.ResetTabuList();

                var agentCurrentX = agentStartX;
                var agentCurrentY = agentStartY;

                int steps = 0;
                while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                {
                    steps++;
                    int    currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                    int    action       = qLearning.GetAction(currentState);
                    double reward       = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);
                    int    nextState    = GetStateNumber(agentCurrentX, agentCurrentY);

                    // do learning of the agent - update his Q-function, set Tabu action
                    qLearning.UpdateState(currentState, action, reward, nextState);
                    tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                }

                System.Diagnostics.Debug.WriteLine(steps);
                iteration++;

                SetText(iterationBox, iteration.ToString());
            }

            EnableControls(true);
        }

Ejemplo n.º 4

Mostrar archivo

Archivo: Q.cs Proyecto: MladenMiletic/NET_TO_VISSIM

 /// <summary>
 /// Updates Q table based on the calculated reward
 /// </summary>
 /// <param name="previousState">state number of previous state</param>
 /// <param name="action">number of action that was taken</param>
 /// <param name="reward">reward given</param>
 /// <param name="nextState">number of the next state</param>
 public void UpdateQTable(int previousState, int action, double reward, int nextState)
 {
     qLearning.UpdateState(previousState, action, reward, nextState);
 }

Ejemplo n.º 5

Mostrar archivo

Archivo: QLearningTest.cs Proyecto: jthornca/accord-framework

        public void learn_test()
        {
            #region doc_main
            // Fix the random number generator
            Accord.Math.Random.Generator.Seed = 0;

            // In this example, we will be using the QLearning algorithm
            // to make a robot learn how to navigate a map. The map is
            // shown below, where a 1 denotes a wall and 0 denotes areas
            // where the robot can navigate:
            //
            int[,] map =
            {
                { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
                { 1, 1, 0, 0, 0, 0, 0, 0, 1 },
                { 1, 1, 0, 0, 0, 1, 1, 0, 1 },
                { 1, 0, 0, 1, 0, 0, 0, 0, 1 },
                { 1, 0, 0, 1, 1, 1, 1, 0, 1 },
                { 1, 0, 0, 1, 1, 0, 0, 0, 1 },
                { 1, 1, 0, 1, 0, 0, 0, 0, 1 },
                { 1, 1, 0, 1, 0, 1, 1, 0, 1 },
                { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            };

            // Now, we define the initial and target points from which the
            // robot will be spawn and where it should go, respectively:
            int agentStartX = 1;
            int agentStartY = 4;

            int agentStopX = 7;
            int agentStopY = 4;

            // The robot is able to sense the environment though 8 sensors
            // that capture whether the robot is near a wall or not. Based
            // on the robot's current location, the sensors will return an
            // integer number representing which sensors have detected walls

            Func <int, int, int> getState = (int x, int y) =>
            {
                int c1 = (map[y - 1, x - 1] != 0) ? 1 : 0;
                int c2 = (map[y - 1, x + 0] != 0) ? 1 : 0;
                int c3 = (map[y - 1, x + 1] != 0) ? 1 : 0;
                int c4 = (map[y + 0, x + 1] != 0) ? 1 : 0;
                int c5 = (map[y + 1, x + 1] != 0) ? 1 : 0;
                int c6 = (map[y + 1, x + 0] != 0) ? 1 : 0;
                int c7 = (map[y + 1, x - 1] != 0) ? 1 : 0;
                int c8 = (map[y + 0, x - 1] != 0) ? 1 : 0;

                return(c1 | (c2 << 1) | (c3 << 2) | (c4 << 3) | (c5 << 4) | (c6 << 5) | (c7 << 6) | (c8 << 7));
            };

            // The actions are the possible directions the robot can go:
            //
            //   - case 0: go to north (up)
            //   - case 1: go to east (right)
            //   - case 2: go to south (down)
            //   - case 3: go to west (left)
            //

            int    learningIterations = 1000;
            double explorationRate    = 0.5;
            double learningRate       = 0.5;

            double moveReward = 0;
            double wallReward = -1;
            double goalReward = 1;

            // The function below specifies how the robot should perform an action given its
            // current position and an action number. This will cause the robot to update its
            // current X and Y locations given the direction (above) it was instructed to go:
            Func <int, int, int, Tuple <double, int, int> > doAction = (int currentX, int currentY, int action) =>
            {
                // default reward is equal to moving reward
                double reward = moveReward;

                // moving direction
                int dx = 0, dy = 0;

                switch (action)
                {
                case 0:             // go to north (up)
                    dy = -1;
                    break;

                case 1:             // go to east (right)
                    dx = 1;
                    break;

                case 2:             // go to south (down)
                    dy = 1;
                    break;

                case 3:             // go to west (left)
                    dx = -1;
                    break;
                }

                int newX = currentX + dx;
                int newY = currentY + dy;

                // check new agent's coordinates
                if ((map[newY, newX] != 0) || (newX < 0) || (newX >= map.Columns()) || (newY < 0) || (newY >= map.Rows()))
                {
                    // we found a wall or got outside of the world
                    reward = wallReward;
                }
                else
                {
                    currentX = newX;
                    currentY = newY;

                    // check if we found the goal
                    if ((currentX == agentStopX) && (currentY == agentStopY))
                    {
                        reward = goalReward;
                    }
                }

                return(Tuple.Create(reward, currentX, currentY));
            };


            // After defining all those functions, we create a new Sarsa algorithm:
            var explorationPolicy = new EpsilonGreedyExploration(explorationRate);
            var tabuPolicy        = new TabuSearchExploration(4, explorationPolicy);
            var qLearning         = new QLearning(256, 4, tabuPolicy);

            // curent coordinates of the agent
            int agentCurrentX = -1;
            int agentCurrentY = -1;

            bool needToStop = false;
            int  iteration  = 0;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;

                // set learning rate for this iteration
                qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;

                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = agentStartX;
                agentCurrentY = agentStartY;

                // previous state and action
                int previousState  = getState(agentCurrentX, agentCurrentY);
                int previousAction = qLearning.GetAction(previousState);

                // update agent's current position and get his reward
                var    r      = doAction(agentCurrentX, agentCurrentY, previousAction);
                double reward = r.Item1;
                agentCurrentX = r.Item2;
                agentCurrentY = r.Item3;

                // loop
                while ((!needToStop) && (iteration < learningIterations))
                {
                    // set exploration rate for this iteration
                    explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                    // set learning rate for this iteration
                    qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                    // clear tabu list
                    tabuPolicy.ResetTabuList();

                    // reset agent's coordinates to the starting position
                    agentCurrentX = agentStartX;
                    agentCurrentY = agentStartY;

                    // steps performed by agent to get to the goal
                    int steps = 0;

                    while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                    {
                        steps++;
                        // get agent's current state
                        int currentState = getState(agentCurrentX, agentCurrentY);

                        // get the action for this state
                        int action = qLearning.GetAction(currentState);

                        // update agent's current position and get his reward
                        r             = doAction(agentCurrentX, agentCurrentY, action);
                        reward        = r.Item1;
                        agentCurrentX = r.Item2;
                        agentCurrentY = r.Item3;

                        // get agent's next state
                        int nextState = getState(agentCurrentX, agentCurrentY);

                        // do learning of the agent - update his Q-function
                        qLearning.UpdateState(currentState, action, reward, nextState);

                        // set tabu action
                        tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                    }

                    System.Diagnostics.Debug.WriteLine(steps);

                    iteration++;
                }
            }

            // The end position for the robot will be (7, 4):
            int finalPosX = agentCurrentX; // 7
            int finalPosY = agentCurrentY; // 4;
            #endregion

            Assert.AreEqual(7, finalPosX);
            Assert.AreEqual(4, finalPosY);
        }

Ejemplo n.º 6

Mostrar archivo

        private void QLearningThread()
        {
            int iteration = 0;
            // 当前坐标的代理 curent coordinates of the agent
            int agentCurrentX, agentCurrentY;
            // 探索策略 exploration policy
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)qLearning.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                OldAction.Clear();
                // 为这个迭代设置勘探速率 set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                // 为迭代设置学习率 set learning rate for this iteration
                qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                // 清除tabu列表 clear tabu list
                tabuPolicy.ResetTabuList();

                //复位代理的坐标到起始位置 reset agent's coordinates to the starting position
                agentCurrentX = _agentStartX;
                agentCurrentY = _agentStartY;

                // 代理执行的步骤以达到目标 steps performed by agent to get to the goal
                int steps = 0;

                while ((!needToStop) && ((agentCurrentX != _agentStopX) || (agentCurrentY != _agentStopY)))
                {
                    steps++;

                    // 获取代理的当前状态 get agent's current state
                    int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // 获取此状态的操作 get the action for this state
                    int action = qLearning.GetAction(currentState);
                    tabuPolicy.ResetTabuList();

                    // 更新代理的当前位置并得到他的奖励 update agent's current position and get his reward
                    double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);
                    // 获取代理的下一个状态 get agent's next state
                    int nextState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // 做学习代理 - 更新他的Q函数 do learning of the agent - update his Q-function
                    qLearning.UpdateState(currentState, action, reward, nextState);
                    OldAction.Add(Tuple.Create(currentState, action, reward, nextState));

                    // 设置tabu动作 set tabu action
                    tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                }
                for (int i = OldAction.Count - 1; i >= 0; i--)
                {
                    var a = OldAction[i];
                    qLearning.UpdateState(a.Item1, a.Item2, a.Item3, a.Item4);
                }

                System.Diagnostics.Debug.WriteLine(steps);

                iteration++;

                // 显示当前迭代 show current iteration
                SetText(iterationBox, iteration.ToString());
            }

            // 启用设置控件 enable settings controls
            EnableControls(true);
        }