Example #1
        // Q-Learning thread
        private void QLearningThread()
        {
            int iteration = 0;
            // current coordinates of the agent
            int agentCurrentX, agentCurrentY;
            // exploration policy
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)qLearning.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                // set learning rate for this iteration
                qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = agentStartX;
                agentCurrentY = agentStartY;

                // steps performed by agent to get to the goal
                int steps = 0;

                while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                {
                    steps++;
                    // get agent's current state
                    int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // get the action for this state
                    int action = qLearning.GetAction(currentState);
                    // update agent's current position and get his reward
                    double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);
                    // get agent's next state
                    int nextState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // update the agent's Q-function
                    qLearning.UpdateState(currentState, action, reward, nextState);

                    // set tabu action
                    tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                }

                System.Diagnostics.Debug.WriteLine(steps);

                iteration++;

                // show current iteration
                SetText(iterationBox, iteration.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }
Example #2
    // Q-Learning thread
    private void QLearningThread()
    {
        _currentIteration = 0;

        // exploration policy
        TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)_qLearning.ExplorationPolicy;
        EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

        // loop
        while ((!_needToStop) && (_currentIteration < learningIterations))
        {
            // set exploration rate for this iteration
            explorationPolicy.Epsilon = explorationRate - ((double)_currentIteration / learningIterations) * explorationRate;
            // set learning rate for this iteration
            _qLearning.LearningRate = learningRate - ((double)_currentIteration / learningIterations) * learningRate;
            // clear tabu list
            tabuPolicy.ResetTabuList();

            // reset agent's coordinates to the starting position
            _agentCurrX = _agentStartX;
            _agentCurrY = _agentStartY;

            // steps performed by agent to get to the goal
            int steps = 0;

            while ((!_needToStop) && ((_agentCurrX != _agentStopX) || (_agentCurrY != _agentStopY)))
            {
                steps++;
                // get agent's current state
                int currentState = GetStateNumber(_agentCurrX, _agentCurrY);
                // get the action for this state
                int action = _qLearning.GetAction(currentState);
                // update agent's current position and get his reward
                double reward = UpdateAgentPosition(action);
                // get agent's next state
                int nextState = GetStateNumber(_agentCurrX, _agentCurrY);
                // update the agent's Q-function
                _qLearning.UpdateState(currentState, action, reward, nextState);

                // set tabu action
                tabuPolicy.SetTabuAction((action + 2) % 4, 1);
            }

            _currentIteration++;
            Debug.Log(string.Format("{0} steps needed for iteration {1}.", steps, _currentIteration));
        }

        _enableControls = true;
        Debug.Log("QLearning training finished. Try to execute the solution.");
    }
Example #3
        // Sarsa thread
        private void SarsaThread()
        {
            int iteration = 0;
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)sarsa.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            while ((!needToStop) && (iteration < learningIterations))
            {
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                sarsa.LearningRate        = learningRate - ((double)iteration / learningIterations) * learningRate;
                tabuPolicy.ResetTabuList();

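                // reset the agent to the start position and take the initial SARSA step (state, action, reward)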
                var    agentCurrentX  = agentStartX;
                var    agentCurrentY  = agentStartY;
                int    steps          = 1;
                int    previousState  = GetStateNumber(agentCurrentX, agentCurrentY);
                int    previousAction = sarsa.GetAction(previousState);
                double reward         = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, previousAction);

                while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                {
                    steps++;

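                    // make the reverse move tabu, update SARSA with the (s, a, r, s', a') tuple, then take the next step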
                    tabuPolicy.SetTabuAction((previousAction + 2) % 4, 1);
                    int nextState  = GetStateNumber(agentCurrentX, agentCurrentY);
                    int nextAction = sarsa.GetAction(nextState);
                    sarsa.UpdateState(previousState, previousAction, reward, nextState, nextAction);
                    reward         = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, nextAction);
                    previousState  = nextState;
                    previousAction = nextAction;
                }

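                // final update for the terminal state once the goal has been reached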
                if (!needToStop)
                {
                    sarsa.UpdateState(previousState, previousAction, reward);
                }

                System.Diagnostics.Debug.WriteLine(steps);

                iteration++;

                SetText(iterationBox, iteration.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }
Example #4
        // Q-Learning thread
        private void QLearningThread()
        {
            int iteration = 0;
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)qLearning.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            while ((!needToStop) && (iteration < learningIterations))
            {
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                qLearning.LearningRate    = learningRate - ((double)iteration / learningIterations) * learningRate;
                tabuPolicy.ResetTabuList();

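                // reset the agent to the start position for this learning episode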
                var agentCurrentX = agentStartX;
                var agentCurrentY = agentStartY;

                int steps = 0;
                while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                {
                    steps++;
                    int    currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                    int    action       = qLearning.GetAction(currentState);
                    double reward       = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);
                    int    nextState    = GetStateNumber(agentCurrentX, agentCurrentY);

                    // update the agent's Q-function and set the tabu action
                    qLearning.UpdateState(currentState, action, reward, nextState);
                    tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                }

                System.Diagnostics.Debug.WriteLine(steps);
                iteration++;

                SetText(iterationBox, iteration.ToString());
            }

            EnableControls(true);
        }
Example #5
        private static void Main(string[] args)
        {
            /**Region for setting up SARSA function (and possibly parameters)**/

            #region SARSA Setup

            //Set up SARSA object
            var explorationPolicy = new EpsilonGreedyExploration(ExplorationRate);
            var numberOfStates    = 15 * 15 * 15 * 15;
            var numberOfActions   = Enum.GetValues(typeof(Type)).Length;
            var sarsa             = new SARSA(numberOfStates, numberOfActions, explorationPolicy);

            //Prepare the state mapping
            Func <Pokémon, Pokémon, long> getState = (pokémon1, pokémon2) =>
            {
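                //Encode the primary and secondary types of both Pokémon as a base-15 number (single-typed Pokémon repeat their first type); moveTypes is computed but not used here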
                var moveTypes = pokémon1.Moves.Select(t => t.AttackType).Distinct().ToList();

                return
                    (15 * 15 * 15 * (long)pokémon1.Types[0] +
                     15 * 15 * (long)(pokémon1.Types.Count > 1 ? pokémon1.Types[1] : pokémon1.Types[0]) +
                     15 * (long)pokémon2.Types[0] +
                     1 * (long)(pokémon2.Types.Count > 1 ? pokémon2.Types[1] : pokémon2.Types[0]));
            };

            #endregion SARSA Setup

            using (var sw = new StreamWriter("PineappleExpress.txt"))
            {
                sw.Write("");
            }

            /**Region for setting up the battle itself**/

            #region Battle Execution

            //For the specified number of battles, perform battles and update the policy
            for (var battleNumber = 0; battleNumber < NumberOfBattles; battleNumber++)
            {
                // set exploration rate for this iteration
                explorationPolicy.ExplorationRate =
                    ExplorationRate - (double)battleNumber / NumberOfBattles * ExplorationRate;

                // set learning rate for this iteration
                sarsa.LearningRate = LearningRate - (double)battleNumber / NumberOfBattles * LearningRate;

                //Prepare the Pokémon
                Pokémon pokemon1 = RentalPokémon.RentalPorygon;  //A pre-made Porygon
                Pokémon pokemon2 = RentalPokémon.RentalVenusaur; //A pre-made opponent

                long previousState  = -1;
                var  previousAction = -1;
                long currentState   = -1;
                var  nextAction     = -1;

                var reward    = 0.0;
                var firstTurn = true;

                double percentFinished = 0;

                //Battle loop
                while (!(pokemon1.IsFainted || pokemon2.IsFainted))
                {
                    //Shift states
                    currentState = getState(pokemon1, pokemon2);
                    var validTypes = pokemon1.Moves.Select(m => (int)m.AttackType).Distinct().ToList();
                    nextAction = sarsa.GetAction(currentState, validTypes);

                    //update SARSA
                    if (!firstTurn)
                    {
                        sarsa.UpdateState(previousState, previousAction, reward, currentState, nextAction);
                    }
                    else
                    {
                        firstTurn = false;
                    }

                    //Determine who moves first
                    var firstMover = pokemon1.Stats[Stat.Speed] > pokemon2.Stats[Stat.Speed] ? pokemon1 : pokemon2;

                    //Perform actions
                    if (pokemon1 == firstMover)
                    {
                        reward = pokemon1.UseMoveOfType((Type)nextAction, pokemon2);
                        Console.WriteLine("{0} (Pokémon 1) used a move of type {1}", pokemon1.Species.Name,
                                          Enum.GetName(typeof(Type), (Type)nextAction));
                        Console.WriteLine("Did {0} damage. {1} (Pokémon 2) now has {2} health remaining)",
                                          reward, pokemon2.Species.Name, pokemon2.RemainingHealth);
                        Console.WriteLine(((Type)nextAction).MultiplierOn(pokemon2.Types.ToArray()));
                        if (!pokemon2.IsFainted)
                        {
                            pokemon2.Use(new Random().Next(4), pokemon1);
                        }
                        else
                        {
                            reward += 20;
                        }
                    }
                    else
                    {
                        pokemon2.Use(new Random().Next(4), pokemon1);

                        //Console.WriteLine("{0} (Pokémon 2) used {1}", pokemon2.Species.Name, pokemon2.Moves[0].Name);
                        //Console.WriteLine("Did {0} damage. {1} (Pokémon 1) now has {2} health remaining)",
                        //    reward, pokemon1.Species.Name, pokemon1.RemainingHealth);

                        if (!pokemon1.IsFainted)
                        {
                            reward = pokemon1.UseMoveOfType((Type)nextAction, pokemon2);
                            Console.WriteLine("{0} (Pokémon 1) used a move of type {1}", pokemon1.Species.Name,
                                              Enum.GetName(typeof(Type), (Type)nextAction));
                            Console.WriteLine("Did {0} damage. {1} (Pokémon 2) now has {2} health remaining)",
                                              reward, pokemon2.Species.Name, pokemon2.RemainingHealth);
                            Console.WriteLine(((Type)nextAction).MultiplierOn(pokemon2.Types.ToArray()));
                        }
                    }

                    previousState   = currentState;
                    previousAction  = nextAction;
                    percentFinished = ((double)pokemon2.Stats[Stat.HP] - pokemon2.RemainingHealth) /
                                      pokemon2.Stats[Stat.HP];
                    Console.WriteLine($"{reward}");
                }

                sarsa.UpdateState(previousState, previousAction, reward, currentState, nextAction);

                if (pokemon1.IsFainted)
                {
                    Console.WriteLine("{0} (Pokémon 1) Fainted", pokemon1.Species.Name);
                }
                else
                {
                    Console.WriteLine("{0} (Pokémon 2) Fainted", pokemon2.Species.Name);
                }

                //Print score for graphing
                using (var sw = new StreamWriter($"PineappleExpress({ExplorationRate}_{LearningRate}).txt", true))
                {
                    sw.WriteLine("{0}, {1}", battleNumber, percentFinished);
                }
            }

            #endregion Battle Execution
        }
Example #6
 /// <summary>
 /// Main constructor, currently only calls the AForge QLearning class constructor
 /// </summary>
 /// <param name="states">number of states</param>
 /// <param name="actions">number of actions</param>
 /// <param name="exploration">epsilon-greedy exploration policy</param>
 public Q(int states, int actions, EpsilonGreedyExploration exploration)
 {
     qLearning = new QLearning(states, actions, exploration, false);
 }
Example #7
        // Show solution thread
        private void ShowSolutionThread()
        {
            // set exploration rate to 0, so the agent uses only what it has learned
            TabuSearchExploration    tabuPolicy       = null;
            EpsilonGreedyExploration exploratioPolicy = null;

            if (qLearning != null)
            {
                tabuPolicy = (TabuSearchExploration)qLearning.ExplorationPolicy;
            }
            else if (sarsa != null)
            {
                tabuPolicy = (TabuSearchExploration)sarsa.ExplorationPolicy;
            }
            else
            {
                throw new Exception();
            }

            exploratioPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            exploratioPolicy.Epsilon = 0;
            tabuPolicy.ResetTabuList();

            // current coordinates of the agent
            int agentCurrentX = agentStartX, agentCurrentY = agentStartY;

            // prepare the map to display
            Array.Copy(map, mapToDisplay, mapWidth * mapHeight);
            mapToDisplay[agentStartY, agentStartX] = 2;
            mapToDisplay[agentStopY, agentStopX]   = 3;

            while (!needToStop)
            {
                // display the map
                cellWorld.Map = mapToDisplay;
                // sleep for a while
                Thread.Sleep(200);

                // check if we have reached the end point
                if ((agentCurrentX == agentStopX) && (agentCurrentY == agentStopY))
                {
                    // restore the map
                    mapToDisplay[agentStartY, agentStartX] = 2;
                    mapToDisplay[agentStopY, agentStopX]   = 3;

                    agentCurrentX = agentStartX;
                    agentCurrentY = agentStartY;

                    cellWorld.Map = mapToDisplay;
                    Thread.Sleep(200);
                }

                // remove agent from current position
                mapToDisplay[agentCurrentY, agentCurrentX] = 0;

                // get agent's current state
                int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                // get the action for this state
                int action = (qLearning != null) ? qLearning.GetAction(currentState) : sarsa.GetAction(currentState);
                // update agent's current position and get his reward
                double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);

                // put agent to the new position
                mapToDisplay[agentCurrentY, agentCurrentX] = 2;
            }

            // enable settings controls
            EnableControls(true);
        }
Example #8
        public void learn_test()
        {
            #region doc_main
            // Fix the random number generator
            Accord.Math.Random.Generator.Seed = 0;

            // In this example, we will be using the QLearning algorithm
            // to make a robot learn how to navigate a map. The map is
            // shown below, where a 1 denotes a wall and 0 denotes areas
            // where the robot can navigate:
            //
            int[,] map =
            {
                { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
                { 1, 1, 0, 0, 0, 0, 0, 0, 1 },
                { 1, 1, 0, 0, 0, 1, 1, 0, 1 },
                { 1, 0, 0, 1, 0, 0, 0, 0, 1 },
                { 1, 0, 0, 1, 1, 1, 1, 0, 1 },
                { 1, 0, 0, 1, 1, 0, 0, 0, 1 },
                { 1, 1, 0, 1, 0, 0, 0, 0, 1 },
                { 1, 1, 0, 1, 0, 1, 1, 0, 1 },
                { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            };

            // Now, we define the initial and target points from which the
            // robot will be spawned and where it should go, respectively:
            int agentStartX = 1;
            int agentStartY = 4;

            int agentStopX = 7;
            int agentStopY = 4;

            // The robot is able to sense the environment through 8 sensors
            // that capture whether the robot is near a wall or not. Based
            // on the robot's current location, the sensors will return an
            // integer number representing which sensors have detected walls

            Func <int, int, int> getState = (int x, int y) =>
            {
                int c1 = (map[y - 1, x - 1] != 0) ? 1 : 0;
                int c2 = (map[y - 1, x + 0] != 0) ? 1 : 0;
                int c3 = (map[y - 1, x + 1] != 0) ? 1 : 0;
                int c4 = (map[y + 0, x + 1] != 0) ? 1 : 0;
                int c5 = (map[y + 1, x + 1] != 0) ? 1 : 0;
                int c6 = (map[y + 1, x + 0] != 0) ? 1 : 0;
                int c7 = (map[y + 1, x - 1] != 0) ? 1 : 0;
                int c8 = (map[y + 0, x - 1] != 0) ? 1 : 0;

                return(c1 | (c2 << 1) | (c3 << 2) | (c4 << 3) | (c5 << 4) | (c6 << 5) | (c7 << 6) | (c8 << 7));
            };

            // The actions are the possible directions the robot can go:
            //
            //   - case 0: go to north (up)
            //   - case 1: go to east (right)
            //   - case 2: go to south (down)
            //   - case 3: go to west (left)
            //

            int    learningIterations = 1000;
            double explorationRate    = 0.5;
            double learningRate       = 0.5;

            double moveReward = 0;
            double wallReward = -1;
            double goalReward = 1;

            // The function below specifies how the robot should perform an action given its
            // current position and an action number. This will cause the robot to update its
            // current X and Y locations given the direction (above) it was instructed to go:
            Func <int, int, int, Tuple <double, int, int> > doAction = (int currentX, int currentY, int action) =>
            {
                // default reward is equal to moving reward
                double reward = moveReward;

                // moving direction
                int dx = 0, dy = 0;

                switch (action)
                {
                case 0:             // go to north (up)
                    dy = -1;
                    break;

                case 1:             // go to east (right)
                    dx = 1;
                    break;

                case 2:             // go to south (down)
                    dy = 1;
                    break;

                case 3:             // go to west (left)
                    dx = -1;
                    break;
                }

                int newX = currentX + dx;
                int newY = currentY + dy;

                // check new agent's coordinates
                if ((newX < 0) || (newX >= map.Columns()) || (newY < 0) || (newY >= map.Rows()) || (map[newY, newX] != 0))
                {
                    // we found a wall or got outside of the world
                    reward = wallReward;
                }
                else
                {
                    currentX = newX;
                    currentY = newY;

                    // check if we found the goal
                    if ((currentX == agentStopX) && (currentY == agentStopY))
                    {
                        reward = goalReward;
                    }
                }

                return(Tuple.Create(reward, currentX, currentY));
            };


            // After defining all those functions, we create a new Q-Learning algorithm:
            var explorationPolicy = new EpsilonGreedyExploration(explorationRate);
            var tabuPolicy        = new TabuSearchExploration(4, explorationPolicy);
            var qLearning         = new QLearning(256, 4, tabuPolicy);

            // current coordinates of the agent
            int agentCurrentX = -1;
            int agentCurrentY = -1;

            bool needToStop = false;
            int  iteration  = 0;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;

                // set learning rate for this iteration
                qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;

                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = agentStartX;
                agentCurrentY = agentStartY;

                // previous state and action
                int previousState  = getState(agentCurrentX, agentCurrentY);
                int previousAction = qLearning.GetAction(previousState);

                // update agent's current position and get his reward
                var    r      = doAction(agentCurrentX, agentCurrentY, previousAction);
                double reward = r.Item1;
                agentCurrentX = r.Item2;
                agentCurrentY = r.Item3;

                // loop
                while ((!needToStop) && (iteration < learningIterations))
                {
                    // set exploration rate for this iteration
                    explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                    // set learning rate for this iteration
                    qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                    // clear tabu list
                    tabuPolicy.ResetTabuList();

                    // reset agent's coordinates to the starting position
                    agentCurrentX = agentStartX;
                    agentCurrentY = agentStartY;

                    // steps performed by agent to get to the goal
                    int steps = 0;

                    while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                    {
                        steps++;
                        // get agent's current state
                        int currentState = getState(agentCurrentX, agentCurrentY);

                        // get the action for this state
                        int action = qLearning.GetAction(currentState);

                        // update agent's current position and get his reward
                        r             = doAction(agentCurrentX, agentCurrentY, action);
                        reward        = r.Item1;
                        agentCurrentX = r.Item2;
                        agentCurrentY = r.Item3;

                        // get agent's next state
                        int nextState = getState(agentCurrentX, agentCurrentY);

                        // update the agent's Q-function
                        qLearning.UpdateState(currentState, action, reward, nextState);

                        // set tabu action
                        tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                    }

                    System.Diagnostics.Debug.WriteLine(steps);

                    iteration++;
                }
            }

            // The end position for the robot will be (7, 4):
            int finalPosX = agentCurrentX; // 7
            int finalPosY = agentCurrentY; // 4
            #endregion

            Assert.AreEqual(7, finalPosX);
            Assert.AreEqual(4, finalPosY);
        }
Example #9
        private void button13_Click(object sender, EventArgs e)
        {
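            // connect to Vissim and let the user pick a network file to load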
            vissim = new VissimConnection();
            OpenFileDialog openFileDialog1 = new OpenFileDialog();

            if (openFileDialog1.ShowDialog() == System.Windows.Forms.DialogResult.OK)
            {
                COM.LoadVissimNetwork(vissim.GetVissimInstance(), openFileDialog1.FileName);
            }
            sim = new Simulation(vissim);
            sim.SetSimulationResolution(1);


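            // train a 5x5 SOM on an imported data set; its winning neuron index later serves as the discrete traffic state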
            BLL.Neural.SOM som             = new BLL.Neural.SOM(25, 5, 5, 0.5, 1, 18, 100);
            OpenFileDialog openFileDialog2 = new OpenFileDialog();

            if (openFileDialog2.ShowDialog() == System.Windows.Forms.DialogResult.OK)
            {
                double[][] importedData = BLL.HelpMethods.LoadTrainingSet(openFileDialog2.FileName, ',');
                if (importedData != null)
                {
                    som.TrainSOM(importedData, 1000);
                }
            }
            EpsilonGreedyExploration greedyExploration = new EpsilonGreedyExploration(1);
            double beta = 0;

            BLL.Neural.Q q = new BLL.Neural.Q(25, 7, greedyExploration);
            q.qLearning.DiscountFactor = 0.1;

            BLL.Thesis.Actions actions = new BLL.Thesis.Actions(25, 22);

            double            simulationPeriod = COM.getSimulationPeriod(sim.currentSimulation);
            ISignalController SignalController = vissim.GetVissimInstance().Net.SignalControllers.get_ItemByKey(3);
            StreamWriter      writer           = new StreamWriter("Results1000.csv");

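            // run 1000 training episodes: every 300 simulation seconds, map queue lengths to a state via the SOM, apply a signal-control action, and update the Q-table with a delay-based reward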
            for (int j = 0; j < 1000; j++)
            {
                sim.RunContinuos(299);
                for (int i = 599; i <= simulationPeriod - 1; i = i + 300)
                {
                    double[] qLenMax = sim.queueCounterResultsMax;
                    double[] qLenAvg = sim.queueCounterResultsAvg;
                    double[] qLenAll = new double[18];

                    Array.Copy(qLenAvg, qLenAll, qLenAvg.Length);
                    Array.Copy(qLenMax, 0, qLenAll, qLenAvg.Length, qLenMax.Length);


                    double delayAvgBefore = vissim.GetVissimInstance().Net.VehicleNetworkPerformanceMeasurement.get_AttValue("DelayAvg(Current, Current, All)");

                    int state1 = som.GetWinningNeuronNumber(qLenAll);

                    int action = q.GetAction(state1);

                    actions.PerformAction(state1, action, SignalController);

                    sim.RunContinuos(i);

                    qLenMax = sim.queueCounterResultsMax;
                    qLenAvg = sim.queueCounterResultsAvg;
                    Array.Copy(qLenAvg, qLenAll, qLenAvg.Length);
                    Array.Copy(qLenMax, 0, qLenAll, qLenAvg.Length, qLenMax.Length);

                    int state2 = som.GetWinningNeuronNumber(qLenAll);

                    double delayAvgAfter = vissim.GetVissimInstance().Net.VehicleNetworkPerformanceMeasurement.get_AttValue("DelayAvg(Current, Current, All)");

                    double reward = BLL.Neural.Q.CalculateReward(qLenAvg, delayAvgBefore, delayAvgAfter, beta);

                    q.UpdateQTable(state1, action, reward, state2);
                }

                double resultTTS = vissim.GetVissimInstance().Net.VehicleNetworkPerformanceMeasurement.get_AttValue("TravTmTot(Current, Total, All)");
                //  int seed = sim.currentSimulation.get_AttValue("RandSeed");
                writer.WriteLine(j + "," + resultTTS);
                q.qLearning.ExplorationPolicy = new EpsilonGreedyExploration((Math.Pow(0.995, (j + 1)) * 0.99) + 0.01);
                // beta = (j + 1) * 0.001;
                writer.Flush();
                sim.currentSimulation.RunSingleStep();
            }

            writer.Close();
        }
Example #10
    // Sarsa thread
    private void SarsaThread()
    {
        int iteration = 0;

        // exploration policy
        TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)_sarsa.ExplorationPolicy;
        EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

        // loop
        while ((!_needToStop) && (iteration < learningIterations))
        {
            // set exploration rate for this iteration
            explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
            // set learning rate for this iteration
            _sarsa.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
            // clear tabu list
            tabuPolicy.ResetTabuList();

            // reset agent's coordinates to the starting position
            _agentCurrX = _agentStartX;
            _agentCurrY = _agentStartY;

            // steps performed by agent to get to the goal
            int steps = 1;
            // previous state and action
            int previousState  = GetStateNumber(_agentCurrX, _agentCurrY);
            int previousAction = _sarsa.GetAction(previousState);
            // update agent's current position and get his reward
            double reward = UpdateAgentPosition(previousAction);

            while ((!_needToStop) && ((_agentCurrX != _agentStopX) || (_agentCurrY != _agentStopY)))
            {
                steps++;

                // set tabu action
                tabuPolicy.SetTabuAction((previousAction + 2) % 4, 1);

                // get agent's next state
                int nextState = GetStateNumber(_agentCurrX, _agentCurrY);
                // get agent's next action
                int nextAction = _sarsa.GetAction(nextState);
                // update the agent's Q-function
                _sarsa.UpdateState(previousState, previousAction, reward, nextState, nextAction);

                // update agent's new position and get his reward
                reward = UpdateAgentPosition(nextAction);

                previousState  = nextState;
                previousAction = nextAction;
            }

            if (!_needToStop)
            {
                // update Q-function if terminal state was reached
                _sarsa.UpdateState(previousState, previousAction, reward);
            }

            iteration++;
            Debug.Log(string.Format("{0} steps needed for iteration {1}.", steps, iteration));
        }

        _enableControls = true;
        Debug.Log("SARSA training finished. Try to execute the solution.");
    }
Example #11
    void FixedUpdate()
    {
        if (_showSolution)
        {
            // move every 0.25 seconds
            if ((_timeStep + 0.25f) < Time.time)
            {
                _timeStep = Time.time;

                if ((_agentCurrX == _agentStopX) && (_agentCurrY == _agentStopY))
                {
                    PlayerObject.localPosition = new Vector3(_agentStartX, 0, _agentStartY);

                    _agentCurrX = _agentStartX;
                    _agentCurrY = _agentStartY;
                }
                else
                {
                    if (_initShowSolution)
                    {
                        _initShowSolution = false;

                        // set exploration rate to 0, so the agent uses only what it has learned
                        TabuSearchExploration    tabuPolicy       = null;
                        EpsilonGreedyExploration exploratioPolicy = null;

                        if (_qLearning != null)
                        {
                            tabuPolicy = (TabuSearchExploration)_qLearning.ExplorationPolicy;
                        }
                        else if (_sarsa != null)
                        {
                            tabuPolicy = (TabuSearchExploration)_sarsa.ExplorationPolicy;
                        }
                        else
                        {
                            tabuPolicy = (TabuSearchExploration)_qLearning_FDGS.ExplorationPolicy;
                        }

                        exploratioPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

                        exploratioPolicy.Epsilon = 0;
                        tabuPolicy.ResetTabuList();

                        PlayerObject.localPosition = new Vector3(_agentStartX, 0, _agentStartY);

                        // current coordinates of the agent
                        _agentCurrX = (int)PlayerObject.localPosition.x;
                        _agentCurrY = (int)PlayerObject.localPosition.z;
                    }

                    if ((_qLearning != null) || (_sarsa != null))
                    {
                        // get agent's current state
                        int currentState = GetStateNumber(_agentCurrX, _agentCurrY);
                        // get the action for this state
                        int action = (_qLearning != null) ? _qLearning.GetAction(currentState) : _sarsa.GetAction(currentState);
                        // update agent's current position and get his reward
                        UpdateAgentPosition(action);
                    }
                    else
                    {
                        // get agent's current state
                        int currentState = _qLearning_FDGS.GetStateFromCoordinates(_agentCurrX, _agentCurrY);
                        // get the action for this state
                        int action = _qLearning_FDGS.GetLearnedAction(currentState);
                        // update agent's current position
                        UpdateAgentPosition(currentState, action);
                    }

                    // set player object position
                    PlayerObject.localPosition = new Vector3(_agentCurrX, 0, _agentCurrY);
                }
            }
        }
        else
        {
            if (!_needToStop)
            {
                // show current iteration
                References.CurrentIteration.text = _currentIteration.ToString();
            }

            if (_enableControls)
            {
                _enableControls = false;

                // enable settings controls
                References.EnableControls(true);
            }
        }
    }
Example #12
        private void SarsaThread()
        {
            int iteration = 0;
            // current coordinates of the agent
            int agentCurrentX, agentCurrentY;
            // exploration policy
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)sarsa.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                // set learning rate for this iteration
                sarsa.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = _agentStartX;
                agentCurrentY = _agentStartY;

                // steps performed by agent to get to the goal
                int steps = 1;
                // previous state and action
                int previousState  = GetStateNumber(agentCurrentX, agentCurrentY);
                int previousAction = sarsa.GetAction(previousState);
                // update agent's current position and get his reward
                double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, previousAction);

                while ((!needToStop) && ((agentCurrentX != _agentStopX) || (agentCurrentY != _agentStopY)))
                {
                    steps++;

                    // set tabu action
                    tabuPolicy.SetTabuAction((previousAction + 2) % 4, 1);

                    // get agent's next state
                    int nextState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // get agent's next action
                    int nextAction = sarsa.GetAction(nextState);
                    // update the agent's Q-function
                    sarsa.UpdateState(previousState, previousAction, reward, nextState, nextAction);

                    // update agent's new position and get his reward
                    reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, nextAction);

                    previousState  = nextState;
                    previousAction = nextAction;
                }

                if (!needToStop)
                {
                    // update the Q-function if the terminal state was reached
                    sarsa.UpdateState(previousState, previousAction, reward);
                }

                System.Diagnostics.Debug.WriteLine(steps);

                iteration++;

                // show current iteration
                SetText(iterationBox, iteration.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }
Example #13
        private void DoubleQLearningThread()
        {
            miniSteps = int.MaxValue;
            MiniOldAction.Clear();
            int iteration = 0;
            // current coordinates of the agent
            int agentCurrentX, agentCurrentY;
            // exploration policy
            TabuSearchExploration    tabuPolicy        = (TabuSearchExploration)doubleQLearning.ExplorationPolicy;
            EpsilonGreedyExploration explorationPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                OldAction.Clear();
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                // set learning rate for this iteration
                doubleQLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = _agentStartX;
                agentCurrentY = _agentStartY;

                // steps performed by agent to get to the goal
                int steps = 0;

                while ((!needToStop) && ((agentCurrentX != _agentStopX) || (agentCurrentY != _agentStopY)))
                {
                    steps++;

                    // get agent's current state
                    int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // get the action for this state
                    int action = doubleQLearning.GetAction(currentState);
                    tabuPolicy.ResetTabuList();

                    // update agent's current position and get his reward
                    double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);
                    // get agent's next state
                    int nextState = GetStateNumber(agentCurrentX, agentCurrentY);
                    // update the agent's Q-function
                    doubleQLearning.UpdateState(currentState, action, reward, nextState);

                    var tup = Tuple.Create(currentState, action, reward, nextState);
                    if (OldAction.Contains(tup) == false)
                    {
                        OldAction.Add(tup);
                    }

                    // set tabu action
                    tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                }
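                // replay this episode's transitions in reverse order, updating the Q-function again for each one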
                for (int i = OldAction.Count - 1; i >= 0; i--)
                {
                    var a = OldAction[i];
                    doubleQLearning.UpdateState(a.Item1, a.Item2, a.Item3, a.Item4);
                }
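                // keep the shortest episode seen so far; otherwise replay the stored best episode's transitions in reverse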
                if (steps < miniSteps)
                {
                    miniSteps = steps;
                    MiniOldAction.Clear();
                    for (int i = 0; i < OldAction.Count; i++)
                    {
                        MiniOldAction.Add(OldAction[i]);
                    }
                }
                else
                {
                    for (int i = MiniOldAction.Count - 1; i >= 0; i--)
                    {
                        var a = MiniOldAction[i];
                        doubleQLearning.UpdateState(a.Item1, a.Item2, a.Item3, a.Item4);
                    }
                }


                System.Diagnostics.Debug.WriteLine(steps);

                iteration++;

                // show current iteration
                SetText(iterationBox, iteration.ToString());
            }

            // enable settings controls
            EnableControls(true);
        }
Example #14
        private void ShowSolutionThread()
        {
            // set exploration rate to 0, so the agent uses only what it has learned
            TabuSearchExploration    tabuPolicy       = null;
            EpsilonGreedyExploration exploratioPolicy = null;

            if (qLearning != null)
            {
                tabuPolicy = (TabuSearchExploration)qLearning.ExplorationPolicy;
            }
            else if (sarsa != null)
            {
                tabuPolicy = (TabuSearchExploration)sarsa.ExplorationPolicy;
            }
            else
            {
                tabuPolicy = (TabuSearchExploration)doubleQLearning.ExplorationPolicy;
            }

            exploratioPolicy = (EpsilonGreedyExploration)tabuPolicy.BasePolicy;

            exploratioPolicy.Epsilon = 0;
            tabuPolicy.ResetTabuList();

            // current coordinates of the agent
            int agentCurrentX = _agentStartX, agentCurrentY = _agentStartY;

            // prepare the map to display
            Array.Copy(map, mapToDisplay, map.GetLength(0) * map.GetLength(1));
            mapToDisplay[_agentStartX, _agentStartY] = 2;
            mapToDisplay[_agentStopX, _agentStopY]   = 3;

            while (!needToStop)
            {
                // display the map
                this.cellWorld1.Map = mapToDisplay;
                // sleep for a while
                Thread.Sleep(200);

                // check if we have reached the end point
                if ((agentCurrentX == _agentStopX) && (agentCurrentY == _agentStopY))
                {
                    // restore the map
                    mapToDisplay[_agentStartX, _agentStartY] = 2;
                    mapToDisplay[_agentStopX, _agentStopY]   = 3;

                    agentCurrentX = _agentStartX;
                    agentCurrentY = _agentStartY;

                    this.cellWorld1.Map = mapToDisplay;
                    Thread.Sleep(200);
                }

                // remove agent from current position
                mapToDisplay[agentCurrentX, agentCurrentY] = 0;

                // get agent's current state
                int currentState = GetStateNumber(agentCurrentX, agentCurrentY);
                // get the action for this state
                int action = GetAction(currentState);// (qLearning != null) ? qLearning.GetAction(currentState) : sarsa.GetAction(currentState);
                // update agent's current position and get his reward
                double reward = UpdateAgentPosition(ref agentCurrentX, ref agentCurrentY, action);

                // put agent to the new position
                mapToDisplay[agentCurrentX, agentCurrentY] = 2;
            }

            // enable settings controls
            EnableControls(true);
        }