Example #1
        private void Init(QAgent agent, QOption option)
        {
            if (Benchmark)
            {
                BenchmarkSave.CurrentTestID = _instance.BenchmarkID;
                BenchmarkSave.Runs          = _instance.BenchmarkRuns;
            }
            else if (Mode == QAIMode.Testing && BenchmarkID != null && !BenchmarkID.Equals(""))
            {
                BenchmarkSave.ModelPath = _instance.BenchmarkID;
            }
            else
            {
                BenchmarkSave.CurrentTestID = agent.AI_ID().ID;
                BenchmarkSave.Runs          = 1;
            }
            Debug.Log("Running " + BenchmarkSave.ModelPath + " in mode " + Mode);

            _stopwatch = Stopwatch.StartNew();
            if (Tester != null)
            {
                Tester.Init();
            }

            DontDestroyOnLoad(gameObject);
            switch (Mode)
            {
            case QAIMode.Imitating: {
                _imitation = new QImitation();
                break;
            }

            default: {
                var qlCNN = new QLearningCNN(PrioritizedSweeping, option);
                _qlearning = qlCNN;
                _qlearning.Reset(agent);

                if (Remake)
                {
                    _qlearning.RemakeModel(agent.GetState());
                }
                else
                {
                    _qlearning.LoadModel();
                }

                if (VisualizeNetwork)
                {
                    _visualizer = _qlearning.CreateVisualizer();
                }

                qlCNN.CNN.ValuesComputed += (data, isTraining) =>
                {
                    if (NetworkValuesUpdated != null)
                    {
                        NetworkValuesUpdated(data, isTraining);
                    }
                };
                break;
            }
            }
        }
Example #2
    // Use this for initialization
    void Start()
    {
        BoardGame.setDebug(debug);
        game       = TicTacToe.Instance;
        crossAgent = new TicTacToeAgent(1);
        notAgent   = new TicTacToeAgent(-1);
        game.init(ForLearning, dim, 0, ComputerSymbol);
        brain = QLearning.Instance;
        brain.init(game, dim, epsilon, alpha, gamma);

        if (!ForLearning)
        {
            InitEventTriggers();

            //player.playTicTacToe(dim, NumEpisodes, epsilon, alpha, gamma);

            SetupTiles();

            game.StateMachine.Enable(BackgroundPlane, theTiles, notAgent, crossAgent, BrainType.Player, You);
            // FirstTime
            pubInstance.NotifyListeners("ResetGame");
        }
    }
Example #3
        // On "Start" learning button click
        private void startLearningButton_Click(object sender, EventArgs e)
        {
            // get settings
            GetSettings();
            ShowSettings();

            iterationBox.Text = string.Empty;

            // destroy algorithms
            qLearning = null;
            sarsa     = null;

            if (algorithmCombo.SelectedIndex == 0)
            {
                // create new QLearning algorithm's instance
                qLearning    = new QLearning(256, 4, new TabuSearchExploration(4, new EpsilonGreedyExploration(explorationRate)));
                workerThread = new Thread(new ThreadStart(QLearningThread));
            }
            else
            {
                // create new Sarsa algorithm's instance
                sarsa        = new Sarsa(256, 4, new TabuSearchExploration(4, new EpsilonGreedyExploration(explorationRate)));
                workerThread = new Thread(new ThreadStart(SarsaThread));
            }

            // disable all settings controls except "Stop" button
            EnableControls(false);

            // run worker thread
            needToStop = false;
            workerThread.Start();
        }
Example #4
    public void Create(
        List <MapTile> mapSearching, List <MapTile> mapDelivering,
        Transform parentSearching, Transform parentDelivering,
        QLearning qLSearching, QLearning qLDelivering)
    {
        this.mapSearching     = mapSearching;
        this.mapDelivering    = mapDelivering;
        this.parentSearching  = parentSearching;
        this.parentDelivering = parentDelivering;
        this.qLSearching      = qLSearching;
        this.qLDelivering     = qLDelivering;

        parentDelivering.gameObject.SetActive(false);

        activeQLearning = qLSearching;
        searching       = true;
        playing         = false;

        this.x = qLSearching.initialState.x;
        this.y = qLSearching.initialState.y;
        UpdatePosition();

        // remove
        useExplorationFactor = true;
        playing = true;
    }
Example #5
    // Use this for initialization
    void Start()
    {
        relativePath = Application.dataPath;
        Application.runInBackground = true;
        Application.targetFrameRate = 60;

        //instantiate prey
        prey = Instantiate(preyprefab) as GameObject;
        Controller preyController = prey.GetComponent <Controller>();
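        // wire the prey controller to its own state array and QLearning instance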

        preyStateArray            = new StateArray();
        preyController.stateArray = preyStateArray;
        preyQlearning             = new QLearning(preyController, preyStateArray, prey.GetComponent <PreyScript>().actionSize);
        preyQlearning.bestaction  = true;
        preyController.qAlgorithm = preyQlearning;
        if (predatorsolo)
        {
            preyController.dummy = true;
        }

        //instantiate predator
        predator = Instantiate(predatorprefab) as GameObject;
        Controller predatorController = predator.GetComponent <Controller>();

        predatorStateArray            = new StateArray();
        predatorController.stateArray = predatorStateArray;
        predatorQlearning             = new QLearning(predatorController, predatorStateArray, predator.GetComponent <PredatorScript>().actionSize);
        predatorQlearning.bestaction  = true;
        predatorController.qAlgorithm = predatorQlearning;
        if (preysolo)
        {
            predatorController.dummy = true;
        }
    } // end of start
Example #6
        /// <summary>
        /// Creates the learner
        /// </summary>
        protected override ILearningAlgorithm <MouseState> CreateLearner()
        {
            double alpha       = 1;
            double gamma       = 0.1;
            int    stopDecayAt = (int)(0.9 * this.Environment.Config.MaxEpisodes);

            double epsilon = 0.4;

            var selectionPolicy = new EGreedy(
                epsilon,
                this.Environment.Config.Random,
                DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

            return(QLearning <MouseState> .New(
                       Enum.GetValues(typeof(MouseAction)).Length,
                       selectionPolicy,
                       alpha,
                       gamma,
                       this.Environment.Config.Random));

            //return Sarsa<MouseState>.New(
            //	Enum.GetValues(typeof(MouseAction)).Length,
            //	selectionPolicy,
            //	alpha,
            //	gamma,
            //	this.Environment.Config.Random);
        }
Example #7
    override public void learn(int numEpisodes = 10000)
    {
        int numCrossWins = 0, numNotWins = 0, numDraws = 0;

        BoardGame.Print(" learning ....");
        QLearning brain = QLearning.Instance;

        for (int i = 0; i < numEpisodes; i++)
        {
            episode( );
            if (winner == TicTacToe.CrossVal)
            {
                numCrossWins++;
            }
            else if (winner == TicTacToe.NotVal)
            {
                numNotWins++;
            }
            else
            {
                numDraws++;
            }
        }
        brain.writeTrainingData();
        Debug.Log("Total Episodes " + numEpisodes + " Cum Results: NumCrossWins:" + numCrossWins + " NumNotWins: " + numNotWins + " numDraws: " + numDraws);
    }
Example #8
    public void UpdateTile(QLearning qLearning)
    {
        state = qLearning.statesMap[x, y];

        if (state.IsFinalState())
        {
            displayText.text = "F";
        }
        else if (state.IsBlockState())
        {
            displayText.text = "B";
        }
        else
        {
            displayText.text = state.GreaterQFactor().ToString("0.0");
        }

        if (!state.IsInitialState() &&
            !state.IsFinalState() &&
            !state.IsBlockState())
        {
            float c = state.GreaterQFactor() / QLData.FINAL_REWARD;
            image.color = new Color(1f, 1f, 1 - c, 1f);
        }
    }
Example #9
    private void StartLearning_OnClick()
    {
        // reset learning class values
        _qLearning      = null;
        _sarsa          = null;
        _qLearning_FDGS = null;

        if (References.LearningAlgorithm.value == 0)
        {
            // create new QLearning algorithm's instance
            _qLearning    = new QLearning(256, 4, new TabuSearchExploration(4, new EpsilonGreedyExploration(explorationRate)));
            _workerThread = new Thread(new ThreadStart(QLearningThread));
        }
        else if (References.LearningAlgorithm.value == 1)
        {
            // create new Sarsa algorithm's instance
            _sarsa        = new Sarsa(256, 4, new TabuSearchExploration(4, new EpsilonGreedyExploration(explorationRate)));
            _workerThread = new Thread(new ThreadStart(SarsaThread));
        }
        else
        {
            // init QLearn
            _qLearning_FDGS = new QLearning_FDGS(actions, _agentStopX, _agentStopY, _map, new TabuSearchExploration(actions, new EpsilonGreedyExploration(Convert.ToDouble(explorationRate))));
            _workerThread   = new Thread(new ThreadStart(QLearningThread_FDGS));
        }

        // disable all settings controls except "Stop" button
        References.EnableControls(false);

        // run worker thread
        _needToStop = false;
        _workerThread.Start();

        Debug.Log("Learning started. Please wait until training is finished.");
    }
Example #10
    void Start()
    {
        QLearning robot = new QLearning();
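        // presumably a 10x10 grid with start cell (0, 0) and goal cell (9, 9), mirroring Example #15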

        robot.CreateMap(10, 10, 0, 0, 9, 9);

        robot.Loop(6381, true);
    }
Example #11
 void Instantiate()
 {
     this.Movement = GetComponent <CarMovement>();
     sensors       = GetComponentsInChildren <Sensor>();
     Learning      = new QLearning();
     Points        = new Points();
     this.ShowSensors();
     Init();
 }
Example #12
        public async Task <MapModel> Post([FromBody] GameRequest gameRequest)
        {
            var        aiSign     = gameRequest.AiSign.Equals("X") ? 'X' : 'O';
            var        playerSign = gameRequest.AiSign.Equals("X") ? 'O' : 'X';
            const char emptySign  = '_';
            Move       bestMove;

            switch (gameRequest.ChosenAlgorithm)
            {
            case "MinMax":
                var ticTacToeMinMax = new TicTacToeMinMax(aiSign, playerSign, emptySign);

                Mapper.FillBoardRequest(gameRequest, board, emptySign);
                bestMove = ticTacToeMinMax.FindBestMove(board);
                Mapper.MapAiMove(bestMove, gameRequest, aiSign);

                return(gameRequest.MapModel);

            case "Q-Learning":
                string response;
                if (Game.BoardIsEmpty(gameRequest, emptySign))
                {
                    response = await QLearning.AIStart();
                }
                else
                {
                    var position = Mapper.FindCurrentMovePosition(gameRequest, board, emptySign);
                    response = await QLearning.Move(position);
                }

                Mapper.PythonResponseToMapModel(response, gameRequest.MapModel);

                if (Game.BoardIsFull(gameRequest, emptySign))
                {
                    await QLearning.Start();
                }

                Mapper.FillBoardRequest(gameRequest, board, emptySign);

                return(gameRequest.MapModel);

            case "Monkey":
                var ticTacToeMonkeyAI = new TicTacToeMonkeyAI(emptySign);

                Mapper.FillBoardRequest(gameRequest, board, emptySign);
                bestMove = ticTacToeMonkeyAI.FindBestMove(board);
                Mapper.MapAiMove(bestMove, gameRequest, aiSign);

                return(gameRequest.MapModel);

            default:
                return(Simulator.Simulate(gameRequest));
            }
        }
Example #13
    private void qLearning(State currentState, Strategie strategie)
    {
        var qValue = strategie.getQValueForStrategie(currentState, qValues);

        ExecuteAction(qValue.getAction());
        if (!lastQValue.getAction().Equals(Assets.Scripts.Enum.Action.Begin))
        {
            qValues = QLearning.updateTable(lastQValue, qValue, qValues, getReward());
        }
        lastState  = currentState;
        lastQValue = qValue;
    }
Example #14
    public void FindPath()
    {
        boardStateGraph = board.GetBoardGraph();
        Debug.Log(Graph.ToString(boardStateGraph));

        QLearning qLearning = new QLearning(epochs, actions, rewards, boardStateGraph, learningRate, discountFactor);
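        // train the Q-values over the board graph, then extract the resulting path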

        qLearning.Train();

        solution = qLearning.GetPath();
        StartMove();
    }
Example #15
    // Use this for initialization
    void Start()
    {
        QLearning qlSearch  = new QLearning();
        QLearning qlDeliver = new QLearning();

        qlSearch.CreateMap(10, 10, 0, 0, 9, 9);
        qlDeliver.CreateMap(10, 10, 9, 9, 0, 0);

        qlSearch.Loop(4572, true);
        qlDeliver.Loop(4572, true);

        GetComponent <Map>().CreateMap(qlSearch, qlDeliver);
    }
Example #16
    public void AgentTurn(Rules _rules)
    {
        rules = _rules;

        boardStateGraph = board.GetBoardGraph();
        Debug.Log(Graph.ToString(boardStateGraph));

        rules.GetRewards();
        this.actions = rules.actions;
        this.rewards = rules.rewardTiles;

        QLearning qLearning = new QLearning(epochs, actions, rewards, boardStateGraph, learningRate, discountFactor);
        // qLearning.Train();
    }
Example #17
        /// <summary>
        /// Runs the example
        /// </summary>
        public static void Run()
        {
            var slotMachines = new List <SlotMachine>();

            slotMachines.Add(new SlotMachine(20, 120));
            slotMachines.Add(new SlotMachine(5, 100));
            slotMachines.Add(new SlotMachine(40, 150));
            slotMachines.Add(new SlotMachine(25, 130));
            slotMachines.Add(new SlotMachine(25, 120));
            slotMachines.Add(new SlotMachine(60, 120));

            var    random           = new Random(1337);
            int    trainingEpisodes = 10000;
            double decayRatio       = 0.4;

            var environment = new MultiArmedBanditEnvironment(new Configuration(trainingEpisodes, random), slotMachines);
            var agent       = new StatelessAgent <MultiArmedBanditEnvironment>(env =>
            {
                double alpha    = 0.05;
                double gamma    = 0.1;
                int stopDecayAt = (int)(decayRatio * env.Config.MaxEpisodes);

                double epsilon = 0.1;

                var selectionPolicy = new EGreedy(
                    epsilon,
                    env.Config.Random,
                    DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

                return(QLearning <EmptyState> .New(
                           slotMachines.Count,
                           selectionPolicy,
                           alpha,
                           gamma,
                           env.Config.Random));
            });

            environment.AddAgent(agent);
            environment.Initialize();

            for (int episode = 0; episode < environment.Config.MaxEpisodes; episode++)
            {
                environment.Reset(episode);
                environment.Update(episode);
            }

            Console.WriteLine(string.Format("Total reward: {0}", environment.TotalReward));
            Console.ReadLine();
        }
Example #18
 private void OnTriggerEnter(Collider collider)
 {
     if (collider.GetComponent <QLearning>())
     {
         auto          = collider.GetComponent <QLearning>();
         auto.snelheid = 0;
         auto.gebotst  = true;
         auto.Reset();
     }
     if (collider.GetComponent <AutoScript1>())
     {
         auto1           = collider.GetComponent <AutoScript1>();
         auto1.moveSpeed = 0;
         auto1.gebotst   = true;
     }
 }
Example #19
        // On "Stop" button click
        private void stopButton_Click(object sender, EventArgs e)
        {
            if (workerThread != null)
            {
                // stop worker thread
                needToStop = true;
                while (!workerThread.Join(100))
                {
                    Application.DoEvents();
                }
                workerThread = null;
            }

            // reset learning class values
            qLearning = null;
            sarsa     = null;
        }
Example #20
        //If the saved data exists, we load it
        void load()
        {
            //If the file exists, we load it, raise the maximum number of iterations to 600, and reduce epsilon
            if (File.Exists(Application.dataPath + "/Scripts/Grupo/Solucion3/QLearningInfo" + seed + ".dat"))
            {
                BinaryFormatter bf   = new BinaryFormatter();
                FileStream      file = File.OpenRead(Application.dataPath + "/Scripts/Grupo/Solucion3/QLearningInfo" + seed + ".dat");

                aprender = (QLearning)bf.Deserialize(file);
                file.Close();
                aprender.setE(0.05f);
                aprender.maxIterations = 600;
                //If we want to see the values of the QTable, we can enable this option
                //aprender.printTable();
                Debug.Log("Loaded");
            }
        }
Example #21
    void SwapMap()
    {
        if (searching)
        {
            searching       = false;
            activeQLearning = qLDelivering;

            parentDelivering.gameObject.SetActive(true);
            parentSearching.gameObject.SetActive(false);
        }
        else
        {
            searching       = true;
            activeQLearning = qLSearching;

            parentDelivering.gameObject.SetActive(false);
            parentSearching.gameObject.SetActive(true);
        }
    }
Example #22
    private void OnTriggerEnter(Collider collider)
    {
        if (collider.GetComponent <QLearning>())
        {
            auto = collider.GetComponent <QLearning>();
            auto.aantalRonden += 1;

            if (auto.aantalRonden > qTable.ronden)
            {
                qTable.ronden = auto.aantalRonden;
                qTable.voegToePublic();

                if (auto.aantalRonden == 10)
                {
                    qTable.Reset();
                }
            }
        }
        if (collider.GetComponent <AutoScript1>())
        {
            auto1 = collider.GetComponent <AutoScript1>();
            auto1.aantalRonden += 1;

            if (auto1.aantalRonden > NeuralNetwork.ronden)
            {
                print(NeuralNetwork.frame + ", " + auto1.aantalRonden);
                NeuralNetwork.ronden = auto1.aantalRonden;
                NeuralNetwork.voegToePublic();

                if (auto1.aantalRonden == 10)
                {
                    NeuralNetwork.Resette();
                }
            }

            //if (auto.aantalRonden == 10)
            //{
            //    NeuralNetwork.Reset();
            //}
        }
    }
Example #23
    public void CreateMap(QLearning qLSearching, QLearning qLDelivering)
    {
        List <MapTile> mapSearching = new List <MapTile>();

        for (int x = 0; x < qLSearching.sizeX; x++)
        {
            for (int y = 0; y < qLSearching.sizeY; y++)
            {
                MapTile newMapTile = Instantiate <MapTile>(objectMapTile);

                newMapTile.transform.SetParent(this.parentSearching, false);
                newMapTile.Create(qLSearching.statesMap[x, y]);

                mapSearching.Add(newMapTile);
            }
        }

        List <MapTile> mapDelivering = new List <MapTile>();

        for (int x = 0; x < qLDelivering.sizeX; x++)
        {
            for (int y = 0; y < qLDelivering.sizeY; y++)
            {
                MapTile newMapTile = Instantiate <MapTile>(objectMapTile);

                newMapTile.transform.SetParent(this.parentDelivering, false);
                newMapTile.Create(qLDelivering.statesMap[x, y]);

                mapDelivering.Add(newMapTile);
            }
        }

        Robot robot = Instantiate <Robot>(objectRobot);

        robot.transform.SetParent(this.transform, false);
        robot.Create(
            mapSearching, mapDelivering,
            parentSearching, parentDelivering,
            qLSearching, qLDelivering);
    }
Example #24
        private void StartButton_Click(object sender, RoutedEventArgs e)
        {
            // Make the buttons inactive
            AStarRadioButton.IsEnabled     = false;
            QLearningRadioButton.IsEnabled = false;
            LoadGridButton.IsEnabled       = false;
            StartButton.IsEnabled          = false;

            searchTimer          = new DispatcherTimer();
            searchTimer.Interval = TimeSpan.FromMilliseconds(UPDATE_FREQUENCY);
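            // each timer tick runs one step of the selected search and then redraws the agent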



            if ((bool)AStarRadioButton.IsChecked)
            {
                if (aStarSearch == null)
                {
                    aStarSearch = new AStar(gridWorld, startingCell, rewardCell);
                }

                searchTimer.Tick += new EventHandler(aStarSearch.Run);
                searchTimer.Tick += new EventHandler(UpdateAgentPosition);
            }
            else // Q-Learning is checked
            {
                if (qLearningSearch == null)
                {
                    qLearningSearch = new QLearning(gridWorld, startingCell, rewardCell);
                }

                searchTimer.Tick += new EventHandler(qLearningSearch.Run);
                searchTimer.Tick += new EventHandler(UpdateAgentPosition);
            }


            searchTimer.Start();
        }
Example #25
        public void QLearningGridPathTest()
        {
            var random     = new Random(1337);
            var gridSize   = 16;
            var grid       = new int[gridSize, gridSize];
            var ql         = new QLearning(gridSize * gridSize, 4, new EpsilonGreedyExplorationPolicy(0.0), 0.3, 0.8);
            var pathLength = 20;
            var maxReward  = 0.0;

            for (var x = 0; x < gridSize; x++)
            {
                for (var y = 0; y < gridSize; y++)
                {
                    grid[x, y] = random.Next(0, 100);
                }
            }

            // train
            for (var i = 0; i < 100; i++)
            {
                var x             = random.Next(0, gridSize);
                var y             = random.Next(0, gridSize);
                var currentReward = 0.0;
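                // begin an episode in the randomly chosen cell; states are flattened as x + y * gridSize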

                ql.Begin(x + y * gridSize);

                for (var j = 0; j < pathLength; j++)
                {
                    switch ((Action)ql.SelectedAction)
                    {
                    case Action.UP:
                        y++;
                        break;

                    case Action.DOWN:
                        y--;
                        break;

                    case Action.LEFT:
                        x--;
                        break;

                    case Action.RIGHT:
                        x++;
                        break;
                    }

                    var r = 0;
                    if (x < 0 || x >= gridSize || y < 0 || y >= gridSize)
                    {
                        x = x < 0 ? 0 : x >= gridSize ? gridSize - 1 : x;
                        y = y < 0 ? 0 : y >= gridSize ? gridSize - 1 : y;
                    }
                    else
                    {
                        r = grid[x, y];
                    }

                    currentReward += r;

                    var nextState = x + y * gridSize;
                    ql.Step(r, nextState);
                }

                if (currentReward > maxReward)
                {
                    maxReward = currentReward;
                }
            }

            Assert.AreEqual(896, maxReward);
        }
Example #26
 /*
  * given by the runGame
  */
 public void setQLearning(QLearning algorithm) {
     qAlgorithm = algorithm;
 }
Example #27
 /// <summary>
 /// Main constructor, currently only calls AForge QLearning class constructor
 /// </summary>
 /// <param name="states">number of states</param>
 /// <param name="actions">number of actions</param>
 /// <param name="explorationRate">exploration rate in epsilon greedy exploration</param>
 public Q(int states, int actions, EpsilonGreedyExploration exploration)
 {
     qLearning = new QLearning(states, actions, exploration, false);
 }
Example #28
        public void learn_test()
        {
            #region doc_main
            // Fix the random number generator
            Accord.Math.Random.Generator.Seed = 0;

            // In this example, we will be using the QLearning algorithm
            // to make a robot learn how to navigate a map. The map is
            // shown below, where a 1 denotes a wall and 0 denotes areas
            // where the robot can navigate:
            //
            int[,] map =
            {
                { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
                { 1, 1, 0, 0, 0, 0, 0, 0, 1 },
                { 1, 1, 0, 0, 0, 1, 1, 0, 1 },
                { 1, 0, 0, 1, 0, 0, 0, 0, 1 },
                { 1, 0, 0, 1, 1, 1, 1, 0, 1 },
                { 1, 0, 0, 1, 1, 0, 0, 0, 1 },
                { 1, 1, 0, 1, 0, 0, 0, 0, 1 },
                { 1, 1, 0, 1, 0, 1, 1, 0, 1 },
                { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            };

            // Now, we define the initial and target points from which the
            // robot will be spawned and where it should go, respectively:
            int agentStartX = 1;
            int agentStartY = 4;

            int agentStopX = 7;
            int agentStopY = 4;

            // The robot is able to sense the environment through 8 sensors
            // that capture whether the robot is near a wall or not. Based
            // on the robot's current location, the sensors will return an
            // integer number representing which sensors have detected walls

            Func <int, int, int> getState = (int x, int y) =>
            {
                int c1 = (map[y - 1, x - 1] != 0) ? 1 : 0;
                int c2 = (map[y - 1, x + 0] != 0) ? 1 : 0;
                int c3 = (map[y - 1, x + 1] != 0) ? 1 : 0;
                int c4 = (map[y + 0, x + 1] != 0) ? 1 : 0;
                int c5 = (map[y + 1, x + 1] != 0) ? 1 : 0;
                int c6 = (map[y + 1, x + 0] != 0) ? 1 : 0;
                int c7 = (map[y + 1, x - 1] != 0) ? 1 : 0;
                int c8 = (map[y + 0, x - 1] != 0) ? 1 : 0;

                return(c1 | (c2 << 1) | (c3 << 2) | (c4 << 3) | (c5 << 4) | (c6 << 5) | (c7 << 6) | (c8 << 7));
            };

            // The actions are the possible directions the robot can go:
            //
            //   - case 0: go to north (up)
            //   - case 1: go to east (right)
            //   - case 2: go to south (down)
            //   - case 3: go to west (left)
            //

            int    learningIterations = 1000;
            double explorationRate    = 0.5;
            double learningRate       = 0.5;

            double moveReward = 0;
            double wallReward = -1;
            double goalReward = 1;

            // The function below specifies how the robot should perform an action given its
            // current position and an action number. This will cause the robot to update its
            // current X and Y locations given the direction (above) it was instructed to go:
            Func <int, int, int, Tuple <double, int, int> > doAction = (int currentX, int currentY, int action) =>
            {
                // default reward is equal to moving reward
                double reward = moveReward;

                // moving direction
                int dx = 0, dy = 0;

                switch (action)
                {
                case 0:             // go to north (up)
                    dy = -1;
                    break;

                case 1:             // go to east (right)
                    dx = 1;
                    break;

                case 2:             // go to south (down)
                    dy = 1;
                    break;

                case 3:             // go to west (left)
                    dx = -1;
                    break;
                }

                int newX = currentX + dx;
                int newY = currentY + dy;

                // check new agent's coordinates
                if ((newX < 0) || (newX >= map.Columns()) || (newY < 0) || (newY >= map.Rows()) || (map[newY, newX] != 0))
                {
                    // we found a wall or got outside of the world
                    reward = wallReward;
                }
                else
                {
                    currentX = newX;
                    currentY = newY;

                    // check if we found the goal
                    if ((currentX == agentStopX) && (currentY == agentStopY))
                    {
                        reward = goalReward;
                    }
                }

                return(Tuple.Create(reward, currentX, currentY));
            };


            // After defining all those functions, we create a new QLearning algorithm:
            var explorationPolicy = new EpsilonGreedyExploration(explorationRate);
            var tabuPolicy        = new TabuSearchExploration(4, explorationPolicy);
            var qLearning         = new QLearning(256, 4, tabuPolicy);

            // current coordinates of the agent
            int agentCurrentX = -1;
            int agentCurrentY = -1;

            bool needToStop = false;
            int  iteration  = 0;

            // loop
            while ((!needToStop) && (iteration < learningIterations))
            {
                // set exploration rate for this iteration
                explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;

                // set learning rate for this iteration
                qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;

                // clear tabu list
                tabuPolicy.ResetTabuList();

                // reset agent's coordinates to the starting position
                agentCurrentX = agentStartX;
                agentCurrentY = agentStartY;

                // previous state and action
                int previousState  = getState(agentCurrentX, agentCurrentY);
                int previousAction = qLearning.GetAction(previousState);

                // update agent's current position and get his reward
                var    r      = doAction(agentCurrentX, agentCurrentY, previousAction);
                double reward = r.Item1;
                agentCurrentX = r.Item2;
                agentCurrentY = r.Item3;

                // loop
                while ((!needToStop) && (iteration < learningIterations))
                {
                    // set exploration rate for this iteration
                    explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;
                    // set learning rate for this iteration
                    qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;
                    // clear tabu list
                    tabuPolicy.ResetTabuList();

                    // reset agent's coordinates to the starting position
                    agentCurrentX = agentStartX;
                    agentCurrentY = agentStartY;

                    // steps performed by agent to get to the goal
                    int steps = 0;

                    while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
                    {
                        steps++;
                        // get agent's current state
                        int currentState = getState(agentCurrentX, agentCurrentY);

                        // get the action for this state
                        int action = qLearning.GetAction(currentState);

                        // update agent's current position and get his reward
                        r             = doAction(agentCurrentX, agentCurrentY, action);
                        reward        = r.Item1;
                        agentCurrentX = r.Item2;
                        agentCurrentY = r.Item3;

                        // get agent's next state
                        int nextState = getState(agentCurrentX, agentCurrentY);

                        // do learning of the agent - update his Q-function
                        qLearning.UpdateState(currentState, action, reward, nextState);

                        // set tabu action
                        tabuPolicy.SetTabuAction((action + 2) % 4, 1);
                    }

                    System.Diagnostics.Debug.WriteLine(steps);

                    iteration++;
                }
            }

            // The end position for the robot will be (7, 4):
            int finalPosX = agentCurrentX; // 7
            int finalPosY = agentCurrentY; // 4;
            #endregion

            Assert.AreEqual(7, finalPosX);
            Assert.AreEqual(4, finalPosY);
        }
Example #29
    private void Update()
    {
        if (!GameManager.instance.playerTurn)
        {
            return;
        }
        timer += Time.deltaTime;
        if (timer > 0.1f)
        {
            int       horizontal   = 0;
            int       vertical     = 0;
            int       oneMove      = 0;
            int       chooseRandom = 0;
            double    reward       = -1;
            QLearning q            = new QLearning();
            //QAction fromTo;
            //QState state;
            string stateName;
            string stateNameNext;
            //string which_action;
            System.Random rnd  = new System.Random();
            double        Qmax = 0;
            double        q0   = 0;
            double        q1   = 0;
            double        q2   = 0;
            double        q3   = 0;
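            // make sure the previous state has a Q-value row (one value per action) before it is read below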

            if (!QValue.ContainsKey(preX + ',' + preY))
            {
                double[] defaults = new double[] { 0.0, 0.0, 0.0, 0.0 };
                QValue.Add(preX + ',' + preY, defaults);
            }


            stateName    = preX + "," + preY;
            chooseRandom = (int)rnd.Next(0, 10);
            if (chooseRandom <= 2)
            {
                oneMove = (int)rnd.Next(0, 4);
                if (oneMove == 0)
                {
                    horizontal = 1;
                    vertical   = 0;
                    cx         = cx + 1;
                    AttemptMove <Wall>(horizontal, vertical);
                }
                else if (oneMove == 1)
                {
                    horizontal = -1;
                    vertical   = 0;
                    cx         = cx - 1;
                    AttemptMove <Wall>(horizontal, vertical);
                }
                else if (oneMove == 2)
                {
                    vertical   = 1;
                    horizontal = 0;
                    cy         = cy + 1;
                    AttemptMove <Wall>(horizontal, vertical);
                }
                else
                {
                    vertical   = -1;
                    horizontal = 0;
                    cy         = cy - 1;
                    AttemptMove <Wall>(horizontal, vertical);
                }
            }
            else
            {
                // exploit: pick the action with the highest stored Q-value for the previous state
                q0 = QValue[stateName][0];
                q1 = QValue[stateName][1];
                q2 = QValue[stateName][2];
                q3 = QValue[stateName][3];
                if (q0 >= q1 && q0 >= q2 && q0 >= q3)
                {
                    oneMove    = 0;
                    horizontal = 1;
                    vertical   = 0;
                    cx         = cx + 1;
                    AttemptMove <Wall>(horizontal, vertical);
                }
                else if (q1 >= q2 && q1 >= q3)
                {
                    oneMove    = 1;
                    horizontal = -1;
                    vertical   = 0;
                    cx         = cx - 1;
                    AttemptMove <Wall>(horizontal, vertical);
                }
                else if (q2 >= q3)
                {
                    oneMove    = 2;
                    vertical   = 1;
                    horizontal = 0;
                    cy         = cy + 1;
                    AttemptMove <Wall>(horizontal, vertical);
                }
                else
                {
                    oneMove    = 3;
                    vertical   = -1;
                    horizontal = 0;
                    cy         = cy - 1;
                    AttemptMove <Wall>(horizontal, vertical);
                }
            }
            currentX = cx.ToString();
            currentY = cy.ToString();
            if (!QValue.ContainsKey(currentX + ',' + currentY))
            {
                double[] defaults = new double[] { 0.0, 0.0, 0.0, 0.0 };
                QValue.Add(currentX + ',' + currentY, defaults);
            }


            // State Bedroom
            stateName     = preX + "," + preY;
            stateNameNext = currentX + "," + currentY;
            if (stateNameNext == "7,7")
            {
                reward = 100;
            }
            else
            {
                reward = -1;
            }
            Qmax = QValue[stateNameNext].Max();
            QValue[stateName][oneMove] = QValue[stateName][oneMove] + alpha * (reward + gamma * Qmax - QValue[stateName][oneMove]);

            preX       = currentX;
            preY       = currentY;
            horizontal = 0;
            vertical   = 0;

            timer = 0.0f;
        }
    }
Example #30
    void Update()
    {
        // Detect whether the selected game, algorithm, or Sokoban level has changed
        if (selectedGame != oldGame || selectedAlgo != oldAlgo || selectedSokobanLevel != oldSokobanLevel)
        {
            try {
                btnComponent.onClick.RemoveListener(game.TaskOnClick);
            } catch {
                Debug.Log("No listener attach to the play button.");
            }

            // update old values
            oldGame         = selectedGame;
            oldAlgo         = selectedAlgo;
            oldSokobanLevel = selectedSokobanLevel;

            // Select the RL algorithm
            Base algoType;
            if (selectedAlgo == Algo.MarkovPolicy)
            {
                algoType = new MarkovPolicy();
            }
            else if (selectedAlgo == Algo.MarkovValue)
            {
                algoType = new MarkovValue();
            }
            else if (selectedAlgo == Algo.MonteCarlo)
            {
                algoType = new MonteCarlo();
            }
            else if (selectedAlgo == Algo.SARSA)
            {
                algoType = new SARSA();
            }
            else
            {
                algoType = new QLearning();
            }

            // Select the game
            Type gameType;
            if (selectedGame == Game.GridWorld)
            {
                gameType = typeof(GridWorld.GridWorld <>);
            }
            else if (selectedGame == Game.TicTacToe)
            {
                gameType = typeof(TicTacToe.TicTacToe <>);
            }
            else
            {
                gameType = typeof(Sokoban.Sokoban <>);
            }

            // Create the game instance for the RL algorithm
            var type = gameType.MakeGenericType(algoType.GetType());
            game = (IGame)Activator.CreateInstance(type);

            // Clear the screen
            GameObject[] gos = GameObject.FindGameObjectsWithTag("Tile");
            foreach (GameObject go in gos)
            {
                var tile = go.GetComponent <SpriteRenderer>();
                tile.color = new Color(0, 0, 0, 0);
            }
            // Hide the player
            goPlayer.transform.position = new Vector3(50, 50, 0);

            // Start the game
            var watch = System.Diagnostics.Stopwatch.StartNew();
            game.Start();
            watch.Stop();
            elapsedMs = watch.ElapsedMilliseconds;

            // Debug to move the game step by step
            btnComponent.onClick.AddListener(game.TaskOnClick);
        }

        game.Update();
    }