コード例 #1
0
ファイル: Player.cs プロジェクト: AlejandroGd/LearningLabrynt
    // Estimates the best Q-value obtainable from a given state (cell).
    // Returns 0 for a final state; otherwise returns the stored Q-value of the
    // move that GetBestMovement selects for that cell.
    private double CalculateEstimate(int nextCell)
    {
        // A final state has no follow-up move to evaluate, so its estimate is zero.
        if (LabryntRules.IsFinalState(nextCell))
        {
            return 0;
        }

        Movement greedyMove = GetBestMovement(nextCell);
        return qMat.GetQValue(nextCell, greedyMove);
    }
コード例 #2
0
ファイル: Player.cs プロジェクト: AlejandroGd/LearningLabrynt
    // Update is called once per frame.
    // Accumulates the frame time and, whenever at least one step interval (taken from the
    // speed selector) has elapsed, runs that many learning steps: pick a move, read its
    // reward, update the Q-value of the current cell and move the player. The time consumed
    // by the executed steps is then removed from the accumulator; the remainder carries over.
    void Update()
    {
        if (paused)
        {
            return;
        }

        timer += Time.deltaTime;
        float dt = speedSelector.GetInverse();

        // Only step when the interval is positive (a zero or negative interval would make the
        // step count below overflow or go negative) and at least one interval has elapsed.
        if (dt > 0f && timer > dt)
        {
            // Calculate how many choices we need according to the speed selector.
            int steps = (int)(timer / dt);

            for (int step = 0; step < steps; step++)
            {
                if (restart)
                {
                    InitialiseAlgorithm();
                    restart    = false;
                    newEpisode = true;
                }

                if (newEpisode)
                {
                    SetValuesForNewEpisode();
                    newEpisode = false;
                }

                // Choose action (a) based on policy (p): explore with a random move or exploit the best known one.
                Movement currentMove = ShouldExplore()
                    ? GetRandomMovement(currentCell)
                    : GetBestMovement(currentCell);

                // Observe reward (r) and the Q-value currently stored for this (cell, move) pair.
                int    reward    = LabryntRules.GetReward(currentCell, currentMove);
                double oldQValue = qMat.GetQValue(currentCell, currentMove);

                // Do an estimation of the best Q-value to obtain from next state.
                int    nextCell   = LabryntRules.GetLandingCell(currentCell, currentMove);
                double estimation = CalculateEstimate(nextCell);

                // Recalculate Q-value for the current cell based on estimation.
                double updatedQValue = CalculateNewQValue(oldQValue, alpha, gamma, reward, estimation);
                qMat.SetQValue(currentCell, currentMove, updatedQValue);

                cumulativeReward += reward;
                currentCell       = nextCell;

                // Now we update the display with the new player position.
                UpdatePlayerPositionInDisplay();

                // If the player landed in a final state cell, we need to update epsilon and start a new episode.
                if (LabryntRules.IsFinalState(currentCell))
                {
                    // Change epsilon so it shifts from exploration to exploitation gradually between episodes.
                    // Exactly one decay factor is applied per episode: the first while epsilon is above
                    // the threshold, the second otherwise (including when epsilon equals the threshold,
                    // which previously left epsilon unchanged forever).
                    const double decaySwitchThreshold = 0.3;
                    if (epsilon > decaySwitchThreshold)
                    {
                        epsilon *= epsilonDecay1;
                    }
                    else
                    {
                        epsilon *= epsilonDecay2;
                    }
                    newEpisode = true;
                }
            }

            // Remove the time used by every executed step, not just one interval; otherwise the
            // leftover time would trigger the same steps again on the following frames.
            timer -= steps * dt;
        }
    }