Пример #1
0
    // Looks up the movements that are allowed from the given cell and hands back
    // one of them, picked at random.
    private Movement GetRandomMovement(int fromCell)
    {
        List<Movement> candidates = LabryntRules.GetPossibleMovements(fromCell);

        // Index is drawn from [0, candidates.Count).
        return candidates[Random.Range(0, candidates.Count)];
    }
Пример #2
0
    // Returns the Q-value of the best movement available from the given state (cell).
    // Final states are not evaluated: their estimate is always 0.
    private double CalculateEstimate(int nextCell)
    {
        if (LabryntRules.IsFinalState(nextCell))
        {
            return 0;
        }

        // Ask for the greedy move from that state and read its Q-value from the matrix.
        Movement greedyMove = GetBestMovement(nextCell);
        return qMat.GetQValue(nextCell, greedyMove);
    }
Пример #3
0
    // Consults the Q-Matrix and returns the best of the movements available from a cell.
    // "Best" means the movement with the highest Q-value. When several movements share
    // that highest value, one of them is chosen at random.
    private Movement GetBestMovement(int fromCell)
    {
        List<Movement> candidates = LabryntRules.GetPossibleMovements(fromCell);

        // Seed the search with the first candidate. Every cell that reaches this method is
        // expected to offer at least one movement (terminal states are handled before this
        // point), so index 0 is assumed to exist.
        List<Movement> topMoves  = new List<Movement> { candidates[0] };
        double         topQValue = qMat.GetQValue(fromCell, candidates[0]);

        // Walk the remaining candidates, tracking every movement tied for the highest Q-value.
        for (int i = 1; i < candidates.Count; i++)
        {
            Movement candidate       = candidates[i];
            double   candidateQValue = qMat.GetQValue(fromCell, candidate);

            if (candidateQValue > topQValue)
            {
                // Strictly better: the previous leaders are discarded.
                topQValue = candidateQValue;
                topMoves.Clear();
                topMoves.Add(candidate);
            }
            else if (candidateQValue == topQValue)
            {
                // Exact tie with the current leader(s).
                topMoves.Add(candidate);
            }
        }

        // A single winner is returned directly; ties are broken randomly.
        if (topMoves.Count == 1)
        {
            return topMoves[0];
        }

        return topMoves[Random.Range(0, topMoves.Count)];
    }
Пример #4
0
    // Update is called once per frame.
    // Accumulates frame time and, for every whole step interval (taken from the speed selector)
    // that has elapsed, runs one Q-learning step: choose a move, observe the reward, update the
    // Q-value of the current cell and move the player to the landing cell.
    void Update()
    {
        if (paused)
        {
            return;
        }

        timer += Time.deltaTime;
        float dt = speedSelector.GetInverse();
        if (timer > dt)
        {
            //Calculate how many choices we need according to the speed selector.
            int loops = (int)(timer / dt);
            for (int step = 0; step < loops; step++)
            {
                if (restart)
                {
                    InitialiseAlgorithm();
                    restart    = false;
                    newEpisode = true;
                }

                if (newEpisode)
                {
                    SetValuesForNewEpisode();
                    newEpisode = false;
                }

                //Choose action (a) based on policy (p): either explore randomly or exploit the Q-Matrix.
                Movement currentMove = ShouldExplore()
                    ? GetRandomMovement(currentCell)
                    : GetBestMovement(currentCell);

                //Observe reward (r)
                int    reward    = LabryntRules.GetReward(currentCell, currentMove);
                double oldQValue = qMat.GetQValue(currentCell, currentMove);

                //Do an estimation of the best Q-value to obtain from next state.
                int    nextCell   = LabryntRules.GetLandingCell(currentCell, currentMove);
                double estimation = CalculateEstimate(nextCell);

                //Recalculate Q-value for the current cell based on estimation.
                double updatedQValue = CalculateNewQValue(oldQValue, alpha, gamma, reward, estimation);
                qMat.SetQValue(currentCell, currentMove, updatedQValue);

                cumulativeReward += reward;
                currentCell       = nextCell;

                UpdatePlayerPositionInDisplay(); //Now we update the display with the new player position.

                //If the player landed in a final state cell, we need to update epsilon and start a new episode.
                if (LabryntRules.IsFinalState(currentCell))
                {
                    //Change epsilon parameter so it shifts from exploration to exploitation gradually
                    //between episodes. Exactly one decay factor is applied per episode: the first while
                    //epsilon is above 0.3, the second otherwise. Using if/else (instead of two separate
                    //checks) avoids decaying twice in the episode where epsilon crosses 0.3 and avoids
                    //getting stuck if epsilon is exactly 0.3.
                    if (epsilon > 0.3)
                    {
                        epsilon *= epsilonDecay1;
                    }
                    else
                    {
                        epsilon *= epsilonDecay2;
                    }
                    newEpisode = true;
                }
            }

            //Consume the time of every step that was actually executed. Subtracting a single dt
            //here would leave the surplus in the timer, making each following frame run more and
            //more steps.
            timer -= loops * dt;
        }
    }