Example #1
    //Move the Agent and update the Q-Value for the current state
    public void PerformAction(Vector2 action)
    {
        Vector2 agentPos    = controller.agentPosition;
        int     indexAction = controller.isSquare ? ActionSquare.GetIndexByVector(action) : ActionHex.GetIndexByVector(action, (int)(agentPos.y) % 2);
        float reward;
        float maxNextValue;

        //Transform the 2D position (x, y) into a 1D index (x + y * n)
        int currentStateIndex = (int)(agentPos.x + agentPos.y * controller.n);

        //Update Agent position (current position + new action)
        agentPos += action;
        int nextStateIndex = (int)(agentPos.x + agentPos.y * controller.n);

        //Update Agent position in the controller
        controller.agentPosition += action;

        int x = (int)agentPos.x;
        int y = (int)agentPos.y;

        if (controller.checkIndexBoundaries(x, y))
        {
            controller.previousPlatform = controller.currentPlatform;
            controller.currentPlatform  = controller.platforms[x, y];

            Vector3 pos = controller.currentPlatform.transform.position + Vector3.back * 0.1f;
            controller.agent.transform.position = pos;

            reward = controller.currentPlatform.reward;
            controller.totalReward += reward;

            //The min reward can be taken only once
            if (controller.currentPlatform.minRewardPoint)
            {
                controller.currentPlatform.DisableMinReward();
            }

            //Only the max Q-value of the next state is needed here, not the action itself
            FindMaxValue(nextStateIndex, out _, out maxNextValue);
        }
        else
        {
            //Out of the platform boundaries
            controller.end = true;

            reward       = -10;
            maxNextValue = 0;

            print("Out of Boundary");
        }

        float oldQValue = qTable[currentStateIndex, indexAction];

        //Q-Learning update rule (Bellman equation)
        qTable[currentStateIndex, indexAction] = oldQValue + controller.learningRate * (reward + controller.discountRate * maxNextValue - oldQValue);
        controller.end = controller.end || controller.currentPlatform.CheckGameState();
    }
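For reference, the update tagged "Bellman equation" above is the standard tabular Q-learning rule, with controller.learningRate as the learning rate \alpha and controller.discountRate as the discount factor \gamma:

    Q(s, a) \leftarrow Q(s, a) + \alpha \left[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right]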
Example #2
    public void FindMaxValue(int index, out Vector2 action, out float maxValue)
    {
        maxValue = float.MinValue;
        int indexAction = -1;

        //Scan the Q-table row for the future action that maximizes the reward
        for (int i = 0; i < controller.nActions; i++)
        {
            if (maxValue < qTable[index, i])
            {
                maxValue    = qTable[index, i];
                indexAction = i;
            }
        }

        action = controller.isSquare ? ActionSquare.GetVectorByIndex(indexAction) : ActionHex.GetVectorByIndex(indexAction, (int)(controller.agentPosition.y) % 2);
    }
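FindMaxValue is simply a linear scan over one row of the Q-table, i.e. a greedy argmax over the actions available in the given state:

    a^{*} = \arg\max_{a} Q(s, a), \qquad \text{maxValue} = Q(s, a^{*})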
Example #3
    public Vector2 ChooseAction()
    {
        //Choose a random number v between 0 and 1
        float   v      = Random.Range(0f, 1f);
        Vector2 action = new Vector2(-1, -1);

        if (v < controller.epsilon)
        {
            //If v is less than epsilon, choose a random action (exploration)
            action             = controller.isSquare ? ActionSquare.ChooseRandomAction() : ActionHex.ChooseRandomAction((int)(controller.agentPosition.y) % 2);
            controller.epsilon = controller.epsilon > controller.minEpsilon ? controller.epsilon - controller.epsilonDecay : controller.minEpsilon;
            print("Random Action Chosen: " + action);
        }
        else
        {
            //Otherwise exploit: choose the action with the max Q-value for the current state (platform position)
            FindMaxValue((int)(controller.agentPosition.x + controller.agentPosition.y * controller.n), out action, out _);
            print("Action Chosen: " + action);
        }

        //End the episode once the maximum number of steps is reached
        controller.steps++;
        if (controller.steps > controller.maxSteps)
        {
            controller.end = true;
        }
        return action;
    }
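Taken together, ChooseAction implements an epsilon-greedy policy with a linearly decaying epsilon:

    \pi(s) =
    \begin{cases}
        \text{random action} & \text{with probability } \epsilon \\
        \arg\max_{a} Q(s, a) & \text{otherwise}
    \end{cases}
    \qquad
    \epsilon \leftarrow \max(\epsilon - \text{epsilonDecay}, \epsilon_{\min})

A minimal sketch of how the three methods above could be driven from an outer training loop; ResetEpisode() is a hypothetical helper (not part of the original code) that would respawn the agent, clear controller.end and reset controller.steps:

    public void RunEpisode()
    {
        controller.ResetEpisode();  //Hypothetical reset helper, see note above

        while (!controller.end)
        {
            //Epsilon-greedy selection (Example #3), then move and update the Q-table (Example #1)
            Vector2 action = ChooseAction();
            PerformAction(action);
        }

        print("Episode finished, total reward: " + controller.totalReward);
    }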