Exemple #1
0
    /// <summary>
    /// Returns the largest Q-value stored in <c>qvalues</c> for the state currently
    /// reported by <paramref name="rlGameState"/>, taken over every <see cref="Direction"/>.
    /// </summary>
    /// <param name="rlGameState">State provider; its <c>GetCurrentState()</c> is queried for each direction.</param>
    /// <returns>
    /// The maximum stored Q-value, or 0 when no state/action pair of this state has an
    /// entry yet (unseen pairs are skipped rather than inserted).
    /// </returns>
    private float findVopt(RLGameState rlGameState)
    {
        float Vopt = -Mathf.Infinity;

        foreach (Direction dir in System.Enum.GetValues(typeof(Direction)))
        {
            // NOTE(review): the state is re-queried per direction exactly as before;
            // hoist it out of the loop once GetCurrentState()/stateActionToString are
            // confirmed not to mutate anything.
            var key = stateActionToString(rlGameState.GetCurrentState(), dir);

            // Single lookup instead of ContainsKey followed by two indexer reads.
            float candidate;
            if (qvalues.TryGetValue(key, out candidate) && Vopt < candidate)
            {
                Vopt = candidate;
            }
        }

        // Nothing beat the sentinel: treat the state as having value 0.
        if (Vopt == -Mathf.Infinity)
        {
            Vopt = 0;
        }
        return(Vopt);
    }
    /// <summary>
    /// Runs one tabular Q-learning step: chooses an action (epsilon-greedy), plays it,
    /// advances the other objects, computes the reward and blends it into the Q-table
    /// entry of the state/action pair that was played. When the player is dead afterwards
    /// the learning data is saved and the game is restarted.
    /// </summary>
    public void MakeMove()
    {
        // State the decision is based on (captured before the move is played).
        RLGameState rlGameState = new RLGameState();
        List <int>  currstate   = rlGameState.GetCurrentState();

        // Start from the deterministic choice; exploration may override it below.
        Direction ourChoice = makeDeterministicChoice(currstate);
        float     randFloat = Random.Range(0.0f, 1.0f);

        if (randFloat < epsilon)
        {
            // Explore: pick uniformly among STAY and every direction whose state slot is 0
            // (presumably "not blocked" -- confirm against RLGameState).
            List <Direction> epsilonList = new List <Direction>();
            if (currstate[1] == 0)
            {
                epsilonList.Add(Direction.FRONT);
            }
            if (currstate[3] == 0)
            {
                epsilonList.Add(Direction.LEFT);
            }
            if (currstate[4] == 0)
            {
                epsilonList.Add(Direction.RIGHT);
            }
            if (currstate[6] == 0)
            {
                epsilonList.Add(Direction.BACK);
            }
            epsilonList.Add(Direction.STAY);

            ourChoice = epsilonList[Random.Range(0, epsilonList.Count)];
        }

        // The returned success flag was never used; the reward below does not depend on it.
        successfullyMovedPos(ourChoice);

        manualMoveAllObjects();

        // Best known value of the state reached after the move.
        float Vopt = findVopt(rlGameState);

        // Reward shaping: forward is rewarded, backward and staying are penalised.
        float r = 0;

        if (ourChoice == Direction.FRONT)
        {
            countFront++;
            r += 7;
        }
        else if (ourChoice == Direction.BACK)
        {
            countFront--;
            r -= 9;
        }
        else if (ourChoice == Direction.STAY)
        {
            r -= 8;
        }

        // Resolve the player once instead of repeating the tag search for every use.
        PlayerControl player = GameObject.FindGameObjectWithTag("Player").GetComponent <PlayerControl>();
        bool          isDead = player.IsDead();

        // Discourage dying with a large penalty.
        if (isDead)
        {
            r -= 300;
        }

        float eta = 0.01f;

        var key = stateActionToString(currstate, ourChoice);

        // Unseen state/action pairs start at 0.
        float oldQ;
        if (!qvalues.TryGetValue(key, out oldQ))
        {
            oldQ = 0;
        }

        // Q-learning update: move the old estimate towards r + discountFactor * Vopt.
        qvalues[key] = (1 - eta) * oldQ + eta * (r + discountFactor * Vopt);

        if (isDead)
        {
            // Save everything learned so far, then start a new episode.
            saveDictionary();
            saveIteration();
            saveHighestScore();
            saveScore();

            player.RestartGame();
        }

        Moved = true;
    }
Exemple #3
0
    /// <summary>
    /// Runs one learning step with a weight vector instead of a Q-table: chooses an action
    /// (epsilon-greedy on the result of <c>FindVOpt</c>), plays it, advances the other
    /// objects, computes the reward and updates <c>weight</c>. When the player is dead
    /// afterwards the weights and score are saved and the game is restarted.
    /// </summary>
    public void MakeMove()
    {
        // Feature state the decision is based on (captured before the move is played).
        RLGameState rlGameState = new RLGameState();
        List <int>  currstate   = rlGameState.GetCurrentFAState();

        // Start from the direction FindVOpt returns; exploration may override it below.
        Qvalue_Direction_Pair qvalueDirectionPair = FindVOpt(currstate, weight);
        Direction             ourChoice           = qvalueDirectionPair.dir;

        float randFloat = Random.Range(0.0f, 1.0f);

        if (randFloat < epsilon)
        {
            // Explore: pick uniformly among STAY and every direction whose state slot is 0
            // (presumably "not blocked" -- confirm against RLGameState).
            List <Direction> epsilonList = new List <Direction>();
            if (currstate[1] == 0)
            {
                epsilonList.Add(Direction.FRONT);
            }
            if (currstate[3] == 0)
            {
                epsilonList.Add(Direction.LEFT);
            }
            if (currstate[4] == 0)
            {
                epsilonList.Add(Direction.RIGHT);
            }
            if (currstate[6] == 0)
            {
                epsilonList.Add(Direction.BACK);
            }
            epsilonList.Add(Direction.STAY);

            ourChoice = epsilonList[Random.Range(0, epsilonList.Count)];
        }

        // The returned success flag was never used; the reward below does not depend on it.
        successfullyMovedPos(ourChoice);

        manualMoveAllObjects();

        // Value FindVOpt reports for the state reached after the move.
        Qvalue_Direction_Pair Vopt = FindVOpt(rlGameState.GetCurrentFAState(), weight);
        float vopt = Vopt.qvalue;

        // Reward shaping: forward is rewarded, backward and staying are penalised.
        float r = 0;

        if (ourChoice == Direction.FRONT)
        {
            r += 7;
        }
        else if (ourChoice == Direction.BACK)
        {
            r -= 9;
        }
        else if (ourChoice == Direction.STAY)
        {
            r -= 8;
        }

        // Resolve the player once instead of repeating the tag search for every use.
        PlayerControl player = GameObject.FindGameObjectWithTag("Player").GetComponent <PlayerControl>();
        bool          isDead = player.IsDead();

        // Discourage dying with a large penalty.
        if (isDead)
        {
            r -= 300;
        }

        float eta = 0.01f;

        // NOTE(review): the prediction is the value FindVOpt returned before any epsilon
        // override, so after an exploratory move it may not belong to ourChoice; no
        // discount factor is applied to vopt either. Confirm both are intended.
        float constant = eta * (qvalueDirectionPair.qvalue - (r + vopt));

        weight = updateWeight(weight, addActionToCurrState(currstate, ourChoice), constant);

        if (isDead)
        {
            // Save the weight vector and score at death, then start a new episode.
            saveWeight();
            saveScore();
            player.RestartGame();
        }

        Moved = true;
    }
Exemple #4
0
    /// <summary>
    /// Performs a single tabular Q-learning iteration: selects an action with an
    /// epsilon-greedy policy, executes it, moves the remaining objects, derives the
    /// reward and folds it into the Q-value of the played state/action pair. If the
    /// player is dead afterwards, the learning data is saved and the game restarts.
    /// </summary>
    public void MakeMove()
    {
        // State the decision is based on (captured before the move is played).
        RLGameState rlGameState = new RLGameState();
        List <int>  currstate   = rlGameState.GetCurrentState();

        // Deterministic choice first; exploration may replace it below.
        Direction ourChoice = makeDeterministicChoice(currstate);
        float     randFloat = Random.Range(0.0f, 1.0f);

        if (randFloat < epsilon)
        {
            // Explore: STAY is always a candidate; a direction is a candidate only when
            // its state slot is 0 (presumably "not blocked" -- confirm against RLGameState).
            List <Direction> epsilonList = new List <Direction>();
            if (currstate[1] == 0)
            {
                epsilonList.Add(Direction.FRONT);
            }
            if (currstate[3] == 0)
            {
                epsilonList.Add(Direction.LEFT);
            }
            if (currstate[4] == 0)
            {
                epsilonList.Add(Direction.RIGHT);
            }
            if (currstate[6] == 0)
            {
                epsilonList.Add(Direction.BACK);
            }
            epsilonList.Add(Direction.STAY);

            ourChoice = epsilonList[Random.Range(0, epsilonList.Count)];
        }

        // The returned success flag was never used: the reward is granted whether or
        // not the move actually succeeded.
        successfullyMovedPos(ourChoice);

        manualMoveAllObjects();

        // Best known value of the state reached after the move.
        float Vopt = findVopt(rlGameState);

        // Reward shaping: +7 for forward, -9 for backward, -8 for staying.
        float r = 0;

        if (ourChoice == Direction.FRONT)
        {
            countFront++;
            r += 7;
        }
        else if (ourChoice == Direction.BACK)
        {
            countFront--;
            r -= 9;
        }
        else if (ourChoice == Direction.STAY)
        {
            r -= 8;
        }

        // Resolve the player once instead of repeating the tag search for every use.
        PlayerControl player = GameObject.FindGameObjectWithTag("Player").GetComponent <PlayerControl>();
        bool          isDead = player.IsDead();

        // -300 if dead.
        if (isDead)
        {
            r -= 300;
        }

        // Fixed learning rate.
        float eta = 0.01f;

        var key = stateActionToString(currstate, ourChoice);

        // Unseen state/action pairs start at 0.
        float oldQ;
        if (!qvalues.TryGetValue(key, out oldQ))
        {
            oldQ = 0;
        }

        // Q-learning update: move the old estimate towards r + discountFactor * Vopt.
        qvalues[key] = (1 - eta) * oldQ + eta * (r + discountFactor * Vopt);

        if (isDead)
        {
            // Save the Q-values and statistics so they can be reloaded later, then restart.
            saveDictionary();
            saveIteration();
            saveHighestScore();
            saveScore();

            player.RestartGame();
        }

        Moved = true;
    }
    /// <summary>
    /// Picks a move through <c>recurseFunction</c>, plays it, advances the other objects,
    /// updates and saves the weight vector from the observed reward, and logs the
    /// transition as a <c>state|move|reward|nextState</c> record via <c>saveData</c>.
    /// </summary>
    public void FindBestMove()
    {
        // Feature state before the move and the value FindVOpt reports for it.
        RLGameState rlGameState = new RLGameState();
        List <int>  currstate   = rlGameState.GetCurrentFAState();

        Qvalue_Direction_Pair qvalueDirectionPair = FindVOpt(currstate, weight);

        // Initialization for the colliders used by the search and the death check.
        playerCollider = gameState.GetPlayer();
        carColliders   = gameState.GetCarColliders(playerCollider, lookRadius);
        logColliders   = gameState.GetLogColliders(playerCollider, lookRadius);

        // Element 1 of the search result holds the name of the chosen Direction.
        var bestMove = recurseFunction(0, depthSetting, playerCollider.transform.position, true);
        var move     = (Direction)System.Enum.Parse(typeof(Direction), bestMove[1]);

        movePlayer(move);
        GameObject.FindGameObjectWithTag("Player").GetComponent <PlayerControl>().clip();

        manualMoveAllObjects();

        var switchup = gameState.isPlayerDead(playerCollider, logColliders, carColliders);
        var r        = reward(move, switchup);

        // Feature state after the move and the value FindVOpt reports for it.
        List <int>            some = rlGameState.GetCurrentFAState();
        Qvalue_Direction_Pair Vopt = FindVOpt(some, weight);
        float vopt = Vopt.qvalue;

        float eta = 0.01f;

        // NOTE(review): the prediction is the value FindVOpt returned for the pre-move
        // state, which may not belong to the move chosen by recurseFunction; no discount
        // factor is applied to vopt either. Confirm both are intended.
        float constant = eta * (qvalueDirectionPair.qvalue - (r + vopt));

        weight = updateWeight(weight, addActionToCurrState(currstate, move), constant);
        saveWeight();

        // Record layout: state | move | reward | statedash, each state comma-separated,
        // e.g. 0,0,0,0|1|0.233|0,0,0,0
        string stateBefore = string.Join(",", currstate.ConvertAll(v => v.ToString()).ToArray());
        string stateAfter  = string.Join(",", some.ConvertAll(v => v.ToString()).ToArray());
        string state       = stateBefore + "|" + (int)move + "|" + r + "|" + stateAfter;

        saveData(state);
        clearStates(); // after finding a best move, clear all states and refind another

        Moved = true;
    }