示例#1
0
    //Best to return direction and the q_opt value
    /// <summary>
    /// Evaluates every <see cref="Direction"/> value from the given state and
    /// returns the one with the largest Q value together with that value.
    /// </summary>
    /// <param name="currstate">State features; passed through copyState before each action is applied.</param>
    /// <param name="weight">Weight vector handed to linearCombination.</param>
    /// <returns>
    /// The winning direction and its Q value. If no direction scores above
    /// negative infinity the result is (STAY, negative infinity).
    /// </returns>
    public Qvalue_Direction_Pair FindVOpt(List <int> currstate, List <float> weight)
    {
        Direction winner      = Direction.STAY;
        float     winnerScore = float.NegativeInfinity;

        // Score each candidate action; strict '>' means the first direction
        // enumerated keeps the win on ties.
        foreach (Direction candidate in System.Enum.GetValues(typeof(Direction)))
        {
            List <int> features = addActionToCurrState(copyState(currstate), candidate);
            float      score    = linearCombination(weight, features);

            if (!(score > winnerScore))
            {
                continue;
            }

            winnerScore = score;
            winner      = candidate;
        }

        Qvalue_Direction_Pair result = new Qvalue_Direction_Pair
        {
            dir    = winner,
            qvalue = winnerScore
        };

        Debug.Log(winner);
        return result;
    }
示例#2
0
    //Best to return direction and the q_opt value
    /// <summary>
    /// Evaluates every <see cref="Direction"/> value from the given state and
    /// returns the one with the largest Q value together with that value.
    /// </summary>
    /// <param name="currstate">State features; passed through copyState before each action is applied.</param>
    /// <param name="weight">Weight vector handed to linearCombination.</param>
    /// <returns>
    /// The winning direction and its Q value. If no direction scores above
    /// negative infinity the result is (STAY, negative infinity).
    /// </returns>
    private Qvalue_Direction_Pair FindVOpt(List <int> currstate, List <float> weight)
    {
        // Start from negative infinity rather than 0: with a 0 seed, a state
        // whose Q values are all negative (or zero) would never update the
        // maximum and (STAY, 0) would be reported instead of the real arg-max.
        float     maxValue = float.NegativeInfinity;
        Direction bestdir  = Direction.STAY;

        // For each direction calculate Q_opt(currstate + [direction vector])
        // and pick the largest. Strict '>' keeps the first direction on ties.
        foreach (Direction dir in System.Enum.GetValues(typeof(Direction)))
        {
            List <int> tempstate = copyState(currstate);
            tempstate = addActionToCurrState(tempstate, dir);

            float Q_opt = linearCombination(weight, tempstate);
            if (Q_opt > maxValue)
            {
                maxValue = Q_opt;
                bestdir  = dir;
            }
        }

        Qvalue_Direction_Pair returnValue = new Qvalue_Direction_Pair();

        returnValue.dir    = bestdir;
        returnValue.qvalue = maxValue;
        return(returnValue);
    }
示例#3
0
    /// <summary>
    /// Runs one learning step: chooses a direction (greedy, or random with
    /// probability epsilon), applies it, advances the other objects, computes
    /// the reward and updates the weight vector. When the player is dead the
    /// weights and score are saved and the game is restarted.
    /// </summary>
    public void MakeMove()
    {
        RLGameState rlGameState = new RLGameState();
        List <int>  currstate   = rlGameState.GetCurrentFAState();

        // Greedy choice and its Q value under the current weights.
        Qvalue_Direction_Pair qvalueDirectionPair = FindVOpt(currstate, weight);
        Direction             ourChoice           = qvalueDirectionPair.dir;

        // Exploration: with probability epsilon replace the greedy choice.
        float randFloat = Random.Range(0.0f, 1.0f);

        if (randFloat < epsilon)
        {
            // A direction is a candidate only when its state entry is 0
            // (presumably "cell is free" — TODO confirm against GetCurrentFAState).
            List <Direction> epsilonList = new List <Direction>();
            if (currstate[1] == 0)
            {
                epsilonList.Add(Direction.FRONT);
            }
            if (currstate[3] == 0)
            {
                epsilonList.Add(Direction.LEFT);
            }
            if (currstate[4] == 0)
            {
                epsilonList.Add(Direction.RIGHT);
            }
            if (currstate[6] == 0)
            {
                epsilonList.Add(Direction.BACK);
            }

            // STAY is always a candidate, so the list is never empty.
            epsilonList.Add(Direction.STAY);
            int rand = Random.Range(0, epsilonList.Count);
            ourChoice = epsilonList[rand];
        }

        // The returned flag is not used by the reward below; the call itself is
        // kept because it presumably performs the move — TODO confirm.
        successfullyMovedPos(ourChoice);

        manualMoveAllObjects();

        // Value of the state reached after the move.
        Qvalue_Direction_Pair Vopt = FindVOpt(rlGameState.GetCurrentFAState(), weight);
        float vopt = Vopt.qvalue;

        // Reward shaping: +7 for FRONT, -9 for BACK, -8 for STAY, 0 otherwise.
        float r = 0;

        if (ourChoice == Direction.FRONT)
        {
            r += 7;
        }
        else if (ourChoice == Direction.BACK)
        {
            r -= 9;
        }
        else if (ourChoice == Direction.STAY)
        {
            r -= 8;
        }

        // Look the player up once and query the death state once, so the
        // penalty and the restart below are driven by the same observation
        // and the scene is not searched three times per step.
        PlayerControl player     = GameObject.FindGameObjectWithTag("Player").GetComponent <PlayerControl>();
        bool          playerDead = player.IsDead();

        if (playerDead)
        {
            r -= 300;
        }

        float eta = 0.01f;

        // NOTE(review): qvalueDirectionPair.qvalue is the Q value of the greedy
        // direction. When the epsilon branch replaced ourChoice, the prediction
        // and the features passed to updateWeight refer to different actions —
        // confirm whether that is intended.
        float constant = eta * (qvalueDirectionPair.qvalue - (r + vopt));

        weight = updateWeight(weight, addActionToCurrState(currstate, ourChoice), constant);

        if (playerDead)
        {
            // Persist the weight vector and score at death, then start over.
            saveWeight();
            saveScore();
            player.RestartGame();
        }

        Moved = true;
    }
示例#4
0
    /// <summary>
    /// Chooses a move via recurseFunction, applies it, advances the other
    /// objects, updates and saves the weight vector from the observed reward,
    /// and records the transition as "state|move|reward|next state" through
    /// saveData.
    /// </summary>
    public void FindBestMove()
    {
        RLGameState rlGameState = new RLGameState();
        List <int>  currstate   = rlGameState.GetCurrentFAState();

        // Q estimate for the state before moving.
        Qvalue_Direction_Pair qvalueDirectionPair = FindVOpt(currstate, weight);

        // Initialization for the colliders used by the search.
        playerCollider = gameState.GetPlayer();
        carColliders   = gameState.GetCarColliders(playerCollider, lookRadius);
        logColliders   = gameState.GetLogColliders(playerCollider, lookRadius);

        // bestMove[1] holds the name of the chosen Direction.
        var bestMove = recurseFunction(0, depthSetting, playerCollider.transform.position, true);
        var move     = (Direction)System.Enum.Parse(typeof(Direction), bestMove[1]);

        movePlayer(move);
        GameObject.FindGameObjectWithTag("Player").GetComponent <PlayerControl>().clip();

        manualMoveAllObjects();

        var switchup = gameState.isPlayerDead(playerCollider, logColliders, carColliders);
        var r        = reward(move, switchup);

        // Value of the state reached after the move.
        List <int>            some = rlGameState.GetCurrentFAState();
        Qvalue_Direction_Pair Vopt = FindVOpt(some, weight);
        float vopt = Vopt.qvalue;

        float eta = 0.01f;

        // NOTE(review): qvalueDirectionPair.qvalue is the Q value of FindVOpt's
        // best direction, while the features below are built for `move`, which
        // comes from recurseFunction — confirm whether that is intended.
        float constant = eta * (qvalueDirectionPair.qvalue - (r + vopt));

        weight = updateWeight(weight, addActionToCurrState(currstate, move), constant);
        saveWeight();

        // Transition record: state | move | reward | statedash, each state as a
        // comma-separated list. Built after the update above to keep the
        // original ordering (addActionToCurrState may modify currstate — TODO confirm).
        string state = string.Join(",", currstate.ConvertAll(v => v.ToString()).ToArray())
                       + "|" + (int)move
                       + "|" + r
                       + "|" + string.Join(",", some.ConvertAll(v => v.ToString()).ToArray());

        saveData(state);
        clearStates(); // after finding a best move, clear all states so the next search starts fresh

        Moved = true;
    }