Esempio n. 1
0
        /// <summary>
        /// Random source reused by every <see cref="PickMove"/> call on this instance.
        /// Creating a new <see cref="Random"/> per call is avoided because, on .NET Framework,
        /// generators created in quick succession are seeded from the same clock tick and
        /// therefore yield identical sequences.
        /// </summary>
        private readonly Random _moveRandom = new Random();

        /// <summary>
        /// Chooses an action index for the given state.
        /// </summary>
        /// <param name="state">Key identifying the current state in <c>QTable</c>.</param>
        /// <returns>
        /// A uniformly random action in the range 0..3 when exploring (with probability
        /// <c>ExploreRate</c>) or when the state has no entry in <c>QTable</c>; otherwise the
        /// index of a highest-valued entry for the state, with ties broken uniformly at random.
        /// </returns>
        public int PickMove(string state)
        {
            // Number of actions offered for a state without a table entry.
            const int actionCount = 4;

            // Explore: sometimes make a random move. The draw happens first so that the
            // table is only consulted when we are going to exploit.
            if (_moveRandom.NextDouble() < ExploreRate)
            {
                return _moveRandom.Next(actionCount);
            }

            // Unknown state: nothing learned yet, so any action is as good as another.
            if (!QTable.TryGetValue(state, out var qValues))
            {
                return _moveRandom.Next(actionCount);
            }

            var bestValue   = qValues.Max();
            var bestActions = new List<int>();

            // Collect every action that ties for the best value. Exact equality is safe
            // here because bestValue is itself one of the list's elements.
            for (int i = 0; i < qValues.Count; i++)
            {
                if (qValues[i] == bestValue)
                {
                    bestActions.Add(i);
                }
            }

            // Break ties at random so no action is systematically favoured.
            return bestActions[_moveRandom.Next(bestActions.Count)];
        }
Esempio n. 2
0
        /// <summary>
        /// Updates the stored value of taking <paramref name="action"/> in
        /// <paramref name="state"/> after observing <paramref name="reward"/> and the
        /// transition to <paramref name="nextState"/>.
        /// </summary>
        /// <param name="state">Key of the state the action was taken in.</param>
        /// <param name="action">Index of the action taken, used to index the state's value list.</param>
        /// <param name="reward">Reward observed for the transition.</param>
        /// <param name="nextState">Key of the state reached after the action.</param>
        public void Learn(string state, int action, double reward, string nextState)
        {
            // Make sure both states have a row before reading from the table;
            // a new row starts with four zero-valued entries.
            foreach (var key in new[] { state, nextState })
            {
                if (QTable.ContainsKey(key))
                {
                    continue;
                }

                QTable.Add(key, new List <double>() { 0, 0, 0, 0 });
            }

            var stateRow = QTable[state];
            var oldValue = stateRow[action];

            // Target = immediate reward plus the discounted best value of the next state.
            var bestNext = QTable[nextState].Max();
            var target   = reward + DiscountFactor * bestNext;

            // Move the stored value towards the target by a LearningRate-sized step.
            stateRow[action] = oldValue + LearningRate * (target - oldValue);
        }