コード例 #1
0
        // Random generator reused by PickMove, one instance per thread.
        // It is created lazily inside the method because a [ThreadStatic] field
        // initializer would only run for the first thread that touches the type.
        [ThreadStatic]
        private static Random t_pickMoveRandom;

        /// <summary>
        /// Chooses an action index for <paramref name="state"/>.
        /// With probability <c>ExploreRate</c>, or when the state has no entry in
        /// <c>QTable</c>, a uniformly random action in [0, 4) is returned.
        /// Otherwise one of the actions holding the highest Q-value is returned,
        /// with ties broken at random.
        /// </summary>
        /// <param name="state">Key identifying the current state in <c>QTable</c>.</param>
        /// <returns>The index of the chosen action.</returns>
        public int PickMove(string state)
        {
            // Reuse one generator instead of constructing a new one per call:
            // on runtimes that seed Random from the clock, instances created in
            // quick succession can produce identical sequences.
            var random = t_pickMoveRandom ?? (t_pickMoveRandom = new Random());

            // Explore: sometimes make a random move, and always do so for unseen states.
            // The random draw is evaluated first, exactly as before; TryGetValue then
            // fetches the row in the same lookup that checks for its presence.
            if (random.NextDouble() < ExploreRate || !QTable.TryGetValue(state, out var qValues))
            {
                return random.Next(4);
            }

            var bestQ       = qValues.Max();
            int actionCount = qValues.Count(); // evaluated once, not on every loop iteration
            var bestActions = new List<int>();

            // Collect every action that attains the maximum. Exact equality is
            // intentional here: bestQ was taken from this very sequence.
            for (int action = 0; action < actionCount; action++)
            {
                if (qValues[action] == bestQ)
                {
                    bestActions.Add(action);
                }
            }

            // Exploit: pick one of the optimal actions at random.
            return bestActions[random.Next(bestActions.Count)];
        }
コード例 #2
0
        /// <summary>
        /// Renders the action values of <paramref name="Table"/> as bracketed text.
        /// The result starts with "[", a newline and two spaces; each state row lists
        /// its values, every value followed by ", "; each row ends with a newline and
        /// two spaces; and the text ends with two backspace characters and "]".
        /// </summary>
        /// <param name="Table">Table whose <c>tableStructure</c> is rendered.</param>
        /// <returns>The textual representation described above.</returns>
        public static string InitFileStructure(QTable Table)
        {
            // A StringBuilder avoids re-copying the whole accumulated text on every
            // append, which repeated string concatenation in nested loops would do.
            var text = new System.Text.StringBuilder("[\n  ");

            for (int i = 0; i < Table.tableStructure.states; i++)
            {
                for (int j = 0; j < Table.tableStructure.actions; j++)
                {
                    // NOTE(review): values are formatted with the current culture; if that
                    // culture uses ',' as the decimal separator the output is ambiguous.
                    text.Append($"{Table.tableStructure.actionValues[i][j]}, ");
                }
                text.Append("\n  ");
            }

            // NOTE(review): "\b\b" emits two literal backspace characters, presumably to
            // hide the trailing indent on a console; in a file they remain as control
            // characters. Kept unchanged so existing readers of this format keep working.
            text.Append("\b\b]");

            return text.ToString();
        }
コード例 #3
0
        /// <summary>
        /// Updates the stored Q-value of (<paramref name="state"/>, <paramref name="action"/>)
        /// from an observed transition: the value is moved towards
        /// <c>reward + DiscountFactor * max(Q[nextState])</c> by a fraction <c>LearningRate</c>.
        /// States that are not yet in <c>QTable</c> are added with four zero values.
        /// </summary>
        /// <param name="state">Key of the state in which the action was taken.</param>
        /// <param name="action">Index of the action that was taken.</param>
        /// <param name="reward">Reward received for the transition.</param>
        /// <param name="nextState">Key of the state reached after the action.</param>
        public void Learn(string state, int action, double reward, string nextState)
        {
            // Make sure both states have a row before reading from either of them.
            if (!QTable.TryGetValue(state, out var stateRow))
            {
                stateRow = new List<double> { 0, 0, 0, 0 };
                QTable.Add(state, stateRow);
            }

            if (!QTable.TryGetValue(nextState, out var nextRow))
            {
                nextRow = new List<double> { 0, 0, 0, 0 };
                QTable.Add(nextState, nextRow);
            }

            var previousQ = stateRow[action];
            var targetQ   = reward + DiscountFactor * nextRow.Max();

            // Step the stored value towards the target.
            stateRow[action] = previousQ + LearningRate * (targetQ - previousQ);
        }
コード例 #4
0
        /// <summary>
        /// Program entry point. Builds a table of 16 states with 4 actions each,
        /// calls <c>ReadExistingStructure</c> on it, prints the table, runs
        /// <c>StartProcess</c>, prints the table again, calls <c>SaveQTableToFile</c>
        /// and finally waits for a line of console input.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int states  = 16;
            int actions = 4; // Actions: UP, DOWN, RIGHT, LEFT

            // Build the per-state value rows and the state names ("0" .. states - 1)
            // in one pass, so the names can never drift out of sync with `states`.
            var actionList = new List<double[]>(states);
            var stateNames = new string[states];

            for (int i = 0; i < states; i++)
            {
                actionList.Add(QTable.Zeros(actions));
                stateNames[i] = i.ToString(System.Globalization.CultureInfo.InvariantCulture);
            }

            table = new QTable(states, actions, stateNames, actionList);
            table.ReadExistingStructure();

            // Print the table before and after the run so the two can be compared.
            Console.WriteLine(QTable.InitStructure(table));
            StartProcess();
            Console.WriteLine(QTable.InitStructure(table));

            table.SaveQTableToFile();
            Console.ReadLine();
        }