コード例 #1
0
        /// <summary>
        /// Drives a <c>QLearning</c> instance across a seeded pseudo-random reward grid and
        /// verifies the largest total reward collected along any single training path.
        /// </summary>
        /// <remarks>
        /// The grid is 16x16 with cell values drawn from [0, 100). Each of the 100 paths
        /// starts on a random cell and takes 20 moves; a move that would leave the grid is
        /// pulled back onto the nearest edge cell and earns no reward for that step.
        /// The generator is seeded and the final assertion is exact, so the order in which
        /// values are drawn from it presumably matters — keep it unchanged.
        /// </remarks>
        public void QLearningGridPathTest()
        {
            const int side            = 16;
            const int stepsPerEpisode = 20;
            const int episodeCount    = 100;

            var rng     = new Random(1337);
            var rewards = new int[side, side];

            // One state per grid cell and four actions (one per direction handled below).
            // NOTE(review): 0.0, 0.3 and 0.8 are presumably the exploration rate, learning
            // rate and discount factor — confirm against the QLearning constructor.
            var learner   = new QLearning(side * side, 4, new EpsilonGreedyExplorationPolicy(0.0), 0.3, 0.8);
            var bestTotal = 0.0;

            // Populate the reward grid, first index outermost.
            for (var col = 0; col < side; col++)
            {
                for (var row = 0; row < side; row++)
                {
                    rewards[col, row] = rng.Next(0, 100);
                }
            }

            // train
            for (var episode = 0; episode < episodeCount; episode++)
            {
                var col          = rng.Next(0, side);
                var row          = rng.Next(0, side);
                var episodeTotal = 0.0;

                // Cells are flattened to a state index as (row * side + col).
                learner.Begin(row * side + col);

                for (var step = 0; step < stepsPerEpisode; step++)
                {
                    // Apply the action currently selected by the learner.
                    var move = (Action)learner.SelectedAction;
                    if (move == Action.UP)
                    {
                        row++;
                    }
                    else if (move == Action.DOWN)
                    {
                        row--;
                    }
                    else if (move == Action.LEFT)
                    {
                        col--;
                    }
                    else if (move == Action.RIGHT)
                    {
                        col++;
                    }

                    int reward;
                    var insideGrid = col >= 0 && col < side && row >= 0 && row < side;
                    if (insideGrid)
                    {
                        reward = rewards[col, row];
                    }
                    else
                    {
                        // Stepped off the grid: snap back to the edge and earn nothing.
                        col    = Math.Max(0, Math.Min(col, side - 1));
                        row    = Math.Max(0, Math.Min(row, side - 1));
                        reward = 0;
                    }

                    episodeTotal += reward;

                    learner.Step(reward, row * side + col);
                }

                // Track the best path total seen so far.
                bestTotal = episodeTotal > bestTotal ? episodeTotal : bestTotal;
            }

            Assert.AreEqual(896, bestTotal);
        }