示例#1
0
        /// <summary>
        /// Populates the <c>Q</c> and <c>R</c> tables of <paramref name="q"/> for a
        /// <c>Size</c> x <c>Size</c> grid.
        /// </summary>
        /// <remarks>
        /// Every (state, state) pair in <c>Q</c> starts at 0 and every pair in <c>R</c>
        /// starts at -1. Each pair accepted by <c>CanMove</c> then has its <c>R</c> entry
        /// overwritten: 10 when the destination equals <c>Cheese</c>, otherwise 0 when it
        /// equals <c>Start</c>, otherwise -10 when <c>Blocks</c> contains it, otherwise 0.
        /// </remarks>
        /// <param name="q">
        /// Learner whose tables are filled; any existing outer entry for a grid state is replaced.
        /// </param>
        public void Set(QLearning q)
        {
            // Collect every grid cell in row-major order.
            var cells = new Point[Size * Size];
            int next  = 0;
            for (int r = 0; r < Size; r++)
            {
                for (int c = 0; c < Size; c++)
                {
                    cells[next++] = new Point(r, c);
                }
            }

            // A fresh State is built on every conversion, so no key instance is shared between tables.
            State AsState(Point cell) => new State(cell.Row, cell.Col);

            // Gives `table` one inner dictionary per cell, each mapping every cell to `initial`.
            void Populate(Dictionary <State, Dictionary <State, float> > table, float initial)
            {
                foreach (var source in cells)
                {
                    var key = AsState(source);
                    var row = new Dictionary <State, float>();
                    table[key] = row;

                    foreach (var target in cells)
                    {
                        row.Add(AsState(target), initial);
                    }
                }
            }

            Populate(q.Q, 0);
            Populate(q.R, -1);

            // Overwrite the default reward for every pair that CanMove accepts.
            foreach (var origin in cells)
            {
                foreach (var destination in cells)
                {
                    if (!CanMove(origin, destination))
                    {
                        continue;
                    }

                    bool isBlocked = Blocks.Contains(destination);
                    bool isStart   = Start.Equals(destination);
                    bool isCheese  = Cheese.Equals(destination);

                    // Precedence: cheese beats start, start beats block, anything else is neutral.
                    float reward = isCheese  ? 10f
                                 : isStart   ? 0f
                                 : isBlocked ? -10f
                                 : 0f;

                    q.R[AsState(origin)][AsState(destination)] = reward;
                }
            }
        }
示例#2
0
        /// <summary>
        /// Program entry point: generates a <c>FindTheCheese</c> instance from a seeded
        /// random source, loads it into a <c>QLearning</c> object, prints the <c>R</c>
        /// matrix, runs <c>Learn</c>, then prints the resulting <c>Q</c> matrix and the
        /// path from <c>Start</c> to <c>Cheese</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            const int   Seed         = 42;
            const float Gamma        = 0.8f;
            const int   GridSize     = 4;
            const int   BlockCount   = 5;
            const int   EpisodeCount = 1000;

            // One Random instance is used for both generation and learning.
            var rng     = new Random(Seed);
            var puzzle  = FindTheCheese.Generate(rng, GridSize, BlockCount);
            var learner = new QLearning(Gamma);

            // Fill the learner's tables from the puzzle and show the R matrix.
            puzzle.Set(learner);
            ConsoleExtensions.WriteMatrix(learner.R, "R");

            // Run learning, then print the Q matrix and the path.
            learner.Learn(puzzle.Start, puzzle.Cheese, EpisodeCount, GridSize, rng);
            ConsoleExtensions.WriteMatrix(learner.Q, "Final Q");
            learner.WritePath(puzzle.Start, puzzle.Cheese);
        }