/// <summary>
/// Populates the <c>Q</c> and <c>R</c> tables of <paramref name="q"/> for this grid.
/// </summary>
/// <remarks>
/// Every (state, action) pair over the Size x Size grid first receives 0 in
/// <c>q.Q</c> and -1 in <c>q.R</c>. For each pair accepted by <c>CanMove</c>, the
/// <c>R</c> entry is then overwritten with 10 when the destination equals
/// <c>Cheese</c>, otherwise 0 when it equals <c>Start</c>, otherwise -10 when
/// <c>Blocks</c> contains it, otherwise 0. Pairs rejected by <c>CanMove</c> keep -1.
/// </remarks>
/// <param name="q">Learner whose <c>Q</c> and <c>R</c> tables are filled in.</param>
public void Set(QLearning q)
{
    // Enumerate all grid cells in row-major order.
    var cells = new Point[Size * Size];
    int next = 0;
    for (int r = 0; r < Size; r++)
    {
        for (int c = 0; c < Size; c++)
        {
            cells[next++] = new Point(r, c);
        }
    }

    State AsState(Point cell) => new State(cell.Row, cell.Col);

    // Replaces the row of every state with a fresh row holding `initial` for each action.
    void Populate(Dictionary<State, Dictionary<State, float>> table, float initial)
    {
        foreach (var source in cells)
        {
            var row = new Dictionary<State, float>();
            table[AsState(source)] = row;

            foreach (var target in cells)
            {
                row.Add(AsState(target), initial);
            }
        }
    }

    Populate(q.Q, 0f);
    Populate(q.R, -1f);

    foreach (var from in cells)
    {
        foreach (var to in cells)
        {
            if (!CanMove(from, to))
            {
                continue;
            }

            // All three checks are evaluated up front, in this order.
            bool isBlocked = Blocks.Contains(to);
            bool isStart = Start.Equals(to);
            bool isCheese = Cheese.Equals(to);

            // Precedence: cheese wins over start, start wins over block.
            float reward = isCheese ? 10f
                : isStart ? 0f
                : isBlocked ? -10f
                : 0f;

            q.R[AsState(from)][AsState(to)] = reward;
        }
    }
}
/// <summary>
/// Program entry point: generates a 4x4 <c>FindTheCheese</c> instance with a
/// fixed random seed, loads it into a <c>QLearning</c> instance, prints the R
/// matrix, runs <c>Learn</c> for 1000 episodes, then prints the resulting Q
/// matrix and the path from start to cheese.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const int Seed = 42;
    const int GridSize = 4;
    const int BlockCount = 5;
    const int EpisodeCount = 1000;
    const float GammaValue = 0.8f;

    // A single seeded generator is shared by generation and learning,
    // so the order of the calls below must not change.
    var rng = new Random(Seed);

    FindTheCheese maze = FindTheCheese.Generate(rng, GridSize, BlockCount);
    var learner = new QLearning(GammaValue);

    maze.Set(learner);
    ConsoleExtensions.WriteMatrix(learner.R, "R");

    learner.Learn(maze.Start, maze.Cheese, EpisodeCount, GridSize, rng);
    ConsoleExtensions.WriteMatrix(learner.Q, "Final Q");

    learner.WritePath(maze.Start, maze.Cheese);
}