/// <summary>
/// Draws the learned Q-values and the per-cell visit counters onto the grid control.
/// As a side effect the <c>optimalPath</c> table is rebuilt: for every cell it keeps
/// the action(s) that carry the highest Q-value seen in <c>ql.QTable</c>.
/// </summary>
/// <param name="ql">
/// Learner whose <c>QTable</c> (keyed by cell/direction pairs, values unboxed as float)
/// and <c>VisitedStateActions</c> (same keys, values unboxed as long) are plotted.
/// </param>
private void __plot_policy(ReinforcementLearning.RLearning ql)
{
    const int margin = 23;

    // cell -> list of (direction, Q-value) pairs for every action recorded on that cell.
    Hashtable actionsByCell = new Hashtable();
    optimalPath.Clear();

    // Group the Q-table by cell and, in the same pass, track the best action(s) per cell.
    foreach (KeyValuePair<Point, GridHelper.Directions> s in ql.QTable.Keys)
    {
        float q = (float)ql.QTable[s];

        if (actionsByCell.Contains(s.Key))
        {
            var actions = (List<KeyValuePair<GridHelper.Directions, float>>)actionsByCell[s.Key];
            actions.Add(new KeyValuePair<GridHelper.Directions, float>(s.Value, q));
        }
        else
        {
            actionsByCell.Add(s.Key, new List<KeyValuePair<GridHelper.Directions, float>>()
            {
                new KeyValuePair<GridHelper.Directions, float>(s.Value, q)
            });
        }

        if (optimalPath.Contains(s.Key))
        {
            var best = (List<KeyValuePair<float, GridHelper.Directions>>)optimalPath[s.Key];
            if (q > best[0].Key)
            {
                // Strictly better action: it replaces everything collected so far.
                optimalPath[s.Key] = new List<KeyValuePair<float, GridHelper.Directions>>()
                {
                    new KeyValuePair<float, GridHelper.Directions>(q, s.Value)
                };
            }
            else if (q == best[0].Key)
            {
                // Exact tie (bitwise-equal float): keep both actions as equally good.
                best.Add(new KeyValuePair<float, GridHelper.Directions>(q, s.Value));
            }
        }
        else
        {
            optimalPath.Add(s.Key, new List<KeyValuePair<float, GridHelper.Directions>>()
            {
                new KeyValuePair<float, GridHelper.Directions>(q, s.Value)
            });
        }
    }

    // One shared font for every label; the original allocated a new Font per label
    // and never disposed it, leaking a GDI handle on each draw call.
    using (var gfx = this.grid.CreateGraphics())
    using (var labelFont = new Font("Arial", 10, FontStyle.Bold))
    {
        // Draw one triangle per directional action with its Q-value written on top.
        foreach (Point cell in actionsByCell.Keys)
        {
            foreach (KeyValuePair<GridHelper.Directions, float> dir in (List<KeyValuePair<GridHelper.Directions, float>>)actionsByCell[cell])
            {
                var p = g.abs2grid(cell);
                string label = dir.Value.ToString("F1");

                switch (dir.Key)
                {
                    case GridHelper.Directions.NORTH:
                        gfx.FillPolygon(Brushes.LightBlue, new Point[]
                        {
                            new Point(p.X - margin, p.Y - margin),
                            new Point(p.X + margin, p.Y - margin),
                            new Point(p.X, p.Y - 2 * margin)
                        });
                        this.g.Write(label, new Point(p.X - margin + 7, p.Y - 2 * margin + 7), gfx, Brushes.DarkBlue, labelFont);
                        break;

                    case GridHelper.Directions.EAST:
                        gfx.FillPolygon(Brushes.LightBlue, new Point[]
                        {
                            new Point(p.X + margin, p.Y - margin),
                            new Point(p.X + margin, p.Y + margin),
                            new Point(p.X + 2 * margin, p.Y)
                        });
                        this.g.Write(label, new Point(p.X + margin - 4, p.Y - margin / 2), gfx, Brushes.DarkBlue, labelFont);
                        break;

                    case GridHelper.Directions.SOUTH:
                        gfx.FillPolygon(Brushes.LightBlue, new Point[]
                        {
                            new Point(p.X - margin, p.Y + margin),
                            new Point(p.X + margin, p.Y + margin),
                            new Point(p.X, p.Y + 2 * margin)
                        });
                        this.g.Write(label, new Point(p.X - margin + 10, p.Y + margin), gfx, Brushes.DarkBlue, labelFont);
                        break;

                    case GridHelper.Directions.WEST:
                        gfx.FillPolygon(Brushes.LightBlue, new Point[]
                        {
                            new Point(p.X - margin, p.Y - margin),
                            new Point(p.X - margin, p.Y + margin),
                            new Point(p.X - 2 * margin, p.Y)
                        });
                        this.g.Write(label, new Point(p.X - 2 * margin, p.Y - margin / 2), gfx, Brushes.DarkBlue, labelFont);
                        break;

                    case GridHelper.Directions.HOLD:
                        // HOLD has no triangle; only its value is written at the top-left offset.
                        this.g.Write(label, new Point(p.X - 2 * margin, p.Y - 2 * margin), gfx, Brushes.DarkBlue, labelFont);
                        break;

                    default:
                        break;
                }
            }
        }

        // Sum the per-(cell, action) visit counters into one total per cell.
        Hashtable visitsByCell = new Hashtable();
        foreach (KeyValuePair<Point, GridHelper.Directions> cell in ql.VisitedStateActions.Keys)
        {
            long count = (long)ql.VisitedStateActions[cell];
            if (visitsByCell.Contains(cell.Key))
            {
                visitsByCell[cell.Key] = (long)visitsByCell[cell.Key] + count;
            }
            else
            {
                visitsByCell.Add(cell.Key, count);
            }
        }

        // Write the visit total of every cell, prefixed with '#'.
        foreach (Point cell in visitsByCell.Keys)
        {
            var p = g.abs2grid(cell);
            this.g.Write("#" + visitsByCell[cell].ToString(), new Point(p.X + 2 * margin / 3, p.Y - 2 * margin), gfx, Brushes.Brown, labelFont);
        }
    }
}
/// <summary>
/// Writes the utility value of every cell onto the grid control for two utility
/// sources. Values from <paramref name="tdl"/> are prefixed with "T" and placed to
/// the right below the cell anchor; values from <paramref name="adp"/> are prefixed
/// with "A" and placed to the left.
/// </summary>
/// <param name="tdl">First utility source; its <c>UTable</c> values are unboxed as float.</param>
/// <param name="adp">Second utility source; its <c>UTable</c> values are unboxed as float.</param>
private void __plot_utility(ReinforcementLearning.IUtility tdl, ReinforcementLearning.IUtility adp)
{
    const int margin = 23;
    var sources = new List<ReinforcementLearning.IUtility> { tdl, adp };

    // One shared font for every label; the original allocated a new Font per cell
    // and never disposed it, leaking a GDI handle on each draw call.
    using (var gfx = this.grid.CreateGraphics())
    using (var labelFont = new Font("Arial", 8, FontStyle.Bold))
    {
        for (var i = 0; i < sources.Count; i++)
        {
            var util = sources[i];

            // The first source is drawn right of the anchor, the second one left of it,
            // so both values stay readable inside the same cell.
            bool isFirst = i == 0;
            string prefix = isFirst ? "T" : "A";
            int offsetX = isFirst ? 2 * margin / 3 : -2 * margin;

            foreach (Point cell in util.UTable.Keys)
            {
                var anchor = g.abs2grid(cell);
                var value = (float)util.UTable[cell];
                var position = new Point(anchor.X + offsetX, anchor.Y + margin + 7);

                this.g.Write(prefix + value.ToString("0.##"), position, gfx, Brushes.Brown, labelFont);
            }
        }
    }
}