/// <summary>
/// Console demo: builds the Figure 17.1 cell world and its Figure 17.3 MDP, runs value
/// iteration on it, and prints the resulting value for each cell.
/// </summary>
static void valueIterationDemo()
{
    System.Console.WriteLine("DEMO: Value Iteration");
    System.Console.WriteLine("=====================");
    System.Console.WriteLine("Figure 17.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    IMarkovDecisionProcess<Cell<double>, CellWorldAction> process = MDPFactory.createMDPForFigure17_3(world);
    ValueIteration<Cell<double>, CellWorldAction> solver = new ValueIteration<Cell<double>, CellWorldAction>(1.0);
    IMap<Cell<double>, double> utilities = solver.valueIteration(process, 0.0001);

    // Walk the grid column by column (x = 1..4), bottom to top (y = 1..3).
    for (int x = 1; x <= 4; x++)
    {
        for (int y = 1; y <= 3; y++)
        {
            // Cell (2,2) is never reported.
            // NOTE(review): presumably it is not a reachable state in this world - confirm.
            if (x == 2 && y == 2)
            {
                continue;
            }

            System.Console.WriteLine("(" + x + "," + y + ") = " + utilities.Get(world.GetCellAt(x, y)));
        }
    }

    System.Console.WriteLine("=========================");
}
/// <summary>
/// Rebuilds the shared fixtures: the Figure 17.1 cell world, the Figure 17.3 MDP derived
/// from that world, and a value-iteration instance constructed with 1.0.
/// </summary>
public void setUp()
{
    // The MDP is created from the world, so the world has to be assigned first.
    cw = CellWorldFactory.CreateCellWorldForFig17_1();
    mdp = MDPFactory.createMDPForFigure17_3(cw);

    // NOTE(review): the 1.0 argument is presumably the discount factor - confirm against ValueIteration.
    vi = new ValueIteration<Cell<double>, CellWorldAction>(1.0);
}
/// <summary>
/// Runs Policy Iteration, Value Iteration, Prioritized Sweeping and Modified Policy Iteration
/// over a grid of gamma / epsilon / m values. Each run is announced on the console, appended
/// as one row to a semicolon-separated results file, and handed to <c>Print</c>.
/// </summary>
/// <param name="ambiente">Label used in the console messages and in the results file name.</param>
/// <param name="matrizRecompensa">Reward data, passed unchanged to every algorithm's <c>Run</c>.</param>
/// <param name="matrizTransicao">Transition data, passed unchanged to every algorithm's <c>Run</c>.</param>
/// <param name="largura">Forwarded to <c>Print</c>.</param>
/// <param name="altura">Forwarded to <c>Print</c>.</param>
/// <param name="qtdePisos">Forwarded to <c>Print</c>.</param>
private static void ExecutarAlgoritmos(string ambiente, double[] matrizRecompensa, IList<double[][]> matrizTransicao, int largura, int altura, int qtdePisos)
{
    // Parameter grids. For a quick single-configuration run, shrink each grid to one value
    // (epsilon 10^-5, gamma 0.9, m 1).
    double[] limiares = { Math.Pow(10, -20), Math.Pow(10, -15), Math.Pow(10, -10), Math.Pow(10, -5), Math.Pow(10, -1) };
    double[] descontos = { 1.0, 0.99, 0.9, 0.8, 0.5 };
    int[] passos = { 1, 2, 3 };

    // The tick count makes the file name distinct for each invocation.
    string carimbo = DateTime.Now.Ticks.ToString();

    using (var saida = new StreamWriter($"resultado_{ambiente}_{carimbo}.csv", false))
    {
        saida.WriteLine("algoritmo;gama;epsilon;m;totalIterations;tempo;estadosIndecisos");

        foreach (var gama in descontos)
        {
            Console.WriteLine($"{ambiente} - Policy Iteration - Gama = {gama}");
            var retorno = PolicyIteration.Run(matrizRecompensa, matrizTransicao, gama);

            // Common tail of every run: report on the console, append the CSV row, print the
            // result and leave a blank line. It reads whatever `retorno` currently holds.
            Action<string> registrar = linhaCsv =>
            {
                Console.WriteLine($"Convergiu em {retorno.tempo} num total de {retorno.totalIterations} iterações");
                saida.WriteLine(linhaCsv);
                Print(largura, altura, qtdePisos, retorno.pi, retorno.vPi);
                Console.WriteLine();
            };

            registrar($"pi;{gama};-;-;{retorno.totalIterations};{retorno.tempo};{retorno.estadosIndecisos}");

            foreach (var epsilon in limiares)
            {
                Console.WriteLine($"{ambiente} - Value Iteration - Gama = {gama} - Epsilon = {epsilon}");
                retorno = ValueIteration.Run(matrizRecompensa, matrizTransicao, gama, epsilon);
                registrar($"vi;{gama};{epsilon};-;{retorno.totalIterations};{retorno.tempo};{retorno.estadosIndecisos}");

                Console.WriteLine($"{ambiente} - Prioritized Sweeping - Gama = {gama} - Epsilon = {epsilon}");
                retorno = PrioritizedSweeping.Run(matrizRecompensa, matrizTransicao, gama, epsilon);
                registrar($"ps;{gama};{epsilon};-;{retorno.totalIterations};{retorno.tempo};{retorno.estadosIndecisos}");

                foreach (var m in passos)
                {
                    Console.WriteLine($"{ambiente} - Modified Policy Iteration - Gama = {gama} - Epsilon = {epsilon} - m = {m}");
                    retorno = ModifiedPolicyIteration.Run(matrizRecompensa, matrizTransicao, gama, epsilon, m);
                    registrar($"mpi;{gama};{epsilon};{m};{retorno.totalIterations};{retorno.tempo};{retorno.estadosIndecisos}");
                }
            }
        }
    }
}
/// <summary>
/// Runs the value-iteration step by hand for a fixed 1000 sweeps so that intermediate results
/// can be plotted: every 10th sweep the current control and cost arrays are written as
/// numbered PNG files and the largest cost change of that sweep is printed to the console.
/// </summary>
private static void AnimateValueIteration()
{
    const int sweepCount = 1000;
    const int framePeriod = 10;
    const double discount = 0.999;

    double[] terminalCost = GetFinalCost(xPoints, yPoints, xRange, yRange);
    var controlLevels = new double[] { -1.0, 0.0, 1.0 };

    // Invoked below as (state, 0.01, control level); the arguments are forwarded to
    // Systems.DoubleIntegrator in that same order.
    Func<double[], double, double, double[]> stateEquation =
        (state, step, control) => Systems.DoubleIntegrator(state, step, control);

    // One two-element state per grid index pair.
    var gridStates = ArrayEx2.Init(
        xPoints,
        yPoints,
        (col, row) => new double[]
        {
            StateValueForIndex(xPoints, col, xRange),
            StateValueForIndex(yPoints, row, yRange)
        });

    // For each control level, the result of applying the state equation to every grid state.
    var nextStatesPerControl = controlLevels
        .Select(level => gridStates.Select(point => stateEquation(point, 0.01, level)).ToArray())
        .ToArray();

    var size = xPoints * yPoints;
    var costNow = terminalCost.ToArray();
    var costNext = new double[size];
    var bestControl = new double[size];

    var xTolerance = xRange / xPoints;
    var yTolerance = yRange / yPoints;

    // Minimum-time cost: zero when both state components are within tolerance of zero,
    // otherwise one per step. The control argument is unused (it would matter for an LQR step).
    Func<double[], double, double> costPerStep = (point, level) =>
    {
        bool withinTolerance = Math.Abs(point[0]) < xTolerance && Math.Abs(point[1]) < yTolerance;
        if (withinTolerance)
        {
            return 0.0;
        }

        return 1.0;
    };

    for (var sweep = 0; sweep < sweepCount; ++sweep)
    {
        bool isFrame = sweep % framePeriod == 0;

        if (isFrame)
        {
            // Plots show the arrays as they stand BEFORE this sweep's update.
            var frameTag = (sweep / framePeriod).ToString("D3");
            var controlPath = "E:/temp/plot/control/" + frameTag + ".png";
            var costPath = "E:/temp/plot/cost/" + frameTag + ".png";
            Write2DPlot(controlPath, "control", bestControl, xPoints, yPoints, xRange, yRange);
            Write2DPlot(costPath, "cost", costNow, xPoints, yPoints, xRange, yRange);
        }

        ValueIteration.ValueIterationStep(
            terminalCost, costNow, costNext, bestControl,
            nextStatesPerControl, controlLevels, discount, costPerStep,
            xPoints, yPoints, xRange, yRange);

        if (isFrame)
        {
            var largestChange = costNext.Zip(costNow, (a, b) => Math.Abs(a - b)).Max();
            Console.WriteLine(sweep.ToString("D3") + " " + largestChange.ToString());
        }

        // The freshly computed costs become the input of the next sweep.
        Array.Copy(costNext, costNow, costNext.Length);
    }
}
/// <summary>
/// Runs value iteration, simulates 2000 steps from a fixed initial state using the resulting
/// control table, and pipes every 10th (x, v, control) sample to the program at
/// <c>_pathToGnuPlot</c>, which renders it to <c>E:/temp/plot/trajectory.png</c>.
/// </summary>
/// <remarks>
/// Data rows are formatted with the invariant culture so the decimal separator is always '.',
/// and the method waits for the plotting process to exit before returning.
/// </remarks>
private static void PlotTrajectory()
{
    double[] finalCost = GetFinalCost(xPoints, yPoints, xRange, yRange);
    var discretisedControl = new double[] { -1.0, 0.0, 1.0 };

    // Arguments are forwarded to Systems.DoubleIntegrator positionally; the direct call in the
    // simulation loop below uses the order (state, 0.01, control).
    Func<double[], double, double, double[]> stateEquation = (s, dt, u) => Systems.DoubleIntegrator(s, dt, u);

    var result = ValueIteration.ValueIterate(finalCost, discretisedControl, stateEquation, xPoints, yPoints, xRange, yRange, 10000);
    var controls = result.Item2;
    var count = result.Item3;
    Console.WriteLine("Converged in {0} iterations", count);

    // Initial state at x = -2.0, heading in the negative x direction with v = -1.5 units/sec.
    var state = new double[] { -2.0, -1.5 };

    const int numIterations = 2000;
    const int sampleEvery = 10;
    var statesAndControls = new List<double[]>();

    for (var iteration = 0; iteration < numIterations; iteration++)
    {
        // Look up the control for the current state by interpolating in the control table.
        var appliedControl = MathHelper.FindInterpolatedCost(state, controls, xPoints, xRange, yPoints, yRange);
        state = Systems.DoubleIntegrator(state, 0.01, appliedControl);

        if (iteration % sampleEvery == 0)
        {
            // Each sample pairs the post-step state with the control that produced it.
            statesAndControls.Add(new[] { state[0], state[1], appliedControl });
        }
    }

    using (var process = new Process())
    {
        process.StartInfo.FileName = _pathToGnuPlot;
        process.StartInfo.CreateNoWindow = true;
        process.StartInfo.UseShellExecute = false;
        process.StartInfo.RedirectStandardInput = true;
        process.Start();

        using (StreamWriter sw = process.StandardInput)
        {
            var outputFileName = "E:/temp/plot/trajectory.png";
            var title = "trajectory";

            sw.WriteLine("set view map");
            sw.WriteLine("set terminal png size 640,480");
            sw.WriteLine("set output \"" + outputFileName + "\"");
            sw.WriteLine("set title \"" + title + "\"");
            sw.WriteLine("set xlabel \"x\"");
            sw.WriteLine("set ylabel \"v\"");
            sw.WriteLine("set style data lines");

            // The embedded "\n" plus WriteLine's own terminator leaves a blank line after the
            // command, exactly as before.
            sw.WriteLine("splot '-' using 1:2:3 with lines palette" + "\n");

            foreach (var sample in statesAndControls)
            {
                // Invariant culture: the consumer is a program, not a person, and a
                // locale-specific decimal comma would corrupt the numeric columns.
                sw.WriteLine(string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0} {1} {2}",
                    sample[0],
                    sample[1],
                    sample[2]));
            }

            sw.WriteLine(" e "); // terminate data
            sw.Flush();
        }

        // Standard input is closed at this point, so the child sees end-of-input. Wait for it
        // to finish so the output file is complete when this method returns.
        process.WaitForExit();
    }
}