// Rebuilds the shared fixture: the Figure 17.1 cell world, the Figure 17.3 MDP
// defined over it, and a policy-iteration solver backed by modified policy
// evaluation. Results are stored in the cw, mdp and pi members.
public void setUp()
{
    cw = CellWorldFactory.CreateCellWorldForFig17_1();
    mdp = MDPFactory.createMDPForFigure17_3(cw);

    // NOTE(review): (50, 1.0) presumably means 50 evaluation sweeps with a
    // discount of 1.0 - confirm against ModifiedPolicyEvaluation's constructor.
    var evaluation = new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(50, 1.0);
    pi = new PolicyIteration<Cell<double>, CellWorldAction>(evaluation);
}
/// <summary>
/// Console demo: runs policy iteration on the Figure 17.3 MDP (built over the
/// Figure 17.1 cell world) and prints the action the resulting policy selects
/// for each reported cell.
/// </summary>
static void policyIterationDemo()
{
    System.Console.WriteLine("DEMO: Policy Iteration");
    System.Console.WriteLine("======================");
    System.Console.WriteLine("Figure 17.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    IMarkovDecisionProcess<Cell<double>, CellWorldAction> mdp = MDPFactory.createMDPForFigure17_3(world);

    var evaluation = new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(50, 1.0);
    var solver = new PolicyIteration<Cell<double>, CellWorldAction>(evaluation);
    IPolicy<Cell<double>, CellWorldAction> policy = solver.policyIteration(mdp);

    // Cells are reported column by column: x from 1 to 4, and y from 1 to 3 within each x.
    for (int x = 1; x <= 4; x++)
    {
        for (int y = 1; y <= 3; y++)
        {
            // (2,2) is never reported - presumably the blocked cell of the
            // Figure 17.1 grid; confirm against CellWorldFactory.
            if (x == 2 && y == 2)
            {
                continue;
            }

            System.Console.WriteLine("(" + x + "," + y + ") = " + policy.action(world.GetCellAt(x, y)));
        }
    }

    System.Console.WriteLine("=========================");
}
/// <summary>
/// Runs every solver over a grid of parameters for one environment, echoing
/// progress to the console and appending one semicolon-separated row per run
/// to a freshly created <c>resultado_{ambiente}_{ticks}.csv</c> file.
/// Policy Iteration runs once per gamma; Value Iteration and Prioritized
/// Sweeping once per (gamma, epsilon); Modified Policy Iteration once per
/// (gamma, epsilon, m).
/// </summary>
/// <param name="ambiente">Environment label used in console messages and in the CSV file name.</param>
/// <param name="matrizRecompensa">Reward data, forwarded unchanged to each solver's <c>Run</c>.</param>
/// <param name="matrizTransicao">Transition data, forwarded unchanged to each solver's <c>Run</c>.</param>
/// <param name="largura">Forwarded to <c>Print</c> as its first argument.</param>
/// <param name="altura">Forwarded to <c>Print</c> as its second argument.</param>
/// <param name="qtdePisos">Forwarded to <c>Print</c> as its third argument.</param>
private static void ExecutarAlgoritmos(string ambiente, double[] matrizRecompensa, IList<double[][]> matrizTransicao, int largura, int altura, int qtdePisos)
{
    // Parameter grids. For a quick smoke run, shrink each grid to a single
    // value (e.g. epsilon = 10^-5, gamma = 0.9, m = 1).
    var tolerances = new[]
    {
        Math.Pow(10, -20),
        Math.Pow(10, -15),
        Math.Pow(10, -10),
        Math.Pow(10, -5),
        Math.Pow(10, -1)
    };
    var discounts = new[] { 1.0, 0.99, 0.9, 0.8, 0.5 };
    var mValues = new[] { 1, 2, 3 };

    // The tick count makes the file name differ from run to run.
    string csvPath = $"resultado_{ambiente}_{DateTime.Now.Ticks.ToString()}.csv";

    using (var csv = new StreamWriter(csvPath, false))
    {
        csv.WriteLine("algoritmo;gama;epsilon;m;totalIterations;tempo;estadosIndecisos");

        foreach (var discount in discounts)
        {
            Console.WriteLine($"{ambiente} - Policy Iteration - Gama = {discount}");
            var outcome = PolicyIteration.Run(matrizRecompensa, matrizTransicao, discount);

            // Shared tail of every run: report convergence, append the CSV row,
            // print the policy/values, then a blank separator line. The lambda
            // captures the 'outcome' variable itself, so it always sees the most
            // recently assigned result.
            Action<string> record = rowPrefix =>
            {
                Console.WriteLine($"Convergiu em {outcome.tempo} num total de {outcome.totalIterations} iterações");
                csv.WriteLine($"{rowPrefix};{outcome.totalIterations};{outcome.tempo};{outcome.estadosIndecisos}");
                Print(largura, altura, qtdePisos, outcome.pi, outcome.vPi);
                Console.WriteLine();
            };

            record($"pi;{discount};-;-");

            foreach (var tolerance in tolerances)
            {
                Console.WriteLine($"{ambiente} - Value Iteration - Gama = {discount} - Epsilon = {tolerance}");
                outcome = ValueIteration.Run(matrizRecompensa, matrizTransicao, discount, tolerance);
                record($"vi;{discount};{tolerance};-");

                Console.WriteLine($"{ambiente} - Prioritized Sweeping - Gama = {discount} - Epsilon = {tolerance}");
                outcome = PrioritizedSweeping.Run(matrizRecompensa, matrizTransicao, discount, tolerance);
                record($"ps;{discount};{tolerance};-");

                foreach (var m in mValues)
                {
                    Console.WriteLine($"{ambiente} - Modified Policy Iteration - Gama = {discount} - Epsilon = {tolerance} - m = {m}");
                    outcome = ModifiedPolicyIteration.Run(matrizRecompensa, matrizTransicao, discount, tolerance, m);
                    record($"mpi;{discount};{tolerance};{m}");
                }
            }
        }
    }
}