/// <summary>
/// Builds the sequence <c>ldarg.0; call GetValue; ret</c>, runs it through a stack frame,
/// then compiles the same instructions into a method and checks the compiled
/// method's return value against the model's <c>Value</c>.
/// The result of the interpreted run is not asserted here.
/// </summary>
public void TestEmitMethod()
{
    // Arrange: the model instance and the reflected method the IL will call.
    var model = new ILEngineUnitTestModel(1);
    var modelType = model.GetType();
    var getValueMethod = modelType.GetMethod(nameof(model.GetValue));
    var expected = model.Value;

    var il = new ILInstructionBuilder();
    il.Write(OpCodes.Ldarg_0);
    il.Write(OpCodes.Call, getValueMethod.MetadataToken);
    il.Write(OpCodes.Ret);

    // Act (interpreted): execute the instructions on a frame resolved against this test type.
    var frame = ILStackFrameBuilder.Build(il.Instructions);
    frame.SetResolver(this.GetType());
    frame.Args = new object[] { model };
    frame.Execute();

    // Act (compiled): emit the same instructions as a method named after this test.
    var emitted = new ILMethod(MethodBase.GetCurrentMethod().Name, expected.GetType());
    emitted.AddParameters(new[] { modelType });
    emitted.AddInstructions(il.Instructions.ToArray());
    emitted.Module = this.GetType().Module;
    var compiled = emitted.Compile();
    var actual = compiled.Invoke(null, new object[] { model });

    // Assert
    Assert.IsTrue((int)actual == expected, $"Actual: {actual}\r\nExpected:{expected}");
}
/// <summary>
/// Building a frame from a one-element opcode list must yield a stream of
/// exactly one instruction carrying that opcode.
/// </summary>
public void BuildOpCodesTest()
{
    var ret = OpCodes.Ret;
    var program = new[] { ret }.ToList();

    var frame = ILStackFrameBuilder.Build(program);

    Assert.IsTrue(frame.Stream.Count == 1);
    Assert.IsTrue(frame.Stream[0].OpCode == ret);
}
/// <summary>
/// Building a frame from a one-element instruction list must yield a stream of
/// exactly one instruction whose opcode matches the one the instruction was created from.
/// </summary>
public void BuildILInstructionsTest()
{
    var ret = OpCodes.Ret;
    var retInstruction = ILInstruction.Create(ret);
    var program = new[] { retInstruction }.ToList();

    var frame = ILStackFrameBuilder.Build(program);

    Assert.IsTrue(frame.Stream.Count == 1);
    Assert.IsTrue(frame.Stream[0].OpCode == ret);
}
/// <summary>
/// Smoke check for <c>ILStackFrameBuilder.BuildAndExecute</c>: runs a lone <c>nop</c>, then
/// <c>nop; ldarg.0</c> with a single argument of 1, and debug-asserts that the
/// second run reports 1 as its return result.
/// </summary>
internal static void TestStackFrameBuilder()
{
    var program = new List<OpCode>
    {
        OpCodes.Nop,
    };

    // First run: only executed, its result is not inspected.
    ILStackFrameBuilder.BuildAndExecute(program);

    program.Add(OpCodes.Ldarg_0);
    var withArgument = ILStackFrameBuilder.BuildAndExecute(program, args: new object[] { 1 });

    // The cast stays inside Debug.Assert so it is only evaluated in DEBUG builds.
    System.Diagnostics.Debug.Assert(((int)withArgument.ReturnResult) == 1);
}
/// <summary>
/// Runs <c>ldc.i4.1; ret</c> through <c>BuildAndExecute</c>, passing 1 as the second
/// argument (presumably the timeout, going by the test name - confirm against the
/// <c>BuildAndExecute</c> signature), and checks both the built stream and the returned value.
/// </summary>
public void BuildAndExecuteOpCodesWithTimeoutTest()
{
    var loadOne = OpCodes.Ldc_I4_1;
    var ret = OpCodes.Ret;
    var program = new[] { loadOne, ret }.ToList();

    var frame = ILStackFrameBuilder.BuildAndExecute(program, 1);

    // The stream mirrors the input opcodes, in order.
    Assert.IsTrue(frame.Stream.Count == 2);
    Assert.IsTrue(frame.Stream[0].OpCode == loadOne);
    Assert.IsTrue(frame.Stream[1].OpCode == ret);

    // The constant pushed by ldc.i4.1 is what comes back.
    var actual = frame.ReturnResult;
    Assert.IsNotNull(actual);
    var expected = 1;
    Assert.IsTrue((int)actual == expected, $"Actual:{actual}\r\nExpected:{expected}\r\n");
}
/// <summary>
/// Runs <c>ldc.i4.1; ret</c>, supplied as instructions rather than bare opcodes, through
/// <c>BuildAndExecute</c> and checks both the built stream and the returned value.
/// </summary>
public void BuildAndExecuteILInstructionsTest()
{
    var loadOne = OpCodes.Ldc_I4_1;
    var ret = OpCodes.Ret;
    var loadOneInstruction = ILInstruction.Create(loadOne);
    var retInstruction = ILInstruction.Create(ret);
    var program = new[] { loadOneInstruction, retInstruction }.ToList();

    var frame = ILStackFrameBuilder.BuildAndExecute(program);

    // The stream mirrors the input instructions, in order.
    Assert.IsTrue(frame.Stream.Count == 2);
    Assert.IsTrue(frame.Stream[0].OpCode == loadOne);
    Assert.IsTrue(frame.Stream[1].OpCode == ret);

    // The constant pushed by ldc.i4.1 is what comes back.
    var actual = frame.ReturnResult;
    Assert.IsNotNull(actual);
    var expected = 1;
    Assert.IsTrue((int)actual == expected, $"Actual:{actual}\r\nExpected:{expected}\r\n");
}
/// <summary>
/// Applies the "empty stack, no args, no locals, no inline operand" filters to every
/// known opcode, expects exactly 12 survivors, and verifies that each survivor
/// followed by <c>ret</c> executes two instructions without raising an exception.
/// </summary>
public void TestEmptyStackWithoutArgsOrLocalsAndNoOperandExclusions()
{
    var everyOpCode = OpCodeLookup.OpCodes
        .Select(entry => entry.Value)
        .AsQueryable<OpCode>();

    // Read kept from the original; the value itself is not used below.
    var byName = OpCodeLookup.OpCodesByName;

    var filters = OpCodeFilters.EmptyStackWithNoArgsLocalsAndNoInlineOperandFilters();
    var survivors = everyOpCode
        .Where(candidate => filters.All(accepts => accepts(candidate)))
        .ToList();

    Assert.IsTrue(survivors.Count == 12);

    foreach (var survivor in survivors)
    {
        // Program under test: the single surviving opcode, then ret.
        var program = new[] { survivor }.ToList();
        program.Add(OpCodes.Ret);

        var outcome = ILStackFrameBuilder.BuildAndExecute(program);

        Assert.IsTrue(outcome.ExecutedInstructions == 2);
        Assert.IsNull(outcome.Exception);
    }
}
/// <summary>
/// Checks that the "empty stack with args and locals" filter set admits 12 opcodes
/// with no arguments or locals, and exactly one more for each of four arguments and
/// then each of four locals added. Finally every admitted opcode, followed by
/// <c>ret</c>, must execute two instructions without raising an exception.
/// </summary>
public void TestBuildEmptyStackWithArgsAndLocalsAndInlineOnlyOperandFilters()
{
    var everyOpCode = OpCodeLookup.OpCodes.Select(entry => entry.Value).AsQueryable<OpCode>();

    // Read kept from the original; the value itself is not used below.
    var byName = OpCodeLookup.OpCodesByName;

    var args = new List<object>();
    var locals = new List<ILVariable>();

    // Rebuilds the filters from the current args/locals and materializes the admitted opcodes.
    System.Func<List<OpCode>> selectAdmitted = () =>
    {
        var filters = OpCodeFilters.BuildEmptyStackWithArgsAndLocalsAndInlineOnlyOperandFilters(args.ToArray(), locals.ToArray());
        return everyOpCode.Where(candidate => filters.All(accepts => accepts(candidate))).ToList();
    };

    int expectedCount = 12;
    var admitted = selectAdmitted();
    Assert.IsTrue(admitted.Count == 12);

    // Each added argument is expected to admit exactly one more opcode.
    for (var argValue = 0; argValue < 4; argValue++)
    {
        args.Add(argValue);
        expectedCount += 1;
        admitted = selectAdmitted();
        Assert.IsTrue(admitted.Count == expectedCount);
    }

    // Each added local is expected to admit exactly one more opcode.
    for (var localIndex = 0; localIndex < 4; localIndex++)
    {
        locals.Add(new ILVariable
        {
            Name = $"var{localIndex + 1}",
            Index = localIndex,
            Type = typeof(int),
            Value = localIndex
        });
        expectedCount += 1;
        admitted = selectAdmitted();
        Assert.IsTrue(admitted.Count == expectedCount);
    }

    foreach (var opCode in admitted)
    {
        // Program under test: the single admitted opcode, then ret.
        var program = new[] { opCode }.ToList();
        program.Add(OpCodes.Ret);

        var outcome = ILStackFrameBuilder.BuildAndExecute(program, args: args.ToArray(), locals: locals.ToArray());

        Assert.IsTrue(outcome.ExecutedInstructions == 2);
        Assert.IsNull(outcome.Exception);
    }
}
/// <summary>
/// Hand-assembles a three-way switch (case 0: add, case 1: sub, default: throw
/// <see cref="ArgumentOutOfRangeException"/>) and verifies it twice: first by
/// executing the instructions on a stack frame, then by compiling them into a
/// method and invoking it. Argument 0 selects the case; arguments 1 and 2 are the operands.
/// </summary>
public void TestEmitSwitch()
{
    // Shared jump target that both case bodies branch to.
    var endSwitchInstruction = ILInstruction.Create(ILOpCodeValues.Nop);
    endSwitchInstruction.Label = 2;

    var addInstructions = new[]
    {
        ILInstruction.Create(ILOpCodeValues.Add),
        ILInstruction.Create(ILOpCodeValues.Br_S, endSwitchInstruction)
    };
    var subInstructions = new[]
    {
        ILInstruction.Create(ILOpCodeValues.Sub),
        ILInstruction.Create(ILOpCodeValues.Br_S, endSwitchInstruction)
    };
    addInstructions[0].Label = 0;
    subInstructions[0].Label = 1;

    var exceptionType = typeof(ArgumentOutOfRangeException);
    var ctor = exceptionType.GetConstructor(Type.EmptyTypes);
    var defaultInstuctions = new[]
    {
        ILInstruction.Create(ILOpCodeValues.Newobj, ctor.MetadataToken),
        ILInstruction.Create(ILOpCodeValues.Throw)
    };
    var switchInstuction = ILInstruction.Create(ILOpCodeValues.Switch, new[] { addInstructions[0], subInstructions[0] });

    var builder = new ILInstructionBuilder();
    // left operand = arg[1]
    builder.Write(ILOpCodeValues.Ldarg_1);
    // right operand = arg[2]
    builder.Write(ILOpCodeValues.Ldarg_2);
    // switch (arg[0])
    builder.Write(ILOpCodeValues.Ldarg_0);
    builder.Write(switchInstuction);
    // default: written directly after the switch, so it runs when no case matches
    builder.Write(defaultInstuctions);
    // case 0: add
    builder.Write(addInstructions);
    // case 1: sub
    builder.Write(subInstructions);
    builder.Write(endSwitchInstruction);
    builder.Write(ILOpCodeValues.Ret);

    // ---- Interpreted execution ----
    var frame = ILStackFrameBuilder.Build(builder.Instructions);

    // case 0: 1 + 2
    frame.Args = new object[] { 0, 1, 2 };
    frame.Execute();
    var expected = 3;
    Assert.IsNull(frame.Exception, $"Executing switch: add threw an exception {frame.Exception}");
    Assert.IsTrue(frame.Stack.Count == 0, "Stack was not cleared executing switch: add");
    Assert.IsTrue((int)frame.ReturnResult == expected, $"Actual: {frame.ReturnResult}\r\nExpected: {expected}");

    // case 1: 1 - 2
    expected = -1;
    frame.Args = new object[] { 1, 1, 2 };
    frame.Execute();
    Assert.IsNull(frame.Exception, $"Executing switch: sub threw an exception {frame.Exception}");
    Assert.IsTrue(frame.Stack.Count == 0, "Stack was not cleared executing switch: sub");
    Assert.IsTrue((int)frame.ReturnResult == expected, $"Actual: {frame.ReturnResult}\r\nExpected: {expected}");

    // default: selector 2 matches no case, so the frame must record the thrown exception.
    frame.Args = new object[] { 2, 1, 2 };
    frame.Execute();
    Assert.IsNotNull(frame.Exception, "Executing switch failed to reach the default case and throw an exception.");
    Assert.IsInstanceOfType(frame.Exception, exceptionType, $"Frame failed to throw {nameof(ArgumentOutOfRangeException)}");
    Assert.IsNull(frame.ReturnResult, $"Actual: {frame.ReturnResult}\r\nExpected: [null]");

    // ---- Compiled execution ----
    var ilMethod = new ILMethod(MethodBase.GetCurrentMethod().Name, typeof(int));
    ilMethod.AddParameters(new[] { typeof(int), typeof(int), typeof(int) });
    ilMethod.AddInstructions(builder.Instructions.ToArray());
    ilMethod.Module = exceptionType.Module;
    var method = ilMethod.Compile();

    var actual = method.Invoke(null, new object[] { 0, 1, 2 });
    expected = 3;
    Assert.IsTrue((int)actual == expected, $"Actual: {actual}\r\nExpected:{expected}");

    actual = method.Invoke(null, new object[] { 1, 1, 2 });
    expected = -1;
    Assert.IsTrue((int)actual == expected, $"Actual: {actual}\r\nExpected:{expected}");

    // Reflection wraps the thrown exception; unwrap it to check the real type.
    Exception exception = null;
    try
    {
        actual = method.Invoke(null, new object[] { 2, 1, 2 });
    }
    catch (TargetInvocationException ex)
    {
        exception = ex.InnerException;
    }
    Assert.IsNotNull(exception, "Failed to catch argument exception");
    Assert.IsInstanceOfType(exception, exceptionType);
}
/// <summary>
/// Reads the instructions of the compiled <c>SwitchTest</c> method, manually labels and
/// wires up its jump targets, then executes the result on a stack frame with the
/// arguments 1, 0 and 2, expecting each run to return its own argument.
/// </summary>
public void TestEmitCompiledSwitch()
{
    // Positions inside the instruction array read from SwitchTest.
    const int SwitchIndex = 1;
    const int DefaultJumpIndex = 2;
    const int FirstCaseIndex = 3;
    const int FirstBreakIndex = 4;
    const int SecondCaseIndex = 5;
    const int SecondBreakIndex = 6;
    const int DefaultTargetIndex = 7;
    const int BreakTargetIndex = 8;

    var switchTestType = BuildSwitchTestType();
    var switchTestMethod = switchTestType.GetMethod("SwitchTest");
    Assert.IsNotNull(switchTestMethod);
    var instructions = ILInstructionReader.FromMethod(switchTestMethod).ToArray();

    //TODO: auto label read instructions.
    // Label the default-case target, then point the default jump at it.
    instructions[DefaultTargetIndex].Label = 0;
    instructions[DefaultJumpIndex].Arg = instructions[DefaultTargetIndex];

    // Label the break target, then point both case breaks at it.
    instructions[BreakTargetIndex].Label = 1;
    instructions[SecondBreakIndex].Arg = instructions[BreakTargetIndex];
    instructions[FirstBreakIndex].Arg = instructions[BreakTargetIndex];

    // Label the two case entry points and hand them to the switch instruction.
    instructions[FirstCaseIndex].Label = 2;
    instructions[SecondCaseIndex].Label = 3;
    instructions[SwitchIndex].Arg = new[] { instructions[FirstCaseIndex], instructions[SecondCaseIndex] };

    var builder = new ILInstructionBuilder();
    builder.Write(instructions.ToArray());

    //TODO: implement auto fixup of instuctions.
    var frame = ILStackFrameBuilder.Build(builder.Instructions);

    // Executes the frame with its current Args and checks for a clean run returning `expected`.
    System.Action<int> executeAndVerify = expected =>
    {
        frame.Execute();
        Assert.IsNull(frame.Exception, $"Executing switch: add throw an exception {frame?.Exception}");
        Assert.IsTrue(frame.Stack.Count == 0, "Stack was not cleared executing switch: add");
        Assert.IsTrue((int)frame.ReturnResult == expected, $"Actual: {frame.ReturnResult}\r\nExpected: {expected}");
    };

    frame.Args = new object[] { 1 };
    frame.Reset();

    // The table itself is unused; building it still enumerates the stream and
    // would throw if two instructions shared a ByteIndex.
    int position = -1;
    var jumpTable = frame.Stream.ToDictionary(x => (int)x.ByteIndex, x => ++position);

    executeAndVerify(1);

    frame.Args = new object[] { 0 };
    executeAndVerify(0);

    frame.Args = new object[] { 2 };
    executeAndVerify(2);
}
/// <summary>
/// Console entry point for the Q-learning opcode demo: sets up the maze and the
/// reward/quality matrices, executes a hard-coded element-swap program once, trains
/// the quality matrix, walks the learned solution, and waits for Enter before returning.
/// </summary>
public static void Run()
{
    const double gamma = .5;        // discount factor
    const double learnRate = .5;
    const int maxEpochs = 100000;

    Console.WriteLine("Beginning q-learning maze");
    Console.WriteLine("Setting up state");

    int stateCount = QOpCodeLearingGenerator.OpCodes.Count;
    var qMaze = QMaze.CreateDemo(stateCount);
    qMaze.Start = 0;
    double[][] rewardMatrix = CreateRewards(qMaze.NumStates);
    double[][] qualityMatrix = CreateQuality(qMaze.NumStates);
    qMaze.Goal = QOpCodeLearingGenerator.RetIndex;

    // Single program argument: the array { 1, 2 }; the expected output is that array swapped.
    var argList = new List<object> { new[] { 1, 2 } };
    var expected = new[] { 2, 1 };

    // Hand-written program: arg0[1] = arg0[0]; arg0[0] = arg0[1]; return arg0.
    var hardCoded = new List<OpCode>
    {
        OpCodes.Ldarg_0,
        OpCodes.Ldc_I4_1,
        OpCodes.Ldarg_0,
        OpCodes.Ldc_I4_0,
        OpCodes.Ldelem,
        OpCodes.Ldarg_0,
        OpCodes.Ldc_I4_0,
        OpCodes.Ldarg_0,
        OpCodes.Ldc_I4_1,
        OpCodes.Ldelem,
        OpCodes.Stelem,
        OpCodes.Stelem,
        OpCodes.Ldarg_0,
        OpCodes.Ret
    };

    // NOTE(review): this call and Train below receive the same int[] instance. If the
    // engine's Stelem writes through to it, Train starts from an already-swapped array - confirm.
    var hardCodedResult = ILStackFrameBuilder.BuildAndExecute(hardCoded, args: argList.ToArray());

    Train(qMaze, rewardMatrix, qualityMatrix, qMaze.Goal, gamma, learnRate, maxEpochs, expected, argList.ToArray());
    Console.WriteLine("Done.");

    Console.WriteLine("Solution");
    Walk(qMaze, qualityMatrix);

    Console.WriteLine("End demo");
    Console.ReadLine();
}
/// <summary>
/// Trains the quality (Q) matrix by random exploration, using an update rule based on
/// the Bellman equation. Each epoch starts from a random opcode and keeps appending
/// random next opcodes until the goal state is reached or the program reaches the
/// maximum length. When the appended opcode is the return opcode, the program built
/// so far is executed and scored against <paramref name="expectedResult"/>.
/// </summary>
/// <param name="qMaze">State graph handed to <c>GetRandNextState</c> to pick the next state.</param>
/// <param name="rewards">Only its length is read: it bounds the random start state.</param>
/// <param name="quality">Q matrix, indexed [currentState][nextState]; updated in place.</param>
/// <param name="goal">State index that ends the current epoch once reached.</param>
/// <param name="gamma">Discount factor applied to the best Q value reachable from the next state.</param>
/// <param name="learnRate">Weight of the new estimate versus the old Q value.</param>
/// <param name="maxEpochs">Number of training epochs to run.</param>
/// <param name="expectedResult">Value a generated program must return to earn the positive reward.</param>
/// <param name="args">Arguments supplied to every generated program when it is executed.</param>
private static void Train(QMaze qMaze, double[][] rewards, double[][] quality, int goal, double gamma, double learnRate, int maxEpochs, dynamic expectedResult, params dynamic[] args)
{
    /*
     * loop maxEpochs times
     *   set currState = a random state
     *   while currState != goalState
     *     pick a random next-state but don't move yet
     *     find largest Q for all next-next-states
     *     update Q[currState][nextState] using Bellman
     *     move to nextState
     *   end-while
     * end-loop
     *
     * The number of training epochs must be determined by trial and error.
     * An alternative design is to iterate until the values in the Q matrix
     * stop changing, or change only by very small amounts per iteration.
     */
    const int maxOpCodeLength = 10;

    // Compare as plain objects: no dynamic operator binding is needed from here on.
    object expected = expectedResult;

    for (int epoch = 0; epoch < maxEpochs; ++epoch)
    {
        int startState = rnd.Next(0, rewards.Length);
        var currentOpCode = QOpCodeLearingGenerator.OpCodes[startState];
        Console.Title = $"Epoch {epoch} of {maxEpochs} : {currentOpCode.Name}";

        var program = new List<OpCode> { currentOpCode };
        int currState = startState;

        while (program.Count < maxOpCodeLength)
        {
            int nextState = GetRandNextState(currState, qMaze);
            program.Add(QOpCodeLearingGenerator.OpCodes[nextState]);

            /*
             * Imagine you're in a maze and can go to rooms A, B or C. You pick B but
             * don't move yet. A friend looks into B and reports that from B you can
             * reach X, Y and Z, and that Y has the best Q value. Y is the best
             * next-next state; its Q value is maxQ below.
             */
            //TODO: make this smarter - every opcode index is currently treated as reachable.
            List<int> possNextNextStates = QOpCodeLearingGenerator.OpIndexes;
            double maxQ = double.MinValue;
            foreach (int nextNextState in possNextNextStates)
            {
                double q = quality[nextState][nextNextState];
                if (q > maxQ)
                {
                    maxQ = q;
                }
            }

            // Every step costs a little; a program is only executed and scored once it ends in ret.
            double reward = -.1;
            if (nextState == QOpCodeLearingGenerator.RetIndex)
            {
                reward = ScoreProgram(program, expected, args);
            }

            /*
             * The update has two parts. ((1 - learnRate) * Q[curr][next]) is the exploit
             * component and keeps a fraction of the old value. (learnRate * (reward +
             * gamma * maxQ)) is the explore component. A larger learnRate favours current
             * and future rewards over past ones; gamma weights the future rewards.
             */
            quality[currState][nextState] =
                ((1 - learnRate) * quality[currState][nextState]) +
                (learnRate * (reward + (gamma * maxQ)));

            currState = nextState;
            if (currState == goal)
            {
                break;
            }
        }
    }
}

/// <summary>
/// Executes <paramref name="program"/> and returns its reward: -0.2 when execution
/// recorded an exception, 1 when the returned value has the same runtime type as
/// <paramref name="expected"/> and is structurally equal to it, and -0.1 otherwise.
/// </summary>
private static double ScoreProgram(List<OpCode> program, object expected, dynamic[] args)
{
    var frame = ILStackFrameBuilder.BuildAndExecute(program, 3, args: args);
    if (frame.Exception != null)
    {
        // Programs that fail are penalized harder than a plain step.
        return -.2;
    }

    object actual = frame.ReturnResult;
    if (actual == null || expected == null || actual.GetType() != expected.GetType())
    {
        return -.1;
    }

    // Structural comparison: arrays are compared element by element, everything else
    // falls back to Equals. A dynamic '==' here would be reference equality for
    // arrays and can throw a binder exception for boxed value types.
    if (!System.Collections.StructuralComparisons.StructuralEqualityComparer.Equals(actual, expected))
    {
        return -.1;
    }

    var rewardSteps = string.Join(", ", program.ToArray());
    Console.WriteLine($"Found reward {rewardSteps}.");
    return 1;
}