Example #1
        internal static void TestStackFrameBuilder()
        {
            var opcodes = new List<OpCode>
            {
                OpCodes.Nop,
            };

            // Build and execute a frame that contains only Nop.
            var result = ILStackFrameBuilder.BuildAndExecute(opcodes);

            // Append Ldarg_0 so the frame loads its first argument onto the stack.
            opcodes.Add(OpCodes.Ldarg_0);

            var result2 = ILStackFrameBuilder.BuildAndExecute(opcodes, args: new object[] { 1 });

            System.Diagnostics.Debug.Assert(((int)result2.ReturnResult) == 1);
        }
Example #2
        public void BuildAndExecuteOpCodesWithTimeoutTest()
        {
            var opCode1 = OpCodes.Ldc_I4_1;
            var opCode2 = OpCodes.Ret;
            var opCodes = (new[] { opCode1, opCode2 }).ToList();
            var frame   = ILStackFrameBuilder.BuildAndExecute(opCodes, 1); // second argument is the timeout in seconds

            Assert.IsTrue(frame.Stream.Count == 2);
            Assert.IsTrue(frame.Stream[0].OpCode == opCode1);
            Assert.IsTrue(frame.Stream[1].OpCode == opCode2);
            var actual = frame.ReturnResult;

            Assert.IsNotNull(actual);
            var expected = 1;

            Assert.IsTrue((int)actual == expected, $"Actual:{actual}\r\nExpected:{expected}\r\n");
        }
Example #3
        public void BuildAndExecuteILInstructionsTest()
        {
            var opCode1      = OpCodes.Ldc_I4_1;
            var opCode2      = OpCodes.Ret;
            var instruction1 = ILInstruction.Create(opCode1);
            var instruction2 = ILInstruction.Create(opCode2);
            var instructions = (new[] { instruction1, instruction2 }).ToList();
            var frame        = ILStackFrameBuilder.BuildAndExecute(instructions);

            Assert.IsTrue(frame.Stream.Count == 2);
            Assert.IsTrue(frame.Stream[0].OpCode == opCode1);
            Assert.IsTrue(frame.Stream[1].OpCode == opCode2);
            var actual = frame.ReturnResult;

            Assert.IsNotNull(actual);
            var expected = 1;

            Assert.IsTrue((int)actual == expected, $"Actual:{actual}\r\nExpected:{expected}\r\n");
        }
Example #4
        public void TestEmptyStackWithoutArgsOrLocalsAndNoOperandExclusions()
        {
            var allOpCodes = OpCodeLookup.OpCodes.Select(x => x.Value).AsQueryable<OpCode>();

            var opcodesbyname = OpCodeLookup.OpCodesByName;

            var filters = OpCodeFilters.EmptyStackWithNoArgsLocalsAndNoInlineOperandFilters();
            allOpCodes = allOpCodes.Where(x => filters.All((filter) => filter(x)));

            var rem = allOpCodes.ToList();
            Assert.IsTrue(rem.Count == 12);
            for (var i = 0; i < rem.Count; i++)
            {
                var opCodes = rem.Skip(i).Take(1).ToList();
                opCodes.Add(OpCodes.Ret);
                var result = ILStackFrameBuilder.BuildAndExecute(opCodes);
                Assert.IsTrue(result.ExecutedInstructions == 2);
                Assert.IsNull(result.Exception);
            }
        }
Example #5
        public void TestBuildEmptyStackWithArgsAndLocalsAndInlineOnlyOperandFilters()
        {
            var allOpCodes = OpCodeLookup.OpCodes.Select(x => x.Value).AsQueryable<OpCode>();

            var opcodesbyname = OpCodeLookup.OpCodesByName;

            List<object> args = new List<object>();
            List<ILVariable> iLVariables = new List<ILVariable>();
            var filters = OpCodeFilters.BuildEmptyStackWithArgsAndLocalsAndInlineOnlyOperandFilters(args.ToArray(), iLVariables.ToArray());
            int expected = 12;
            var rem = allOpCodes.Where(x => filters.All((filter) => filter(x))).ToList();

            Assert.IsTrue(rem.Count == expected);
            for (var argCount = 1; argCount < 5; argCount++)
            {
                args.Add(argCount - 1);
                filters = OpCodeFilters.BuildEmptyStackWithArgsAndLocalsAndInlineOnlyOperandFilters(args.ToArray(), iLVariables.ToArray());
                expected += 1;
                rem = allOpCodes.Where(x => filters.All((filter) => filter(x))).ToList();
                Assert.IsTrue(rem.Count == expected);
            }
            for (var variableCount = 1; variableCount < 5; variableCount++)
            {
                iLVariables.Add(new ILVariable { Name = $"var{variableCount}", Index = variableCount - 1, Type = typeof(int), Value = variableCount - 1 });
                filters = OpCodeFilters.BuildEmptyStackWithArgsAndLocalsAndInlineOnlyOperandFilters(args.ToArray(), iLVariables.ToArray());
                expected += 1;
                rem = allOpCodes.Where(x => filters.All((filter) => filter(x))).ToList();
                Assert.IsTrue(rem.Count == expected);
            }

            for (var i = 0; i < rem.Count; i++)
            {
                var opCodes = rem.Skip(i).Take(1).ToList();
                opCodes.Add(OpCodes.Ret);
                var result = ILStackFrameBuilder.BuildAndExecute(opCodes, args: args.ToArray(), locals: iLVariables.ToArray());
                Assert.IsTrue(result.ExecutedInstructions == 2);
                Assert.IsNull(result.Exception);
            }
        }
Example #6
        public static void Run()
        {
            Console.WriteLine("Beginning q-learning maze");

            Console.WriteLine("Setting up state");



            int numStates = QOpCodeLearingGenerator.OpCodes.Count;
            var qMaze     = QMaze.CreateDemo(numStates);

            qMaze.Start = 0;
            double[][] rewardMatrix  = CreateRewards(qMaze.NumStates);
            double[][] qualityMatrix = CreateQuality(qMaze.NumStates);

            qMaze.Goal = QOpCodeLearingGenerator.RetIndex;
            double gamma     = .5; // discount factor
            double learnRate = .5; // learning rate
            int    maxEpochs = 100000;

            var argList = new List<object>();

            // The single argument is an int[]; the expected result is that array with its two elements swapped.
            argList.Add(new[] { 1, 2 });
            var expected = new[] { 2, 1 };


            var hardCoded = new List<OpCode>();

            // Set up "arr[1] = old arr[0]": push the array, index 1, then the value arr[0].
            hardCoded.Add(OpCodes.Ldarg_0);
            hardCoded.Add(OpCodes.Ldc_I4_1);
            hardCoded.Add(OpCodes.Ldarg_0);
            hardCoded.Add(OpCodes.Ldc_I4_0);
            hardCoded.Add(OpCodes.Ldelem);

            // Set up "arr[0] = arr[1]": push the array, index 0, then the value arr[1].
            hardCoded.Add(OpCodes.Ldarg_0);
            hardCoded.Add(OpCodes.Ldc_I4_0);
            hardCoded.Add(OpCodes.Ldarg_0);
            hardCoded.Add(OpCodes.Ldc_I4_1);
            hardCoded.Add(OpCodes.Ldelem);

            // Perform both pending stores (arr[0] = arr[1] first, then arr[1] = old arr[0]),
            // then return the array.
            hardCoded.Add(OpCodes.Stelem);
            hardCoded.Add(OpCodes.Stelem);
            hardCoded.Add(OpCodes.Ldarg_0);
            hardCoded.Add(OpCodes.Ret);

            var hcResult = ILStackFrameBuilder.BuildAndExecute(hardCoded, args: argList.ToArray());


            Train(qMaze, rewardMatrix, qualityMatrix, qMaze.Goal, gamma, learnRate, maxEpochs, expected, argList.ToArray());

            Console.WriteLine("Done.");
            //Print(qualityMatrix);

            Console.WriteLine("Solution");

            Walk(qMaze, qualityMatrix);

            Console.WriteLine("End demo");
            Console.ReadLine();
        }
Example #7
        /// <summary>
        /// The key update equation for Q-learning is based on the mathematical Bellman equation.
        /// </summary>
        /// <param name="qMaze">The maze whose states correspond to opcodes.</param>
        /// <param name="rewards">The reward matrix, indexed by [currentState][nextState].</param>
        /// <param name="quality">The quality (Q) matrix being trained, indexed by [currentState][nextState].</param>
        /// <param name="goal">The index of the goal state (the Ret opcode).</param>
        /// <param name="gamma">The discount factor applied to future rewards.</param>
        /// <param name="learnRate">The learning rate.</param>
        /// <param name="maxEpochs">The maximum number of training epochs.</param>
        /// <param name="expectedResult">The result a generated opcode sequence is expected to produce.</param>
        /// <param name="args">The arguments passed to each executed stack frame.</param>
        private static void Train(QMaze qMaze,
                                  double[][] rewards,
                                  double[][] quality,
                                  int goal,
                                  double gamma,
                                  double learnRate,
                                  int maxEpochs,
                                  dynamic expectedResult,
                                  params dynamic[] args)
        {
            /*
             * loop maxEpochs times
             * set currState = a random state
             * while currState != goalState
             * pick a random next-state but don't move yet
             * find largest Q for all next-next-states
             * update Q[currState][nextState] using Bellman
             * move to nextState
             * end-while
             * end-loop
             */
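            // In the code below, states index into QOpCodeLearingGenerator.OpCodes, so each state
            // corresponds to one IL opcode; the goal state is the Ret opcode
            // (QOpCodeLearingGenerator.RetIndex).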

            int maxOpCodeLength = 10;

            for (int epoch = 0; epoch < maxEpochs; ++epoch)
            {
                int startState = rnd.Next(0, rewards.Length);

                var currentOpCode = QOpCodeLearingGenerator.OpCodes[startState];


                Console.Title = $"Epoch {epoch} of {maxEpochs} : {currentOpCode.Name}";
                var l = new List<OpCode>();
                l.Add(currentOpCode);
                // The number of training epochs must be determined by trial and error.
                // An alternative design is to iterate until the values in the Q matrix
                // stop changing, or until they stabilize to very small changes per iteration.
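                // A minimal sketch of that alternative stopping rule (hypothetical; not part of
                // this demo): track the largest per-update change and stop once it falls below a
                // small tolerance, e.g.
                //
                //     double maxDelta = 0;                                  // reset each epoch
                //     double oldQ = quality[currState][nextState];
                //     // ... apply the Bellman update shown further below ...
                //     maxDelta = Math.Max(maxDelta, Math.Abs(quality[currState][nextState] - oldQ));
                //     if (maxDelta < 1e-6) break;                           // Q values have stabilized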
                int currState = startState;
                while (l.Count < maxOpCodeLength)
                {
                    int nextState = GetRandNextState(currState, qMaze);
                    var opCode    = QOpCodeLearingGenerator.OpCodes[nextState];
                    l.Add(opCode);
                    //TODO: make this smarter
                    //List<int> possNextNextStates = GetPossibleNextStates(nextState, qMaze);
                    List<int> possNextNextStates = QOpCodeLearingGenerator.OpIndexes;
                    double maxQ = double.MinValue;
                    for (int j = 0; j < possNextNextStates.Count; ++j)
                    {
                        int    nns = possNextNextStates[j]; // short alias
                        double q   = quality[nextState][nns];
                        if (q > maxQ)
                        {
                            maxQ = q;
                        }
                    }

                    /*
                     * Imagine you’re in a maze. You see that you can go to three different rooms: A, B, and C.
                     * You pick B, but don’t move yet.
                     * You ask a friend to go into room B, and the friend tells you
                     * that from room B you can go to rooms X, Y, and Z, and that of those
                     * rooms Y has the best Q value. In other words, Y is the best next-next state.
                     */
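                    // In this code, currState plays the role of the room you are in, nextState is
                    // the room you picked, and maxQ is the best quality value reachable from nextState.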


                    // TODO: refactor this to evaluate whether the sequence would return the reward.

                    /*
                     * quality[currState][nextState] =
                     *  ((1 - learnRate) * quality[currState][nextState]) +
                     *  (learnRate * (rewards[currState][nextState] + (gamma * maxQ)));
                     */

                    double reward = -.1; // default: small penalty for each step taken
                    if (nextState == QOpCodeLearingGenerator.RetIndex)
                    {
                        var frame = ILStackFrameBuilder.BuildAndExecute(l, 3, args: args); // 3-second timeout
                        if (frame.Exception != null)
                        {
                            reward = -.2;
                        }
                        else if (frame.ReturnResult != null)
                        {
                            var type         = frame.ReturnResult.GetType();
                            var expectedType = expectedResult.GetType();
                            if (type == expectedType)
                            {
                                try
                                {
                                    // Note: for reference types (such as the int[] used in the demo)
                                    // this dynamic == is a reference comparison.
                                    if (frame.ReturnResult == expectedResult)
                                    {
                                        reward = 1;
                                        var rewardSteps = string.Join(", ", l.ToArray());
                                        Console.WriteLine($"Found reward {rewardSteps}.");
                                    }
                                }
                                catch (Exception)
                                {
                                    // Ignore comparison failures; the reward simply stays unchanged.
                                }
                            }
                        }
                    }

                    quality[currState][nextState] =
                        ((1 - learnRate) * quality[currState][nextState]) +
                        (learnRate * (reward + (gamma * maxQ)));



                    currState = nextState;
                    if (currState == goal)
                    {
                        break;
                    }

                    /*
                     * The update equation has two parts.
                     *
                     * The first part, ((1 - learnRate) * Q[currState][nextState]), is called the
                     * exploit component and adds a fraction of the old value.
                     *
                     * The second part, (learnRate * (R[currState][nextState] + (gamma * maxQ))),
                     * is called the explore component.
                     *
                     * Larger values of learnRate increase the influence of both current rewards and
                     * future rewards (explore) at the expense of past rewards (exploit).
                     * The value of gamma, the discount factor, influences the importance of future rewards.
                     */
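                    /*
                     * A worked example of the update with illustrative numbers (assumed values,
                     * not taken from a real run): learnRate = 0.5, gamma = 0.5, old Q = 0.4,
                     * reward = -0.1, maxQ = 0.8:
                     *
                     *     newQ = (1 - 0.5) * 0.4 + 0.5 * (-0.1 + 0.5 * 0.8)
                     *          = 0.2 + 0.5 * 0.3
                     *          = 0.35
                     */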
                }
            }
        }