示例#1
0
        /// <summary>Determines whether an opcode belongs to the set of operations that may incur backtracking.</summary>
        /// <param name="opcode">The opcode to test. Only the bits selected by <see cref="RegexOpcode.OperatorMask"/> are considered.</param>
        /// <returns><see langword="true"/> when the masked opcode is one of the backtracking operations listed below; otherwise <see langword="false"/>.</returns>
        public static bool OpcodeBacktracks(RegexOpcode opcode)
        {
            bool mayBacktrack;

            // Strip everything outside the operator mask before classifying.
            switch (opcode & RegexOpcode.OperatorMask)
            {
                case RegexOpcode.Oneloop:
                case RegexOpcode.Onelazy:
                case RegexOpcode.Notoneloop:
                case RegexOpcode.Notonelazy:
                case RegexOpcode.Setloop:
                case RegexOpcode.Setlazy:
                case RegexOpcode.Lazybranch:
                case RegexOpcode.Branchmark:
                case RegexOpcode.Lazybranchmark:
                case RegexOpcode.Nullcount:
                case RegexOpcode.Setcount:
                case RegexOpcode.Branchcount:
                case RegexOpcode.Lazybranchcount:
                case RegexOpcode.Setmark:
                case RegexOpcode.Capturemark:
                case RegexOpcode.Getmark:
                case RegexOpcode.Setjump:
                case RegexOpcode.Backjump:
                case RegexOpcode.Forejump:
                case RegexOpcode.Goto:
                    mayBacktrack = true;
                    break;

                default:
                    mayBacktrack = false;
                    break;
            }

            return mayBacktrack;
        }
示例#2
0
        /// <summary>
        /// Emits a zero-argument operation: the track count is incremented
        /// when the opcode is reported as backtracking, and the opcode is
        /// then appended to the emitted code as an integer.
        /// </summary>
        /// <param name="op">The opcode to emit.</param>
        private void Emit(RegexOpcode op)
        {
            bool backtracks = RegexInterpreterCode.OpcodeBacktracks(op);
            if (backtracks)
            {
                _trackCount++;
            }

            int encodedOp = (int)op;
            _emitted.Append(encodedOp);
        }
示例#3
0
        /// <summary>
        /// Emits a one-argument operation: the opcode followed by its single operand.
        /// The track count is incremented first when the opcode is reported as backtracking.
        /// </summary>
        /// <param name="op">The opcode to emit.</param>
        /// <param name="opd1">The operand appended directly after the opcode.</param>
        private void Emit(RegexOpcode op, int opd1)
        {
            bool backtracks = RegexCode.OpcodeBacktracks(op);
            if (backtracks)
            {
                _trackCount++;
            }

            int encodedOp = (int)op;
            _emitted.Append(encodedOp);
            _emitted.Append(opd1);
        }
            /// <summary>
            /// Gets the assembled machine code for a section of code.
            /// </summary>
            /// <remarks>
            /// For every line the optional prefix byte and the opcode byte are written. When the opcode
            /// declares following bytes, the operand is read from capture group 1 of the opcode's regex
            /// applied to the line and written low byte first, followed by the high byte when exactly
            /// two bytes follow.
            /// </remarks>
            /// <param name="code">The lines of code for this section</param>
            /// <param name="labelDict">A dict of labels and their absolute compiled positions</param>
            /// <param name="regexOpcodes">A list of RegexOpcodes where each RegexOpcode matches with each corresponding line of code</param>
            /// <returns>The assembled bytes, in the same order as the lines of <paramref name="code"/>.</returns>
            /// <exception cref="ApplicationException">The operand is not a number and the instruction does not accept a label.</exception>
            /// <exception cref="KeyNotFoundException">The operand of a label-accepting instruction is not present in <paramref name="labelDict"/>.</exception>
            private static byte[] GetCode(string[] code, Dictionary<string, ushort> labelDict, RegexOpcode[] regexOpcodes)
            {
                var output = new List<byte>();

                for (int i = 0; i < code.Length; i++)
                {
                    RegexOpcode op = regexOpcodes[i];

                    if (op.Prefix != null)
                    {
                        output.Add((byte)op.Prefix);
                    }

                    output.Add(op.Code);

                    if (op.BytesFollowing > 0)
                    {
                        // Group 1 is the number, either n, -n, nn, -nn, h or hh or a label
                        string operand = op.Regex.Match(code[i]).Groups[1].Value;

                        // Assembly source is machine-readable text, so parse it culture-independently.
                        int n;
                        if (!int.TryParse(operand, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out n))
                        {
                            // Mnemonics starting with "JP" or "CA" may take a label instead of a number.
                            // StartsWith (rather than Substring) also copes with mnemonics shorter than two characters.
                            bool acceptsLabel = op.Op.StartsWith("JP", StringComparison.Ordinal)
                                || op.Op.StartsWith("CA", StringComparison.Ordinal);

                            if (!acceptsLabel)
                            {
                                throw new ApplicationException(string.Format("The value {0} cannot be parsed", operand));
                            }

                            n = labelDict[operand];
                        }

                        // Truncation to the low 8 bits is intentional (this is how negative and 16-bit
                        // operands are encoded), so make it explicit instead of relying on compiler settings.
                        unchecked
                        {
                            output.Add((byte)n);

                            if (op.BytesFollowing == 2)
                            {
                                output.Add((byte)(n >> 8));
                            }
                        }
                    }
                }

                return output.ToArray();
            }
示例#5
0
        /// <summary>Computes how many integers an operation occupies, counting the opcode itself plus its operands.</summary>
        /// <param name="opcode">The opcode to measure. Only the bits selected by <see cref="RegexOpcode.OperatorMask"/> are considered.</param>
        /// <returns>1 for an opcode without operands, 2 for one operand, 3 for two operands. An unrecognized opcode fails a debug assertion and is reported as 1.</returns>
        public static int OpcodeSize(RegexOpcode opcode)
        {
            opcode &= RegexOpcode.OperatorMask;

            int size;
            switch (opcode)
            {
                // Opcode only, no operands.
                case RegexOpcode.Nothing:
                case RegexOpcode.Bol:
                case RegexOpcode.Eol:
                case RegexOpcode.Boundary:
                case RegexOpcode.NonBoundary:
                case RegexOpcode.ECMABoundary:
                case RegexOpcode.NonECMABoundary:
                case RegexOpcode.Beginning:
                case RegexOpcode.Start:
                case RegexOpcode.EndZ:
                case RegexOpcode.End:
                case RegexOpcode.Nullmark:
                case RegexOpcode.Setmark:
                case RegexOpcode.Getmark:
                case RegexOpcode.Setjump:
                case RegexOpcode.Backjump:
                case RegexOpcode.Forejump:
                case RegexOpcode.Stop:
                case RegexOpcode.UpdateBumpalong:
                    size = 1;
                    break;

                // Opcode plus one operand.
                case RegexOpcode.One:
                case RegexOpcode.Notone:
                case RegexOpcode.Multi:
                case RegexOpcode.Backreference:
                case RegexOpcode.TestBackreference:
                case RegexOpcode.Goto:
                case RegexOpcode.Nullcount:
                case RegexOpcode.Setcount:
                case RegexOpcode.Lazybranch:
                case RegexOpcode.Branchmark:
                case RegexOpcode.Lazybranchmark:
                case RegexOpcode.Set:
                    size = 2;
                    break;

                // Opcode plus two operands.
                case RegexOpcode.Capturemark:
                case RegexOpcode.Branchcount:
                case RegexOpcode.Lazybranchcount:
                case RegexOpcode.Onerep:
                case RegexOpcode.Notonerep:
                case RegexOpcode.Oneloop:
                case RegexOpcode.Oneloopatomic:
                case RegexOpcode.Notoneloop:
                case RegexOpcode.Notoneloopatomic:
                case RegexOpcode.Onelazy:
                case RegexOpcode.Notonelazy:
                case RegexOpcode.Setlazy:
                case RegexOpcode.Setrep:
                case RegexOpcode.Setloop:
                case RegexOpcode.Setloopatomic:
                    size = 3;
                    break;

                // Unknown opcodes assert in debug builds and are sized like an operand-less opcode.
                default:
                    Debug.Fail($"Unknown opcode: {opcode}");
                    size = 1;
                    break;
            }

            return size;
        }
示例#6
0
        /// <summary>Builds a one-line textual description of the instruction whose opcode is stored at the given offset.</summary>
        /// <param name="opcodeOffset">Index into <c>Codes</c> of the opcode; any operands are read from the slots that follow it.</param>
        /// <returns>
        /// The six-digit offset, a '~' marker when the operator may backtrack, the operator name with
        /// flag suffixes, and then the operator-specific operands.
        /// </returns>
        internal string DescribeInstruction(int opcodeOffset)
        {
            var text = new StringBuilder();

            RegexOpcode encoded = (RegexOpcode)Codes[opcodeOffset];
            RegexOpcode op = encoded & RegexOpcode.OperatorMask;

            // Offset, backtracking marker, and operator name.
            text.Append($"{opcodeOffset:D6} ");
            text.Append(OpcodeBacktracks(op) ? '~' : ' ');
            text.Append(op);

            // Flag bits carried alongside the operator.
            AppendFlag(RegexOpcode.CaseInsensitive, "-Ci");
            AppendFlag(RegexOpcode.RightToLeft, "-Rtl");
            AppendFlag(RegexOpcode.Backtracking, "-Back");
            AppendFlag(RegexOpcode.BacktrackingSecond, "-Back2");

            // First operand, formatted according to the operator family.
            switch (op)
            {
                case RegexOpcode.One:
                case RegexOpcode.Onerep:
                case RegexOpcode.Oneloop:
                case RegexOpcode.Oneloopatomic:
                case RegexOpcode.Onelazy:
                case RegexOpcode.Notone:
                case RegexOpcode.Notonerep:
                case RegexOpcode.Notoneloop:
                case RegexOpcode.Notoneloopatomic:
                case RegexOpcode.Notonelazy:
                    text.Append(Padding());
                    text.Append('\'');
                    text.Append(RegexCharClass.DescribeChar((char)Codes[opcodeOffset + 1]));
                    text.Append('\'');
                    break;

                case RegexOpcode.Set:
                case RegexOpcode.Setrep:
                case RegexOpcode.Setloop:
                case RegexOpcode.Setloopatomic:
                case RegexOpcode.Setlazy:
                    text.Append(Padding());
                    text.Append(RegexCharClass.DescribeSet(Strings[Codes[opcodeOffset + 1]]));
                    break;

                case RegexOpcode.Multi:
                    text.Append(Padding());
                    text.Append('"');
                    text.Append(Strings[Codes[opcodeOffset + 1]]);
                    text.Append('"');
                    break;

                case RegexOpcode.Backreference:
                case RegexOpcode.TestBackreference:
                    AppendFirstOperand("index = ");
                    break;

                case RegexOpcode.Capturemark:
                    AppendFirstOperand("index = ");
                    int unindex = Codes[opcodeOffset + 2];
                    if (unindex != -1)
                    {
                        text.Append(", unindex = ");
                        text.Append(unindex);
                    }
                    break;

                case RegexOpcode.Nullcount:
                case RegexOpcode.Setcount:
                    AppendFirstOperand("value = ");
                    break;

                case RegexOpcode.Goto:
                case RegexOpcode.Lazybranch:
                case RegexOpcode.Branchmark:
                case RegexOpcode.Lazybranchmark:
                case RegexOpcode.Branchcount:
                case RegexOpcode.Lazybranchcount:
                    AppendFirstOperand("addr = ");
                    break;
            }

            // Second operand: a repetition count or an iteration limit.
            switch (op)
            {
                case RegexOpcode.Onerep:
                case RegexOpcode.Oneloop:
                case RegexOpcode.Oneloopatomic:
                case RegexOpcode.Onelazy:
                case RegexOpcode.Notonerep:
                case RegexOpcode.Notoneloop:
                case RegexOpcode.Notoneloopatomic:
                case RegexOpcode.Notonelazy:
                case RegexOpcode.Setrep:
                case RegexOpcode.Setloop:
                case RegexOpcode.Setloopatomic:
                case RegexOpcode.Setlazy:
                    text.Append(", rep = ");
                    text.Append(SecondOperandOrInf());
                    break;

                case RegexOpcode.Branchcount:
                case RegexOpcode.Lazybranchcount:
                    text.Append(", limit = ");
                    text.Append(SecondOperandOrInf());
                    break;
            }

            return text.ToString();

            // Spaces needed to reach column 25 of the text built so far, but always at least one.
            string Padding() => new string(' ', Math.Max(1, 25 - text.Length));

            // Appends the suffix when the given flag bit is set on the stored opcode.
            void AppendFlag(RegexOpcode flag, string suffix)
            {
                if ((encoded & flag) != 0)
                {
                    text.Append(suffix);
                }
            }

            // Appends padding, a label, and the integer stored directly after the opcode.
            void AppendFirstOperand(string label)
            {
                text.Append(Padding());
                text.Append(label);
                text.Append(Codes[opcodeOffset + 1]);
            }

            // The second operand, with int.MaxValue rendered as "inf".
            object SecondOperandOrInf()
            {
                int count = Codes[opcodeOffset + 2];
                return count == int.MaxValue ? (object)"inf" : count;
            }
        }
            /// <summary>
            /// Resolves every line of code to the first entry of <c>Program.Opcodes</c> whose regex matches it.
            /// </summary>
            /// <param name="code">The lines of code to resolve.</param>
            /// <returns>An array parallel to <paramref name="code"/> holding the matched opcode for each line.</returns>
            /// <exception cref="ApplicationException">A line is not matched by any known opcode.</exception>
            private static RegexOpcode[] GetRegexOpcodes(string[] code)
            {
                int lineCount = code.Length;
                var resolved = new RegexOpcode[lineCount];

                for (int lineIndex = 0; lineIndex < lineCount; lineIndex++)
                {
                    string line = code[lineIndex];

                    RegexOpcode match = Program.Opcodes.FirstOrDefault(candidate => candidate.Regex.IsMatch(line));
                    if (match == null)
                    {
                        throw new ApplicationException(string.Format("Line {0} is incorrect: '{1}' does not exist, or a number in it is too big/small/malformed for the instruction.", lineIndex, line));
                    }

                    resolved[lineIndex] = match;
                }

                return resolved;
            }