/// <summary>Reports whether the specified opcode is one that may incur backtracking.</summary>
/// <param name="opcode">The opcode to test. Only the bits selected by <see cref="RegexOpcode.OperatorMask"/> are considered.</param>
/// <returns><see langword="true"/> when the masked opcode is in the backtracking set listed below; otherwise, <see langword="false"/>.</returns>
public static bool OpcodeBacktracks(RegexOpcode opcode) =>
    (opcode & RegexOpcode.OperatorMask) is
        // Character, negated-character and set loops (greedy and lazy).
        RegexOpcode.Oneloop or RegexOpcode.Onelazy or
        RegexOpcode.Notoneloop or RegexOpcode.Notonelazy or
        RegexOpcode.Setloop or RegexOpcode.Setlazy or
        // Branching operations.
        RegexOpcode.Lazybranch or RegexOpcode.Branchmark or RegexOpcode.Lazybranchmark or
        // Counting operations.
        RegexOpcode.Nullcount or RegexOpcode.Setcount or
        RegexOpcode.Branchcount or RegexOpcode.Lazybranchcount or
        // Mark operations.
        RegexOpcode.Setmark or RegexOpcode.Capturemark or RegexOpcode.Getmark or
        // Jump operations.
        RegexOpcode.Setjump or RegexOpcode.Backjump or RegexOpcode.Forejump or
        RegexOpcode.Goto;
/// <summary>
/// Emits a zero-argument operation: the opcode is appended to the emitted code, and the
/// track count is incremented first when the opcode is one that may backtrack.
/// </summary>
/// <param name="op">The opcode to emit.</param>
private void Emit(RegexOpcode op)
{
    bool mayBacktrack = RegexInterpreterCode.OpcodeBacktracks(op);
    if (mayBacktrack)
    {
        _trackCount++;
    }

    // The opcode is stored as its integer value.
    _emitted.Append((int)op);
}
/// <summary>
/// Emits a one-argument operation: the opcode followed by its single operand. The track count
/// is incremented first when the opcode is one that may backtrack.
/// </summary>
/// <param name="op">The opcode to emit.</param>
/// <param name="opd1">The operand appended immediately after the opcode.</param>
private void Emit(RegexOpcode op, int opd1)
{
    // Query the same type as the zero-argument overload so both overloads agree on
    // which opcodes count toward the track total.
    if (RegexInterpreterCode.OpcodeBacktracks(op))
    {
        _trackCount++;
    }

    _emitted.Append((int)op);
    _emitted.Append(opd1);
}
/// <summary>
/// Gets the assembled machine code for a section of code.
/// </summary>
/// <param name="code">The lines of code for this section.</param>
/// <param name="labelDict">A dictionary of labels and their absolute compiled positions.</param>
/// <param name="regexOpcodes">The opcode matched to each line; element <c>i</c> describes <c>code[i]</c>.</param>
/// <returns>The bytes emitted for every line, in line order: optional prefix, opcode byte, then 0-2 operand bytes (low byte first).</returns>
/// <exception cref="ApplicationException">An operand is not a number and the instruction is not one that accepts a label.</exception>
/// <exception cref="KeyNotFoundException">An operand names a label that is not present in <paramref name="labelDict"/>.</exception>
private static byte[] GetCode(string[] code, Dictionary<string, ushort> labelDict, RegexOpcode[] regexOpcodes)
{
    var output = new List<byte>();

    for (int i = 0; i < code.Length; i++)
    {
        RegexOpcode op = regexOpcodes[i];

        if (op.Prefix != null)
        {
            output.Add((byte)op.Prefix);
        }

        output.Add(op.Code);

        if (op.BytesFollowing <= 0)
        {
            continue;
        }

        // Group 1 is the operand: either n, -n, nn, -nn, h or hh, or a label.
        string operand = op.Regex.Match(code[i]).Groups[1].Value;

        if (!int.TryParse(operand, out int n))
        {
            // Only instructions whose mnemonic starts with "JP" or "CA" may take a label.
            // StartsWith (rather than Substring(0, 2)) cannot throw on a short mnemonic.
            bool acceptsLabel = op.Op.StartsWith("JP", StringComparison.Ordinal)
                             || op.Op.StartsWith("CA", StringComparison.Ordinal);
            if (!acceptsLabel)
            {
                throw new ApplicationException(string.Format("The value {0} cannot be parsed", operand));
            }

            // Same exception type the dictionary indexer would raise, but naming the
            // offending label and line so the failure can be diagnosed.
            if (!labelDict.TryGetValue(operand, out ushort address))
            {
                throw new KeyNotFoundException($"The label '{operand}' used on line {i} ('{code[i]}') is not defined.");
            }

            n = address;
        }

        // Emit the low byte, then (for two-byte operands) the next byte up. The narrowing
        // is deliberately a bit truncation, so negative values become their two's-complement
        // byte regardless of the project's overflow-checking setting.
        output.Add(unchecked((byte)n));

        if (op.BytesFollowing == 2)
        {
            output.Add(unchecked((byte)(n >> 8)));
        }
    }

    return output.ToArray();
}
/// <summary>Gets how many integers an operation with the specified opcode occupies, counting the opcode itself.</summary>
/// <param name="opcode">The opcode to size. Only the bits selected by <see cref="RegexOpcode.OperatorMask"/> are considered.</param>
/// <returns>1 for an opcode with no operands, 2 for one operand, 3 for two operands.</returns>
public static int OpcodeSize(RegexOpcode opcode)
{
    RegexOpcode op = opcode & RegexOpcode.OperatorMask;

    switch (op)
    {
        // No operands: just the opcode.
        case RegexOpcode.Nothing or RegexOpcode.Bol or RegexOpcode.Eol
            or RegexOpcode.Boundary or RegexOpcode.NonBoundary
            or RegexOpcode.ECMABoundary or RegexOpcode.NonECMABoundary
            or RegexOpcode.Beginning or RegexOpcode.Start
            or RegexOpcode.EndZ or RegexOpcode.End
            or RegexOpcode.Nullmark or RegexOpcode.Setmark or RegexOpcode.Getmark
            or RegexOpcode.Setjump or RegexOpcode.Backjump or RegexOpcode.Forejump
            or RegexOpcode.Stop or RegexOpcode.UpdateBumpalong:
            return 1;

        // One operand.
        case RegexOpcode.One or RegexOpcode.Notone or RegexOpcode.Multi
            or RegexOpcode.Backreference or RegexOpcode.TestBackreference
            or RegexOpcode.Goto
            or RegexOpcode.Nullcount or RegexOpcode.Setcount
            or RegexOpcode.Lazybranch or RegexOpcode.Branchmark or RegexOpcode.Lazybranchmark
            or RegexOpcode.Set:
            return 2;

        // Two operands.
        case RegexOpcode.Capturemark
            or RegexOpcode.Branchcount or RegexOpcode.Lazybranchcount
            or RegexOpcode.Onerep or RegexOpcode.Notonerep
            or RegexOpcode.Oneloop or RegexOpcode.Oneloopatomic
            or RegexOpcode.Notoneloop or RegexOpcode.Notoneloopatomic
            or RegexOpcode.Onelazy or RegexOpcode.Notonelazy
            or RegexOpcode.Setlazy or RegexOpcode.Setrep
            or RegexOpcode.Setloop or RegexOpcode.Setloopatomic:
            return 3;
    }

    // Unrecognized opcode: flag it in debug builds and size it like an operand-less opcode.
    Debug.Fail($"Unknown opcode: {op}");
    return 1;
}
/// <summary>Builds a one-line textual description of the instruction whose opcode is stored at the given offset.</summary>
/// <param name="opcodeOffset">Index into <c>Codes</c> of the opcode; operands are read from the one or two slots after it, depending on the opcode.</param>
/// <returns>
/// The offset as six digits, a '~' marker when the opcode may backtrack (a space otherwise), the opcode name,
/// a suffix for each modifier flag that is set, and then the opcode-specific operands.
/// </returns>
internal string DescribeInstruction(int opcodeOffset)
{
    // Keep the raw value for the modifier flags; the masked value selects the operation.
    RegexOpcode encoded = (RegexOpcode)Codes[opcodeOffset];
    RegexOpcode op = encoded & RegexOpcode.OperatorMask;

    var text = new StringBuilder();
    text.Append($"{opcodeOffset:D6} ");
    text.Append(OpcodeBacktracks(op) ? '~' : ' ');
    text.Append(op);

    if ((encoded & RegexOpcode.CaseInsensitive) != 0)
    {
        text.Append("-Ci");
    }

    if ((encoded & RegexOpcode.RightToLeft) != 0)
    {
        text.Append("-Rtl");
    }

    if ((encoded & RegexOpcode.Backtracking) != 0)
    {
        text.Append("-Back");
    }

    if ((encoded & RegexOpcode.BacktrackingSecond) != 0)
    {
        text.Append("-Back2");
    }

    // First operand. It is read inside each case so that opcodes without operands
    // never touch the slots after the opcode.
    switch (op)
    {
        case RegexOpcode.One or RegexOpcode.Onerep or RegexOpcode.Oneloop or RegexOpcode.Oneloopatomic or RegexOpcode.Onelazy
            or RegexOpcode.Notone or RegexOpcode.Notonerep or RegexOpcode.Notoneloop or RegexOpcode.Notoneloopatomic or RegexOpcode.Notonelazy:
            text.Append(Indent());
            text.Append('\'');
            text.Append(RegexCharClass.DescribeChar((char)Codes[opcodeOffset + 1]));
            text.Append('\'');
            break;

        case RegexOpcode.Set or RegexOpcode.Setrep or RegexOpcode.Setloop or RegexOpcode.Setloopatomic or RegexOpcode.Setlazy:
            text.Append(Indent());
            text.Append(RegexCharClass.DescribeSet(Strings[Codes[opcodeOffset + 1]]));
            break;

        case RegexOpcode.Multi:
            text.Append(Indent());
            text.Append('"');
            text.Append(Strings[Codes[opcodeOffset + 1]]);
            text.Append('"');
            break;

        case RegexOpcode.Backreference or RegexOpcode.TestBackreference:
            text.Append(Indent());
            text.Append("index = ");
            text.Append(Codes[opcodeOffset + 1]);
            break;

        case RegexOpcode.Capturemark:
            text.Append(Indent());
            text.Append("index = ");
            text.Append(Codes[opcodeOffset + 1]);
            // The second operand is printed only when it is not -1.
            if (Codes[opcodeOffset + 2] != -1)
            {
                text.Append(", unindex = ");
                text.Append(Codes[opcodeOffset + 2]);
            }
            break;

        case RegexOpcode.Nullcount or RegexOpcode.Setcount:
            text.Append(Indent());
            text.Append("value = ");
            text.Append(Codes[opcodeOffset + 1]);
            break;

        case RegexOpcode.Goto or RegexOpcode.Lazybranch or RegexOpcode.Branchmark or RegexOpcode.Lazybranchmark
            or RegexOpcode.Branchcount or RegexOpcode.Lazybranchcount:
            text.Append(Indent());
            text.Append("addr = ");
            text.Append(Codes[opcodeOffset + 1]);
            break;
    }

    // Second operand, where there is one; int.MaxValue is rendered as "inf".
    if (op is RegexOpcode.Onerep or RegexOpcode.Oneloop or RegexOpcode.Oneloopatomic or RegexOpcode.Onelazy
        or RegexOpcode.Notonerep or RegexOpcode.Notoneloop or RegexOpcode.Notoneloopatomic or RegexOpcode.Notonelazy
        or RegexOpcode.Setrep or RegexOpcode.Setloop or RegexOpcode.Setloopatomic or RegexOpcode.Setlazy)
    {
        text.Append(", rep = ");
        text.Append(Codes[opcodeOffset + 2] == int.MaxValue ? "inf" : Codes[opcodeOffset + 2]);
    }
    else if (op is RegexOpcode.Branchcount or RegexOpcode.Lazybranchcount)
    {
        text.Append(", limit = ");
        text.Append(Codes[opcodeOffset + 2] == int.MaxValue ? "inf" : Codes[opcodeOffset + 2]);
    }

    return text.ToString();

    // Pads the text built so far out to 25 characters, always emitting at least one space.
    string Indent() => new string(' ', Math.Max(1, 25 - text.Length));
}
/// <summary>
/// Pairs every line of code with the first entry of <c>Program.Opcodes</c> whose regex matches it.
/// </summary>
/// <param name="code">The lines of code to classify.</param>
/// <returns>An array the same length as <paramref name="code"/>, where element <c>i</c> is the opcode matched to <c>code[i]</c>.</returns>
/// <exception cref="ApplicationException">A line is not matched by any known opcode.</exception>
private static RegexOpcode[] GetRegexOpcodes(string[] code)
{
    var matched = new RegexOpcode[code.Length];
    int lineIndex = 0;

    foreach (string line in code)
    {
        RegexOpcode found = Program.Opcodes.FirstOrDefault(candidate => candidate.Regex.IsMatch(line));

        if (found == null)
        {
            throw new ApplicationException(string.Format("Line {0} is incorrect: '{1}' does not exist, or a number in it is too big/small/malformed for the instruction.", lineIndex, line));
        }

        matched[lineIndex] = found;
        lineIndex++;
    }

    return matched;
}