// Builds an interpreter around an already-compiled program.
//
// program: the compiled program words; stored as-is (no copy is made here).
//
// The counts at program offsets 1 and 3 are decoded with ReadProgramCount.
// NOTE(review): offsets 1/3 and the start offset 7 presumably mirror the layout
// of a leading info header in the program - confirm against the compiler.
public Interpreter(ushort[] program)
{
    this.program = program;

    // No quick-search helper yet; it is only assigned later, outside this constructor.
    qs = null;

    // The stored group count is one more than the count read at offset 1.
    group_count = 1 + ReadProgramCount(1);
    match_min = ReadProgramCount(3);

    program_start = 7;

    // One int slot per group.
    groups = new int[group_count];
}
// Runs the compiled program from index `pc` against `text`, starting at text position `ref_ptr`.
//
// Each iteration reads program[pc], splits it into an opcode (low byte) and flags (high byte) and
// dispatches on the opcode. A handler either advances pc/ptr and lets the loop continue, or jumps to
// one of the two epilogues:
//   Pass: ref_ptr is overwritten with the current ptr. In Mode.Match the method returns true. In
//         Mode.Count, fast.Count is incremented and the method returns true once fast.IsMaximum holds
//         (or, for a lazy repeat, once fast.IsMinimum holds); otherwise pc is reset to fast.Expression
//         and execution restarts at Begin.
//   Fail: In Mode.Match the method returns false. In Mode.Count it returns true when the repeat is not
//         lazy and fast.IsMinimum holds; otherwise ref_ptr is reset to fast.Start and false is returned.
//
// Parameters:
//   mode    - Mode.Match or Mode.Count; selects the epilogue behaviour described above.
//   ref_ptr - text position to start from; updated on Pass (and on a failing Mode.Count run).
//   pc      - index into `program` of the first word to execute.
//
// Side effects visible in this body: Repeat/Until push and restore `this.repeat`, FastRepeat pushes and
// pops `this.fast`, Until and FastRepeat write `deep`, the greedy Until path pushes checkpoints and text
// positions on `stack`, and the substring-anchor path lazily creates and caches `qs`.
//
// NOTE(review): the String/Reference handlers fold case with Char.ToLower(char), and FastRepeat uses
// Char.ToUpper(char); these overloads are culture-sensitive in .NET - confirm that is intended.
// NOTE(review): on these physical lines several `//` comments are followed by code on the same line,
// which suggests the original line breaks were lost - confirm against the upstream formatting.
private bool Eval(Mode mode, ref int ref_ptr, int pc) { int ptr = ref_ptr; Begin: for (;;) { ushort word = program[pc]; OpCode op = (OpCode)(word & 0x00ff); OpFlags flags = (OpFlags)(word & 0xff00); switch (op) { case OpCode.Anchor: { int skip = program[pc + 1]; int anch_offset = program[pc + 2]; bool anch_reverse = (flags & OpFlags.RightToLeft) != 0; int anch_ptr = anch_reverse ? ptr - anch_offset : ptr + anch_offset; int anch_end = text_end - match_min + anch_offset; // maximum anchor position int anch_begin = 0; // the general case for an anchoring expression is at the bottom, however we // do some checks for the common cases before to save processing time. the current // optimizer only outputs three types of anchoring expressions: fixed position, // fixed substring, and no anchor. OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff); if (anch_op == OpCode.Position && skip == 6) // position anchor // Anchor // Position // True { switch ((Position)program[pc + 4]) { case Position.StartOfString: if (anch_reverse || anch_offset == 0) { if (anch_reverse) { ptr = anch_offset; } if (TryMatch(ref ptr, pc + skip)) { goto Pass; } } break; case Position.StartOfLine: if (anch_ptr == 0) { ptr = 0; if (TryMatch(ref ptr, pc + skip)) { goto Pass; } ++anch_ptr; } while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) { if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') { if (anch_reverse) { ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset; } else { ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset; } if (TryMatch(ref ptr, pc + skip)) { goto Pass; } } if (anch_reverse) { --anch_ptr; } else { ++anch_ptr; } } break; case Position.StartOfScan: if (anch_ptr == scan_ptr) { ptr = anch_reverse ? 
scan_ptr + anch_offset : scan_ptr - anch_offset; if (TryMatch(ref ptr, pc + skip)) { goto Pass; } } break; default: // FIXME break; } } else if (qs != null || (anch_op == OpCode.String && skip == 6 + program[pc + 4])) // substring anchor // Anchor // String // True { bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0; if (qs == null) { bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0; string substring = GetString(pc + 3); qs = new QuickSearch(substring, ignore, reverse); } while ((anch_reverse && anch_ptr >= anch_begin) || (!anch_reverse && anch_ptr <= anch_end)) { if (reverse) { anch_ptr = qs.Search(text, anch_ptr, anch_begin); if (anch_ptr != -1) { anch_ptr += qs.Length; } } else { anch_ptr = qs.Search(text, anch_ptr, anch_end); } if (anch_ptr < 0) { break; } ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset; if (TryMatch(ref ptr, pc + skip)) { goto Pass; } if (reverse) { anch_ptr -= 2; } else { ++anch_ptr; } } } else if (anch_op == OpCode.True) // no anchor // Anchor // True { while ((anch_reverse && anch_ptr >= anch_begin) || (!anch_reverse && anch_ptr <= anch_end)) { ptr = anch_ptr; if (TryMatch(ref ptr, pc + skip)) { goto Pass; } if (anch_reverse) { --anch_ptr; } else { ++anch_ptr; } } } else // general case // Anchor // <expr> // True { while ((anch_reverse && anch_ptr >= anch_begin) || (!anch_reverse && anch_ptr <= anch_end)) { ptr = anch_ptr; if (Eval(Mode.Match, ref ptr, pc + 3)) { // anchor expression passed: try real expression at the correct offset ptr = anch_reverse ? 
anch_ptr + anch_offset : anch_ptr - anch_offset; if (TryMatch(ref ptr, pc + skip)) { goto Pass; } } if (anch_reverse) { --anch_ptr; } else { ++anch_ptr; } } } goto Fail; } case OpCode.False: { goto Fail; } case OpCode.True: { goto Pass; } case OpCode.Position: { if (!IsPosition((Position)program[pc + 1], ptr)) { goto Fail; } pc += 2; break; } case OpCode.String: { bool reverse = (flags & OpFlags.RightToLeft) != 0; bool ignore = (flags & OpFlags.IgnoreCase) != 0; int len = program[pc + 1]; if (reverse) { ptr -= len; if (ptr < 0) { goto Fail; } } else if (ptr + len > text_end) { goto Fail; } pc += 2; for (int i = 0; i < len; ++i) { char c = text[ptr + i]; if (ignore) { c = Char.ToLower(c); } if (c != (char)program[pc++]) { goto Fail; } } if (!reverse) { ptr += len; } break; } case OpCode.Reference: { bool reverse = (flags & OpFlags.RightToLeft) != 0; bool ignore = (flags & OpFlags.IgnoreCase) != 0; int m = GetLastDefined(program [pc + 1]); if (m < 0) { goto Fail; } int str = marks [m].Index; int len = marks [m].Length; if (reverse) { ptr -= len; if (ptr < 0) { goto Fail; } } else if (ptr + len > text_end) { goto Fail; } pc += 2; if (ignore) { for (int i = 0; i < len; ++i) { if (Char.ToLower(text[ptr + i]) != Char.ToLower(text[str + i])) { goto Fail; } } } else { for (int i = 0; i < len; ++i) { if (text[ptr + i] != text[str + i]) { goto Fail; } } } if (!reverse) { ptr += len; } break; } case OpCode.Character: case OpCode.Category: case OpCode.NotCategory: case OpCode.Range: case OpCode.Set: { if (!EvalChar(mode, ref ptr, ref pc, false)) { goto Fail; } break; } case OpCode.In: { int target = pc + program[pc + 1]; pc += 2; if (!EvalChar(mode, ref ptr, ref pc, true)) { goto Fail; } pc = target; break; } case OpCode.Open: { Open(program[pc + 1], ptr); pc += 2; break; } case OpCode.Close: { Close(program[pc + 1], ptr); pc += 2; break; } case OpCode.BalanceStart: { int start = ptr; //point before the balancing group if (!Eval(Mode.Match, ref ptr, pc + 5)) { goto Fail; } if 
(!Balance(program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false), start)) { goto Fail; } pc += program[pc + 4]; break; } case OpCode.Balance: { goto Pass; } case OpCode.IfDefined: { int m = GetLastDefined(program [pc + 2]); if (m < 0) { pc += program[pc + 1]; } else { pc += 3; } break; } case OpCode.Sub: { if (!Eval(Mode.Match, ref ptr, pc + 2)) { goto Fail; } pc += program[pc + 1]; break; } case OpCode.Test: { int cp = Checkpoint(); int test_ptr = ptr; if (Eval(Mode.Match, ref test_ptr, pc + 3)) { pc += program[pc + 1]; } else { Backtrack(cp); pc += program[pc + 2]; } break; } case OpCode.Branch: { OpCode branch_op; do { int cp = Checkpoint(); if (Eval(Mode.Match, ref ptr, pc + 2)) { goto Pass; } Backtrack(cp); pc += program[pc + 1]; branch_op = (OpCode)(program[pc] & 0xff); } while (branch_op != OpCode.False); goto Fail; } case OpCode.Jump: { pc += program[pc + 1]; break; } case OpCode.Repeat: { this.repeat = new RepeatContext( this.repeat, // previous context ReadProgramCount(pc + 2), // minimum ReadProgramCount(pc + 4), // maximum (flags & OpFlags.Lazy) != 0, // lazy pc + 6 // subexpression ); if (Eval(Mode.Match, ref ptr, pc + program[pc + 1])) { goto Pass; } else { this.repeat = this.repeat.Previous; goto Fail; } } case OpCode.Until: { RepeatContext current = this.repeat; // // Can we avoid recursion? // // Backtracking can be forced in nested quantifiers from the tail of this quantifier. // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle // quantifiers. // // If 'deep' was unmolested, that implies that there was no nested quantifiers. // Thus, we can safely avoid recursion. 
// if (deep == current) { goto Pass; } int start = current.Start; int start_count = current.Count; while (!current.IsMinimum) { ++current.Count; current.Start = ptr; deep = current; if (!Eval(Mode.Match, ref ptr, current.Expression)) { current.Start = start; current.Count = start_count; goto Fail; } if (deep != current) // recursive mode { goto Pass; } } if (ptr == current.Start) { // degenerate match ... match tail or fail this.repeat = current.Previous; deep = null; if (Eval(Mode.Match, ref ptr, pc + 1)) { goto Pass; } this.repeat = current; goto Fail; } if (current.IsLazy) { for (;;) { // match tail first ... this.repeat = current.Previous; deep = null; int cp = Checkpoint(); if (Eval(Mode.Match, ref ptr, pc + 1)) { goto Pass; } Backtrack(cp); // ... then match more this.repeat = current; if (current.IsMaximum) { goto Fail; } ++current.Count; current.Start = ptr; deep = current; if (!Eval(Mode.Match, ref ptr, current.Expression)) { current.Start = start; current.Count = start_count; goto Fail; } if (deep != current) // recursive mode { goto Pass; } // Degenerate match: ptr has not moved since the last (failed) tail match. // So, next and subsequent tail matches will fail. if (ptr == current.Start) { goto Fail; } } } else { int stack_size = stack.Count; // match greedily as much as possible while (!current.IsMaximum) { int cp = Checkpoint(); int old_ptr = ptr; int old_start = current.Start; ++current.Count; current.Start = ptr; deep = current; if (!Eval(Mode.Match, ref ptr, current.Expression)) { --current.Count; current.Start = old_start; Backtrack(cp); break; } if (deep != current) { // recursive mode: no more backtracking, truncate the stack stack.Count = stack_size; goto Pass; } stack.Push(cp); stack.Push(old_ptr); // Degenerate match: no point going on if (ptr == current.Start) { break; } } // then, match the tail, backtracking as necessary. 
this.repeat = current.Previous; for (;;) { deep = null; if (Eval(Mode.Match, ref ptr, pc + 1)) { stack.Count = stack_size; goto Pass; } if (stack.Count == stack_size) { this.repeat = current; goto Fail; } --current.Count; ptr = stack.Pop(); Backtrack(stack.Pop()); } } } case OpCode.FastRepeat: { this.fast = new RepeatContext( fast, ReadProgramCount(pc + 2), // minimum ReadProgramCount(pc + 4), // maximum (flags & OpFlags.Lazy) != 0, // lazy pc + 6 // subexpression ); fast.Start = ptr; int cp = Checkpoint(); pc += program[pc + 1]; // tail expression ushort tail_word = program[pc]; int c1 = -1; // first character of tail operator int c2 = -1; // ... and the same character, in upper case if ignoring case int coff = 0; // 0 or -1 depending on direction OpCode tail_op = (OpCode)(tail_word & 0xff); if (tail_op == OpCode.Character || tail_op == OpCode.String) { OpFlags tail_flags = (OpFlags)(tail_word & 0xff00); if ((tail_flags & OpFlags.Negate) != 0) { goto skip; } if (tail_op == OpCode.String) { int offset = 0; if ((tail_flags & OpFlags.RightToLeft) != 0) { offset = program[pc + 1] - 1; } c1 = program[pc + 2 + offset]; // first char of string } else { c1 = program[pc + 1]; // character } if ((tail_flags & OpFlags.IgnoreCase) != 0) { c2 = Char.ToUpper((char)c1); // ignore case } else { c2 = c1; } if ((tail_flags & OpFlags.RightToLeft) != 0) { coff = -1; // reverse } else { coff = 0; } } skip: if (fast.IsLazy) { if (!fast.IsMinimum && !Eval(Mode.Count, ref ptr, fast.Expression)) { //Console.WriteLine ("lazy fast: failed mininum."); fast = fast.Previous; goto Fail; } while (true) { int p = ptr + coff; if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) { deep = null; if (Eval(Mode.Match, ref ptr, pc)) { break; } } if (fast.IsMaximum) { //Console.WriteLine ("lazy fast: failed with maximum."); fast = fast.Previous; goto Fail; } Backtrack(cp); if (!Eval(Mode.Count, ref ptr, fast.Expression)) { //Console.WriteLine ("lazy fast: no more."); fast = 
fast.Previous; goto Fail; } } fast = fast.Previous; goto Pass; } else { if (!Eval(Mode.Count, ref ptr, fast.Expression)) { fast = fast.Previous; goto Fail; } int width; if (fast.Count > 0) { width = (ptr - fast.Start) / fast.Count; } else { width = 0; } while (true) { int p = ptr + coff; if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) { deep = null; if (Eval(Mode.Match, ref ptr, pc)) { break; } } --fast.Count; if (!fast.IsMinimum) { fast = fast.Previous; goto Fail; } ptr -= width; Backtrack(cp); } fast = fast.Previous; goto Pass; } } case OpCode.Info: { Debug.Assert(false, "Regex", "Info block found in pattern"); goto Fail; } } } Pass: ref_ptr = ptr; switch (mode) { case Mode.Match: return(true); case Mode.Count: { ++fast.Count; if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum)) { return(true); } pc = fast.Expression; goto Begin; } } Fail: switch (mode) { case Mode.Match: return(false); case Mode.Count: { if (!fast.IsLazy && fast.IsMinimum) { return(true); } ref_ptr = fast.Start; return(false); } } return(false); }
// Runs the compiled program from index `pc` against `this.text`, starting at text position `ref_ptr`
// (copied into the local `num`).
//
// The inner for-loop reads this.program[pc], splits it into an opcode (low byte, `& 255`) and flags
// (high byte, `& 65280`) and dispatches on the opcode. Simple opcodes are handled inline and loop;
// the others jump to labelled sections further down. Two labels act as the shared epilogues:
//   IL_FF3  (success): ref_ptr is overwritten with `num`. In Mode.Match the method returns true. In
//           Mode.Count, this.fast.Count is incremented and the method returns true once
//           this.fast.IsMaximum holds (or, for a lazy repeat, once this.fast.IsMinimum holds);
//           otherwise pc is reset to this.fast.Expression and the outer loop continues.
//   IL_1067 (failure): In Mode.Match the method returns false. In Mode.Count it returns true when the
//           repeat is not lazy and this.fast.IsMinimum holds; otherwise ref_ptr is reset to
//           this.fast.Start and false is returned.
// The Block_* / IL_* labels placed just before IL_1067 restore this.repeat, this.fast or the saved
// repeatContext Start/Count before falling into the failure epilogue.
//
// Parameters:
//   mode    - Interpreter.Mode.Match or Interpreter.Mode.Count; selects the epilogue behaviour above.
//   ref_ptr - text position to start from; updated on success (and on a failing Mode.Count run).
//   pc      - index into this.program of the first word to execute.
//
// NOTE(review): the IL_*/Block_* labels, the num*/flag* locals and the raw constants (e.g. `case 2:`,
// `& 1024`, `& 512`) look decompiler-generated - confirm provenance and which enum members they denote.
// NOTE(review): case folding uses char.ToLower(char) / char.ToUpper(char), which are culture-sensitive
// overloads in .NET - confirm that is intended.
private bool Eval(Interpreter.Mode mode, ref int ref_ptr, int pc) { int num = ref_ptr; Interpreter.RepeatContext repeatContext; int start; int count; for (;;) { OpFlags opFlags; for (;;) { ushort num2 = this.program[pc]; OpCode opCode = (OpCode)(num2 & 255); opFlags = (OpFlags)(num2 & 65280); switch (opCode) { case OpCode.False: goto IL_4B8; case OpCode.True: goto IL_4BD; case OpCode.Position: if (!this.IsPosition((Position)this.program[pc + 1], num)) { goto Block_44; } pc += 2; break; case OpCode.String: { bool flag = (ushort)(opFlags & OpFlags.RightToLeft) != 0; bool flag2 = (ushort)(opFlags & OpFlags.IgnoreCase) != 0; int num3 = (int)this.program[pc + 1]; if (flag) { num -= num3; if (num < 0) { goto Block_46; } } else if (num + num3 > this.text_end) { goto Block_47; } pc += 2; for (int i = 0; i < num3; i++) { char c = this.text[num + i]; if (flag2) { c = char.ToLower(c); } if (c != (char)this.program[pc++]) { goto Block_49; } } if (!flag) { num += num3; } break; } case OpCode.Reference: { bool flag3 = (ushort)(opFlags & OpFlags.RightToLeft) != 0; bool flag4 = (ushort)(opFlags & OpFlags.IgnoreCase) != 0; int lastDefined = this.GetLastDefined((int)this.program[pc + 1]); if (lastDefined < 0) { goto Block_52; } int index = this.marks[lastDefined].Index; int length = this.marks[lastDefined].Length; if (flag3) { num -= length; if (num < 0) { goto Block_54; } } else if (num + length > this.text_end) { goto Block_55; } pc += 2; if (flag4) { for (int j = 0; j < length; j++) { if (char.ToLower(this.text[num + j]) != char.ToLower(this.text[index + j])) { goto Block_57; } } } else { for (int k = 0; k < length; k++) { if (this.text[num + k] != this.text[index + k]) { goto Block_59; } } } if (!flag3) { num += length; } break; } case OpCode.Character: case OpCode.Category: case OpCode.NotCategory: case OpCode.Range: case OpCode.Set: if (!this.EvalChar(mode, ref num, ref pc, false)) { goto Block_61; } break; case OpCode.In: { int num4 = pc + (int)this.program[pc + 1]; pc += 2; 
if (!this.EvalChar(mode, ref num, ref pc, true)) { goto Block_62; } pc = num4; break; } case OpCode.Open: this.Open((int)this.program[pc + 1], num); pc += 2; break; case OpCode.Close: this.Close((int)this.program[pc + 1], num); pc += 2; break; case OpCode.Balance: goto IL_7DB; case OpCode.BalanceStart: { int ptr = num; if (!this.Eval(Interpreter.Mode.Match, ref num, pc + 5)) { goto Block_63; } if (!this.Balance((int)this.program[pc + 1], (int)this.program[pc + 2], this.program[pc + 3] == 1, ptr)) { goto Block_65; } pc += (int)this.program[pc + 4]; break; } case OpCode.IfDefined: { int lastDefined2 = this.GetLastDefined((int)this.program[pc + 2]); if (lastDefined2 < 0) { pc += (int)this.program[pc + 1]; } else { pc += 3; } break; } case OpCode.Sub: if (!this.Eval(Interpreter.Mode.Match, ref num, pc + 2)) { goto Block_67; } pc += (int)this.program[pc + 1]; break; case OpCode.Test: { int cp = this.Checkpoint(); int num5 = num; if (this.Eval(Interpreter.Mode.Match, ref num5, pc + 3)) { pc += (int)this.program[pc + 1]; } else { this.Backtrack(cp); pc += (int)this.program[pc + 2]; } break; } case OpCode.Branch: goto IL_88A; case OpCode.Jump: pc += (int)this.program[pc + 1]; break; case OpCode.Repeat: goto IL_8EE; case OpCode.Until: goto IL_957; case OpCode.FastRepeat: goto IL_C6F; case OpCode.Anchor: goto IL_96; case OpCode.Info: goto IL_FE9; } } for (;;) { IL_88A: int cp2 = this.Checkpoint(); if (this.Eval(Interpreter.Mode.Match, ref num, pc + 2)) { break; } this.Backtrack(cp2); pc += (int)this.program[pc + 1]; if ((this.program[pc] & 255) == 0) { goto Block_70; } } IL_FF3: ref_ptr = num; if (mode == Interpreter.Mode.Match) { return(true); } if (mode != Interpreter.Mode.Count) { break; } this.fast.Count++; if (this.fast.IsMaximum || (this.fast.IsLazy && this.fast.IsMinimum)) { return(true); } pc = this.fast.Expression; continue; IL_96: int num6 = (int)this.program[pc + 1]; int num7 = (int)this.program[pc + 2]; bool flag5 = (ushort)(opFlags & OpFlags.RightToLeft) != 0; 
int num8 = (!flag5) ? (num + num7) : (num - num7); int num9 = this.text_end - this.match_min + num7; int num10 = 0; OpCode opCode2 = (OpCode)(this.program[pc + 3] & 255); if (opCode2 == OpCode.Position && num6 == 6) { switch (this.program[pc + 4]) { case 2: if (flag5 || num7 == 0) { if (flag5) { num = num7; } if (this.TryMatch(ref num, pc + num6)) { goto IL_FF3; } } break; case 3: if (num8 == 0) { num = 0; if (this.TryMatch(ref num, pc + num6)) { goto IL_FF3; } num8++; } while ((flag5 && num8 >= 0) || (!flag5 && num8 <= num9)) { if (num8 == 0 || this.text[num8 - 1] == '\n') { if (flag5) { num = ((num8 != num9) ? (num8 + num7) : num8); } else { num = ((num8 != 0) ? (num8 - num7) : num8); } if (this.TryMatch(ref num, pc + num6)) { goto IL_FF3; } } if (flag5) { num8--; } else { num8++; } } break; case 4: if (num8 == this.scan_ptr) { num = ((!flag5) ? (this.scan_ptr - num7) : (this.scan_ptr + num7)); if (this.TryMatch(ref num, pc + num6)) { goto IL_FF3; } } break; } break; } if (this.qs != null || (opCode2 == OpCode.String && num6 == (int)(6 + this.program[pc + 4]))) { bool flag6 = (this.program[pc + 3] & 1024) != 0; if (this.qs == null) { bool ignore = (this.program[pc + 3] & 512) != 0; string @string = this.GetString(pc + 3); this.qs = new QuickSearch(@string, ignore, flag6); } while ((flag5 && num8 >= num10) || (!flag5 && num8 <= num9)) { if (flag6) { num8 = this.qs.Search(this.text, num8, num10); if (num8 != -1) { num8 += this.qs.Length; } } else { num8 = this.qs.Search(this.text, num8, num9); } if (num8 < 0) { break; } num = ((!flag6) ? 
(num8 - num7) : (num8 + num7)); if (this.TryMatch(ref num, pc + num6)) { goto IL_FF3; } if (flag6) { num8 -= 2; } else { num8++; } } break; } if (opCode2 == OpCode.True) { while ((flag5 && num8 >= num10) || (!flag5 && num8 <= num9)) { num = num8; if (this.TryMatch(ref num, pc + num6)) { goto IL_FF3; } if (flag5) { num8--; } else { num8++; } } break; } while ((flag5 && num8 >= num10) || (!flag5 && num8 <= num9)) { num = num8; if (this.Eval(Interpreter.Mode.Match, ref num, pc + 3)) { num = ((!flag5) ? (num8 - num7) : (num8 + num7)); if (this.TryMatch(ref num, pc + num6)) { goto IL_FF3; } } if (flag5) { num8--; } else { num8++; } } break; IL_4BD: IL_7DB: goto IL_FF3; IL_8EE: this.repeat = new Interpreter.RepeatContext(this.repeat, this.ReadProgramCount(pc + 2), this.ReadProgramCount(pc + 4), (ushort)(opFlags & OpFlags.Lazy) != 0, pc + 6); if (this.Eval(Interpreter.Mode.Match, ref num, pc + (int)this.program[pc + 1])) { goto IL_FF3; } goto IL_941; IL_957: repeatContext = this.repeat; if (this.deep == repeatContext) { goto IL_FF3; } start = repeatContext.Start; count = repeatContext.Count; while (!repeatContext.IsMinimum) { repeatContext.Count++; repeatContext.Start = num; this.deep = repeatContext; if (!this.Eval(Interpreter.Mode.Match, ref num, repeatContext.Expression)) { goto Block_73; } if (this.deep != repeatContext) { goto IL_FF3; } } if (num == repeatContext.Start) { this.repeat = repeatContext.Previous; this.deep = null; if (this.Eval(Interpreter.Mode.Match, ref num, pc + 1)) { goto IL_FF3; } goto IL_A28; } else { if (repeatContext.IsLazy) { for (;;) { this.repeat = repeatContext.Previous; this.deep = null; int cp3 = this.Checkpoint(); if (this.Eval(Interpreter.Mode.Match, ref num, pc + 1)) { break; } this.Backtrack(cp3); this.repeat = repeatContext; if (repeatContext.IsMaximum) { goto Block_80; } repeatContext.Count++; repeatContext.Start = num; this.deep = repeatContext; if (!this.Eval(Interpreter.Mode.Match, ref num, repeatContext.Expression)) { goto 
Block_81; } if (this.deep != repeatContext) { break; } if (num == repeatContext.Start) { goto Block_83; } } goto IL_FF3; } int count2 = this.stack.Count; while (!repeatContext.IsMaximum) { int num11 = this.Checkpoint(); int value = num; int start2 = repeatContext.Start; repeatContext.Count++; repeatContext.Start = num; this.deep = repeatContext; if (!this.Eval(Interpreter.Mode.Match, ref num, repeatContext.Expression)) { repeatContext.Count--; repeatContext.Start = start2; this.Backtrack(num11); break; } if (this.deep != repeatContext) { this.stack.Count = count2; goto IL_FF3; } this.stack.Push(num11); this.stack.Push(value); if (num == repeatContext.Start) { break; } } this.repeat = repeatContext.Previous; for (;;) { this.deep = null; if (this.Eval(Interpreter.Mode.Match, ref num, pc + 1)) { break; } if (this.stack.Count == count2) { goto Block_88; } repeatContext.Count--; num = this.stack.Pop(); this.Backtrack(this.stack.Pop()); } this.stack.Count = count2; goto IL_FF3; } IL_C6F: this.fast = new Interpreter.RepeatContext(this.fast, this.ReadProgramCount(pc + 2), this.ReadProgramCount(pc + 4), (ushort)(opFlags & OpFlags.Lazy) != 0, pc + 6); this.fast.Start = num; int cp4 = this.Checkpoint(); pc += (int)this.program[pc + 1]; ushort num12 = this.program[pc]; int num13 = -1; int num14 = -1; int num15 = 0; OpCode opCode3 = (OpCode)(num12 & 255); if (opCode3 == OpCode.Character || opCode3 == OpCode.String) { OpFlags opFlags2 = (OpFlags)(num12 & 65280); if ((ushort)(opFlags2 & OpFlags.Negate) == 0) { if (opCode3 == OpCode.String) { int num16 = 0; if ((ushort)(opFlags2 & OpFlags.RightToLeft) != 0) { num16 = (int)(this.program[pc + 1] - 1); } num13 = (int)this.program[pc + 2 + num16]; } else { num13 = (int)this.program[pc + 1]; } if ((ushort)(opFlags2 & OpFlags.IgnoreCase) != 0) { num14 = (int)char.ToUpper((char)num13); } else { num14 = num13; } if ((ushort)(opFlags2 & OpFlags.RightToLeft) != 0) { num15 = -1; } else { num15 = 0; } } } if (this.fast.IsLazy) { if 
(!this.fast.IsMinimum && !this.Eval(Interpreter.Mode.Count, ref num, this.fast.Expression)) { goto Block_97; } for (;;) { int num17 = num + num15; if (num13 < 0 || (num17 >= 0 && num17 < this.text_end && (num13 == (int)this.text[num17] || num14 == (int)this.text[num17]))) { this.deep = null; if (this.Eval(Interpreter.Mode.Match, ref num, pc)) { break; } } if (this.fast.IsMaximum) { goto Block_103; } this.Backtrack(cp4); if (!this.Eval(Interpreter.Mode.Count, ref num, this.fast.Expression)) { goto Block_104; } } this.fast = this.fast.Previous; goto IL_FF3; } else { if (!this.Eval(Interpreter.Mode.Count, ref num, this.fast.Expression)) { goto Block_105; } int num18; if (this.fast.Count > 0) { num18 = (num - this.fast.Start) / this.fast.Count; } else { num18 = 0; } for (;;) { int num19 = num + num15; if (num13 < 0 || (num19 >= 0 && num19 < this.text_end && (num13 == (int)this.text[num19] || num14 == (int)this.text[num19]))) { this.deep = null; if (this.Eval(Interpreter.Mode.Match, ref num, pc)) { break; } } this.fast.Count--; if (!this.fast.IsMinimum) { goto Block_112; } num -= num18; this.Backtrack(cp4); } this.fast = this.fast.Previous; goto IL_FF3; } } IL_4B8: Block_44: Block_46: Block_47: Block_49: Block_52: Block_54: Block_55: Block_57: Block_59: Block_61: Block_62: Block_63: Block_65: Block_67: Block_70: goto IL_1067; IL_941: this.repeat = this.repeat.Previous; goto IL_1067; Block_73: repeatContext.Start = start; repeatContext.Count = count; goto IL_1067; IL_A28: this.repeat = repeatContext; Block_80: goto IL_1067; Block_81: repeatContext.Start = start; repeatContext.Count = count; Block_83: goto IL_1067; Block_88: this.repeat = repeatContext; goto IL_1067; Block_97: this.fast = this.fast.Previous; goto IL_1067; Block_103: this.fast = this.fast.Previous; goto IL_1067; Block_104: this.fast = this.fast.Previous; goto IL_1067; Block_105: this.fast = this.fast.Previous; goto IL_1067; Block_112: this.fast = this.fast.Previous; IL_FE9: IL_1067: if (mode == 
Interpreter.Mode.Match) { return(false); } if (mode != Interpreter.Mode.Count) { return(false); } if (!this.fast.IsLazy && this.fast.IsMinimum) { return(true); } ref_ptr = this.fast.Start; return(false); }
// Runs the compiled program from index `pc` against `text`, starting at text position `ref_ptr`.
//
// Each iteration reads program[pc], splits it into an opcode (low byte) and flags (high byte) and
// dispatches on the opcode. A handler either advances pc/ptr and lets the loop continue, or jumps to
// one of the two epilogues:
//   Pass: ref_ptr is overwritten with the current ptr. In Mode.Match the method returns true. In
//         Mode.Count, fast.Count is incremented and the method returns true once fast.IsMaximum holds
//         (or, for a lazy repeat, once fast.IsMinimum holds); otherwise pc is reset to fast.Expression
//         and execution restarts at Begin.
//   Fail: In Mode.Match the method returns false. In Mode.Count it returns true when the repeat is not
//         lazy and fast.IsMinimum holds; otherwise ref_ptr is reset to fast.Start and false is returned.
//
// Parameters:
//   mode    - Mode.Match or Mode.Count; selects the epilogue behaviour described above.
//   ref_ptr - text position to start from; updated on Pass (and on a failing Mode.Count run).
//   pc      - index into `program` of the first word to execute.
//
// Side effects visible in this body: Repeat/Until push and restore `this.repeat`, FastRepeat pushes and
// pops `this.fast`, Until and FastRepeat write `deep`, the greedy Until path pushes checkpoints and text
// positions on `stack`, and the substring-anchor path lazily creates and caches `qs`.
//
// NOTE(review): the String/Reference handlers fold case with Char.ToLower(char), and FastRepeat uses
// Char.ToUpper(char); these overloads are culture-sensitive in .NET - confirm that is intended.
// NOTE(review): on these physical lines several `//` comments are followed by code on the same line,
// which suggests the original line breaks were lost - confirm against the upstream formatting.
private bool Eval (Mode mode, ref int ref_ptr, int pc) { int ptr = ref_ptr; Begin: for (;;) { ushort word = program[pc]; OpCode op = (OpCode)(word & 0x00ff); OpFlags flags = (OpFlags)(word & 0xff00); switch (op) { case OpCode.Anchor: { int skip = program[pc + 1]; int anch_offset = program[pc + 2]; bool anch_reverse = (flags & OpFlags.RightToLeft) != 0; int anch_ptr = anch_reverse ? ptr - anch_offset : ptr + anch_offset; int anch_end = text_end - match_min + anch_offset; // maximum anchor position int anch_begin = 0; // the general case for an anchoring expression is at the bottom, however we // do some checks for the common cases before to save processing time. the current // optimizer only outputs three types of anchoring expressions: fixed position, // fixed substring, and no anchor. OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff); if (anch_op == OpCode.Position && skip == 6) { // position anchor // Anchor // Position // True switch ((Position)program[pc + 4]) { case Position.StartOfString: if (anch_reverse || anch_offset == 0) { if (anch_reverse) ptr = anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; } break; case Position.StartOfLine: if (anch_ptr == 0) { ptr = 0; if (TryMatch (ref ptr, pc + skip)) goto Pass; ++ anch_ptr; } while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) { if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') { if (anch_reverse) ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset; else ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; } if (anch_reverse) -- anch_ptr; else ++ anch_ptr; } break; case Position.StartOfScan: if (anch_ptr == scan_ptr) { ptr = anch_reverse ? 
scan_ptr + anch_offset : scan_ptr - anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; } break; default: // FIXME break; } } else if (qs != null || (anch_op == OpCode.String && skip == 6 + program[pc + 4])) { // substring anchor // Anchor // String // True bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0; if (qs == null) { bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0; string substring = GetString (pc + 3); qs = new QuickSearch (substring, ignore, reverse); } while ((anch_reverse && anch_ptr >= anch_begin) || (!anch_reverse && anch_ptr <= anch_end)) { if (reverse) { anch_ptr = qs.Search (text, anch_ptr, anch_begin); if (anch_ptr != -1) anch_ptr += qs.Length ; } else anch_ptr = qs.Search (text, anch_ptr, anch_end); if (anch_ptr < 0) break; ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; if (reverse) anch_ptr -= 2; else ++ anch_ptr; } } else if (anch_op == OpCode.True) { // no anchor // Anchor // True while ((anch_reverse && anch_ptr >= anch_begin) || (!anch_reverse && anch_ptr <= anch_end)) { ptr = anch_ptr; if (TryMatch (ref ptr, pc + skip)) goto Pass; if (anch_reverse) -- anch_ptr; else ++ anch_ptr; } } else { // general case // Anchor // <expr> // True while ((anch_reverse && anch_ptr >= anch_begin) || (!anch_reverse && anch_ptr <= anch_end)) { ptr = anch_ptr; if (Eval (Mode.Match, ref ptr, pc + 3)) { // anchor expression passed: try real expression at the correct offset ptr = anch_reverse ? 
anch_ptr + anch_offset : anch_ptr - anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; } if (anch_reverse) -- anch_ptr; else ++ anch_ptr; } } goto Fail; } case OpCode.False: { goto Fail; } case OpCode.True: { goto Pass; } case OpCode.Position: { if (!IsPosition ((Position)program[pc + 1], ptr)) goto Fail; pc += 2; break; } case OpCode.String: { bool reverse = (flags & OpFlags.RightToLeft) != 0; bool ignore = (flags & OpFlags.IgnoreCase) != 0; int len = program[pc + 1]; if (reverse) { ptr -= len; if (ptr < 0) goto Fail; } else if (ptr + len > text_end) goto Fail; pc += 2; for (int i = 0; i < len; ++ i) { char c = text[ptr + i]; if (ignore) c = Char.ToLower (c); if (c != (char)program[pc ++]) goto Fail; } if (!reverse) ptr += len; break; } case OpCode.Reference: { bool reverse = (flags & OpFlags.RightToLeft) != 0; bool ignore = (flags & OpFlags.IgnoreCase) != 0; int m = GetLastDefined (program [pc + 1]); if (m < 0) goto Fail; int str = marks [m].Index; int len = marks [m].Length; if (reverse) { ptr -= len; if (ptr < 0) goto Fail; } else if (ptr + len > text_end) goto Fail; pc += 2; if (ignore) { for (int i = 0; i < len; ++ i) { if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i])) goto Fail; } } else { for (int i = 0; i < len; ++ i) { if (text[ptr + i] != text[str + i]) goto Fail; } } if (!reverse) ptr += len; break; } case OpCode.Character: case OpCode.Category: case OpCode.NotCategory: case OpCode.Range: case OpCode.Set: { if (!EvalChar (mode, ref ptr, ref pc, false)) goto Fail; break; } case OpCode.In: { int target = pc + program[pc + 1]; pc += 2; if (!EvalChar (mode, ref ptr, ref pc, true)) goto Fail; pc = target; break; } case OpCode.Open: { Open (program[pc + 1], ptr); pc += 2; break; } case OpCode.Close: { Close (program[pc + 1], ptr); pc += 2; break; } case OpCode.BalanceStart: { int start = ptr; //point before the balancing group if (!Eval (Mode.Match, ref ptr, pc + 5)) goto Fail; if(!Balance (program[pc + 1], program[pc + 2], (program[pc 
+ 3] == 1 ? true : false) , start)) { goto Fail; } pc += program[pc + 4]; break; } case OpCode.Balance: { goto Pass; } case OpCode.IfDefined: { int m = GetLastDefined (program [pc + 2]); if (m < 0) pc += program[pc + 1]; else pc += 3; break; } case OpCode.Sub: { if (!Eval (Mode.Match, ref ptr, pc + 2)) goto Fail; pc += program[pc + 1]; break; } case OpCode.Test: { int cp = Checkpoint (); int test_ptr = ptr; if (Eval (Mode.Match, ref test_ptr, pc + 3)) pc += program[pc + 1]; else { Backtrack (cp); pc += program[pc + 2]; } break; } case OpCode.Branch: { OpCode branch_op; do { int cp = Checkpoint (); if (Eval (Mode.Match, ref ptr, pc + 2)) goto Pass; Backtrack (cp); pc += program[pc + 1]; branch_op = (OpCode)(program[pc] & 0xff); } while (branch_op != OpCode.False); goto Fail; } case OpCode.Jump: { pc += program[pc + 1]; break; } case OpCode.Repeat: { this.repeat = new RepeatContext ( this.repeat, // previous context ReadProgramCount (pc + 2), // minimum ReadProgramCount (pc + 4), // maximum (flags & OpFlags.Lazy) != 0, // lazy pc + 6 // subexpression ); if (Eval (Mode.Match, ref ptr, pc + program[pc + 1])) goto Pass; else { this.repeat = this.repeat.Previous; goto Fail; } } case OpCode.Until: { RepeatContext current = this.repeat; // // Can we avoid recursion? // // Backtracking can be forced in nested quantifiers from the tail of this quantifier. // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle // quantifiers. // // If 'deep' was unmolested, that implies that there was no nested quantifiers. // Thus, we can safely avoid recursion. // if (deep == current) goto Pass; int start = current.Start; int start_count = current.Count; while (!current.IsMinimum) { ++ current.Count; current.Start = ptr; deep = current; if (!Eval (Mode.Match, ref ptr, current.Expression)) { current.Start = start; current.Count = start_count; goto Fail; } if (deep != current) // recursive mode goto Pass; } if (ptr == current.Start) { // degenerate match ... 
match tail or fail this.repeat = current.Previous; deep = null; if (Eval (Mode.Match, ref ptr, pc + 1)) goto Pass; this.repeat = current; goto Fail; } if (current.IsLazy) { for (;;) { // match tail first ... this.repeat = current.Previous; deep = null; int cp = Checkpoint (); if (Eval (Mode.Match, ref ptr, pc + 1)) goto Pass; Backtrack (cp); // ... then match more this.repeat = current; if (current.IsMaximum) goto Fail; ++ current.Count; current.Start = ptr; deep = current; if (!Eval (Mode.Match, ref ptr, current.Expression)) { current.Start = start; current.Count = start_count; goto Fail; } if (deep != current) // recursive mode goto Pass; // Degenerate match: ptr has not moved since the last (failed) tail match. // So, next and subsequent tail matches will fail. if (ptr == current.Start) goto Fail; } } else { int stack_size = stack.Count; // match greedily as much as possible while (!current.IsMaximum) { int cp = Checkpoint (); int old_ptr = ptr; int old_start = current.Start; ++ current.Count; current.Start = ptr; deep = current; if (!Eval (Mode.Match, ref ptr, current.Expression)) { -- current.Count; current.Start = old_start; Backtrack (cp); break; } if (deep != current) { // recursive mode: no more backtracking, truncate the stack stack.Count = stack_size; goto Pass; } stack.Push (cp); stack.Push (old_ptr); // Degenerate match: no point going on if (ptr == current.Start) break; } // then, match the tail, backtracking as necessary. 
this.repeat = current.Previous; for (;;) { deep = null; if (Eval (Mode.Match, ref ptr, pc + 1)) { stack.Count = stack_size; goto Pass; } if (stack.Count == stack_size) { this.repeat = current; goto Fail; } --current.Count; ptr = stack.Pop (); Backtrack (stack.Pop ()); } } } case OpCode.FastRepeat: { this.fast = new RepeatContext ( fast, ReadProgramCount (pc + 2), // minimum ReadProgramCount (pc + 4), // maximum (flags & OpFlags.Lazy) != 0, // lazy pc + 6 // subexpression ); fast.Start = ptr; int cp = Checkpoint (); pc += program[pc + 1]; // tail expression ushort tail_word = program[pc]; int c1 = -1; // first character of tail operator int c2 = -1; // ... and the same character, in upper case if ignoring case int coff = 0; // 0 or -1 depending on direction OpCode tail_op = (OpCode)(tail_word & 0xff); if (tail_op == OpCode.Character || tail_op == OpCode.String) { OpFlags tail_flags = (OpFlags)(tail_word & 0xff00); if ((tail_flags & OpFlags.Negate) != 0) goto skip; if (tail_op == OpCode.String) { int offset = 0; if ((tail_flags & OpFlags.RightToLeft) != 0) { offset = program[pc + 1] - 1 ; } c1 = program[pc + 2 + offset]; // first char of string } else c1 = program[pc + 1]; // character if ((tail_flags & OpFlags.IgnoreCase) != 0) c2 = Char.ToUpper ((char)c1); // ignore case else c2 = c1; if ((tail_flags & OpFlags.RightToLeft) != 0) coff = -1; // reverse else coff = 0; } skip: if (fast.IsLazy) { if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) { //Console.WriteLine ("lazy fast: failed mininum."); fast = fast.Previous; goto Fail; } while (true) { int p = ptr + coff; if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) { deep = null; if (Eval (Mode.Match, ref ptr, pc)) break; } if (fast.IsMaximum) { //Console.WriteLine ("lazy fast: failed with maximum."); fast = fast.Previous; goto Fail; } Backtrack (cp); if (!Eval (Mode.Count, ref ptr, fast.Expression)) { //Console.WriteLine ("lazy fast: no more."); fast = fast.Previous; goto 
Fail; } } fast = fast.Previous; goto Pass; } else { if (!Eval (Mode.Count, ref ptr, fast.Expression)) { fast = fast.Previous; goto Fail; } int width; if (fast.Count > 0) width = (ptr - fast.Start) / fast.Count; else width = 0; while (true) { int p = ptr + coff; if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) { deep = null; if (Eval (Mode.Match, ref ptr, pc)) break; } -- fast.Count; if (!fast.IsMinimum) { fast = fast.Previous; goto Fail; } ptr -= width; Backtrack (cp); } fast = fast.Previous; goto Pass; } } case OpCode.Info: { Debug.Assert (false, "Regex", "Info block found in pattern"); goto Fail; } } } Pass: ref_ptr = ptr; switch (mode) { case Mode.Match: return true; case Mode.Count: { ++ fast.Count; if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum)) return true; pc = fast.Expression; goto Begin; } } Fail: switch (mode) { case Mode.Match: return false; case Mode.Count: { if (!fast.IsLazy && fast.IsMinimum) return true; ref_ptr = fast.Start; return false; } } return false; }