/// <summary>
/// Compiles a regular-expression source string into an <see cref="RECompiled"/>
/// holding the byte-code program, flags, paren count and first-character anchor.
/// </summary>
/// <param name="cx">Context passed through to the <c>CompilerState</c>.</param>
/// <param name="str">The pattern source text.</param>
/// <param name="global">
/// Flag characters: <c>'g'</c>, <c>'i'</c> and <c>'m'</c> are recognised.
/// May be <c>null</c>, meaning no flags. Any other character, or a flag that
/// appears more than once, is reported via <c>ReportError</c> with the
/// <c>msg.invalid.re.flag</c> message id.
/// </param>
/// <param name="flat">
/// When <c>true</c> and the pattern is non-empty, the whole source is compiled as a
/// single literal (<c>REOP_FLAT</c>) node instead of being parsed.
/// </param>
/// <returns>
/// The compiled regexp, or <c>null</c> when <c>ParseDisjunction</c> reports failure.
/// </returns>
internal static RECompiled CompileRE(Context cx, string str, string global, bool flat)
{
    RECompiled regexp = new RECompiled(str);
    int length = str.Length;
    int flags = 0;
    if (global != null)
    {
        for (int i = 0; i < global.Length; i++)
        {
            char c = global[i];
            // Bit contributed by this character; stays 0 for an unknown flag.
            int f = 0;
            switch (c)
            {
                case 'g':
                {
                    f = JSREG_GLOB;
                    break;
                }

                case 'i':
                {
                    f = JSREG_FOLD;
                    break;
                }

                case 'm':
                {
                    f = JSREG_MULTILINE;
                    break;
                }

                default:
                {
                    ReportError("msg.invalid.re.flag", c.ToString());
                    break;
                }
            }
            // A flag may be given at most once: repeating it (e.g. "gg") is
            // rejected the same way as an unknown flag character.
            if ((flags & f) != 0)
            {
                ReportError("msg.invalid.re.flag", c.ToString());
            }
            flags |= f;
        }
    }
    regexp.flags = flags;

    CompilerState state = new CompilerState(cx, regexp.source, length, flags);
    if (flat && length > 0)
    {
        // Literal pattern: one FLAT node covering the entire source.
        state.result = new RENode(REOP_FLAT);
        state.result.chr = state.cpbegin[0];
        state.result.length = length;
        state.result.flatIndex = 0;
        // NOTE(review): 5 is presumably one opcode byte plus two index operands - confirm.
        state.progLength += 5;
    }
    else if (!ParseDisjunction(state))
    {
        return null;
    }

    // One extra byte for the terminating REOP_END written below.
    regexp.program = new byte[state.progLength + 1];
    if (state.classCount != 0)
    {
        regexp.classList = new RECharSet[state.classCount];
        regexp.classCount = state.classCount;
    }
    int endPC = EmitREBytecode(state, regexp, 0, state.result);
    regexp.program[endPC++] = REOP_END;
    regexp.parenCount = state.parenCount;

    // Derive anchorCh from the first opcode of the emitted program.
    switch (regexp.program[0])
    {
        case REOP_UCFLAT1:
        case REOP_UCFLAT1i:
        {
            // Program starts with a single literal stored as an index operand.
            regexp.anchorCh = (char)GetIndex(regexp.program, 1);
            break;
        }

        case REOP_FLAT1:
        case REOP_FLAT1i:
        {
            // Program starts with a single literal stored in one byte.
            regexp.anchorCh = (char)(regexp.program[1] & 0xFF);
            break;
        }

        case REOP_FLAT:
        case REOP_FLATi:
        {
            // Program starts with a literal run; the operand is its offset in the source.
            int k = GetIndex(regexp.program, 1);
            regexp.anchorCh = regexp.source[k];
            break;
        }

        case REOP_BOL:
        {
            regexp.anchorCh = ANCHOR_BOL;
            break;
        }

        case REOP_ALT:
        {
            // Anchored only when both alternatives begin with a beginning-of-line op.
            RENode n = state.result;
            if (n.kid.op == REOP_BOL && n.kid2.op == REOP_BOL)
            {
                regexp.anchorCh = ANCHOR_BOL;
            }
            break;
        }
    }
    return regexp;
}
/// <summary>
/// Creates a RegExp object around an already compiled program.
/// </summary>
/// <param name="scope">Scope handed to <c>ScriptRuntime.SetBuiltinProtoAndParent</c>.</param>
/// <param name="regexpCompiled">The compiled regular expression this object stores.</param>
internal NativeRegExp(Scriptable scope, RECompiled regexpCompiled)
{
    re = regexpCompiled;
    // A new object always starts with lastIndex at zero.
    lastIndex = 0;
    ScriptRuntime.SetBuiltinProtoAndParent(this, scope, TopLevel.Builtins.RegExp);
}
/// <summary>
/// Attempts to match <paramref name="re"/> against <paramref name="input"/>, trying
/// successive positions from <paramref name="start"/> up to and including
/// <paramref name="end"/>.
/// </summary>
/// <param name="gData">Per-match state; reset here and updated as positions are tried.</param>
/// <param name="re">The compiled regular expression.</param>
/// <param name="input">The text being searched.</param>
/// <param name="start">First position to try.</param>
/// <param name="end">Position one past the last character considered.</param>
/// <param name="multiline">Forces multiline mode in addition to the regexp's own flag.</param>
/// <returns><c>true</c> as soon as <c>ExecuteREBytecode</c> succeeds at some position.</returns>
private static bool MatchRegExp(REGlobalData gData, RECompiled re, string input, int start, int end, bool multiline)
{
    gData.parens = re.parenCount != 0 ? new long[re.parenCount] : null;
    gData.backTrackStackTop = null;
    gData.stateStackTop = null;
    gData.multiline = multiline || (re.flags & JSREG_MULTILINE) != 0;
    gData.regexp = re;

    int anchor = re.anchorCh;

    // The position just past the last character is tried as well, so that
    // end-of-input / end-of-line conditions can be detected.
    for (int pos = start; pos <= end; ++pos)
    {
        if (anchor >= 0)
        {
            // The program begins with a literal: advance to the next occurrence
            // of that character, or give up when none is left before 'end'.
            while (true)
            {
                if (pos == end)
                {
                    return false;
                }
                char candidate = input[pos];
                if (candidate == anchor)
                {
                    break;
                }
                if ((re.flags & JSREG_FOLD) != 0 && Upcase(candidate) == Upcase((char)anchor))
                {
                    break;
                }
                ++pos;
            }
        }

        gData.cp = pos;
        gData.skipped = pos - start;

        // Mark every capture as unset before this attempt.
        for (int slot = 0; slot < re.parenCount; slot++)
        {
            gData.parens[slot] = -1L;
        }

        bool matched = ExecuteREBytecode(gData, input, end);
        gData.backTrackStackTop = null;
        gData.stateStackTop = null;
        if (matched)
        {
            return true;
        }

        // A pattern anchored at beginning-of-line without multiline mode is
        // not retried at later positions.
        if (anchor == ANCHOR_BOL && !gData.multiline)
        {
            gData.skipped = end;
            return false;
        }

        // Resume from whatever skip count gData now holds.
        pos = start + gData.skipped;
    }
    return false;
}
/// <summary>
/// Writes the byte code for the node chain starting at <paramref name="t"/> into
/// <c>re.program</c>, beginning at offset <paramref name="pc"/>, recursing into
/// child nodes as needed.
/// </summary>
/// <param name="state">Compiler state; its flags decide case-insensitive opcodes.</param>
/// <param name="re">Regexp whose <c>program</c> and <c>classList</c> are filled in.</param>
/// <param name="pc">Offset in the program at which to start writing.</param>
/// <param name="t">First node of the sibling chain to emit; may be <c>null</c>.</param>
/// <returns>The program offset just past the last byte written.</returns>
private static int EmitREBytecode(CompilerState state, RECompiled re, int pc, RENode t)
{
    byte[] code = re.program;
    for (RENode node = t; node != null; node = node.next)
    {
        // Write the node's opcode first; several cases below patch code[pc - 1].
        code[pc++] = node.op;
        switch (node.op)
        {
            case REOP_EMPTY:
            {
                // Nothing is emitted for an empty node: step back over the opcode.
                --pc;
                break;
            }

            case REOP_ALTPREREQ:
            case REOP_ALTPREREQi:
            case REOP_ALTPREREQ2:
            case REOP_ALT:
            {
                if (node.op != REOP_ALT)
                {
                    // Prerequisite variants carry two extra index operands
                    // ahead of the ordinary alternation layout.
                    bool fold = node.op == REOP_ALTPREREQi;
                    AddIndex(code, pc, fold ? Upcase(node.chr) : node.chr);
                    pc += INDEX_LEN;
                    AddIndex(code, pc, fold ? Upcase((char)node.index) : node.index);
                    pc += INDEX_LEN;
                }

                RENode secondBranch = node.kid2;

                // Slot for the jump to the second alternative.
                int altSlot = pc;
                pc += INDEX_LEN;
                pc = EmitREBytecode(state, re, pc, node.kid);

                // After the first alternative, jump over the second one.
                code[pc++] = REOP_JUMP;
                int termSlot = pc;
                pc += INDEX_LEN;
                ResolveForwardJump(code, altSlot, pc);
                pc = EmitREBytecode(state, re, pc, secondBranch);

                code[pc++] = REOP_JUMP;
                int tailSlot = pc;
                pc += INDEX_LEN;

                // Both trailing jumps land on the term that follows.
                ResolveForwardJump(code, termSlot, pc);
                ResolveForwardJump(code, tailSlot, pc);
                break;
            }

            case REOP_FLAT:
            {
                bool fromSource = node.flatIndex != -1;
                if (fromSource)
                {
                    // Absorb following FLAT nodes that continue this run
                    // contiguously in the source.
                    while (node.next != null
                        && node.next.op == REOP_FLAT
                        && node.flatIndex + node.length == node.next.flatIndex)
                    {
                        node.length += node.next.length;
                        node.next = node.next.next;
                    }
                }

                bool foldCase = (state.flags & JSREG_FOLD) != 0;
                if (fromSource && node.length > 1)
                {
                    // Multi-character run: operands are source offset and length.
                    code[pc - 1] = foldCase ? REOP_FLATi : REOP_FLAT;
                    pc = AddIndex(code, pc, node.flatIndex);
                    pc = AddIndex(code, pc, node.length);
                }
                else if (node.chr < 256)
                {
                    // Single character that fits in one byte.
                    code[pc - 1] = foldCase ? REOP_FLAT1i : REOP_FLAT1;
                    code[pc++] = unchecked((byte)node.chr);
                }
                else
                {
                    // Single character that needs a full index operand.
                    code[pc - 1] = foldCase ? REOP_UCFLAT1i : REOP_UCFLAT1;
                    pc = AddIndex(code, pc, node.chr);
                }
                break;
            }

            case REOP_LPAREN:
            {
                pc = AddIndex(code, pc, node.parenIndex);
                pc = EmitREBytecode(state, re, pc, node.kid);
                code[pc++] = REOP_RPAREN;
                pc = AddIndex(code, pc, node.parenIndex);
                break;
            }

            case REOP_BACKREF:
            {
                pc = AddIndex(code, pc, node.parenIndex);
                break;
            }

            case REOP_ASSERT:
            case REOP_ASSERT_NOT:
            {
                // Layout: jump slot, child program, matching test opcode.
                int termSlot = pc;
                pc += INDEX_LEN;
                pc = EmitREBytecode(state, re, pc, node.kid);
                code[pc++] = node.op == REOP_ASSERT ? REOP_ASSERTTEST : REOP_ASSERTNOTTEST;
                ResolveForwardJump(code, termSlot, pc);
                break;
            }

            case REOP_QUANT:
            {
                // Common ranges get dedicated opcodes; anything else keeps the
                // generic quantifier with explicit min/max operands.
                if (node.min == 0 && node.max == -1)
                {
                    code[pc - 1] = node.greedy ? REOP_STAR : REOP_MINIMALSTAR;
                }
                else if (node.min == 0 && node.max == 1)
                {
                    code[pc - 1] = node.greedy ? REOP_OPT : REOP_MINIMALOPT;
                }
                else if (node.min == 1 && node.max == -1)
                {
                    code[pc - 1] = node.greedy ? REOP_PLUS : REOP_MINIMALPLUS;
                }
                else
                {
                    if (!node.greedy)
                    {
                        code[pc - 1] = REOP_MINIMALQUANT;
                    }
                    pc = AddIndex(code, pc, node.min);
                    // max may be -1, so it is stored shifted up by one.
                    pc = AddIndex(code, pc, node.max + 1);
                }

                pc = AddIndex(code, pc, node.parenCount);
                pc = AddIndex(code, pc, node.parenIndex);
                int termSlot = pc;
                pc += INDEX_LEN;
                pc = EmitREBytecode(state, re, pc, node.kid);
                code[pc++] = REOP_ENDCHILD;
                ResolveForwardJump(code, termSlot, pc);
                break;
            }

            case REOP_CLASS:
            {
                if (!node.sense)
                {
                    code[pc - 1] = REOP_NCLASS;
                }
                pc = AddIndex(code, pc, node.index);
                re.classList[node.index] = new RECharSet(node.bmsize, node.startIndex, node.kidlen, node.sense);
                break;
            }
        }
    }
    return pc;
}