/// <summary>
/// Tries to match the compiled regexp <paramref name="re"/> against
/// <paramref name="chars"/>, attempting each candidate position from
/// <paramref name="start"/> up to and including <paramref name="end"/>.
/// </summary>
/// <param name="gData">Match state; reset here and filled in by the match attempt.</param>
/// <param name="re">Compiled regexp; stored into <c>gData.regexp</c>.</param>
/// <param name="chars">Input characters.</param>
/// <param name="start">First position at which a match is attempted.</param>
/// <param name="end">Position treated as end of input.</param>
/// <param name="multiline">Stored into <c>gData.multiline</c>.</param>
/// <returns>
/// <c>true</c> as soon as <c>executeREBytecode</c> succeeds at some position
/// (with <c>gData.skipped</c> set to that position minus <paramref name="start"/>);
/// <c>false</c> otherwise.
/// </returns>
private static bool matchRegExp(REGlobalData gData, RECompiled re, char [] chars, int start, int end, bool multiline)
{
    // Fresh capture storage (none when the pattern has no groups).
    gData.parens = (re.parenCount != 0) ? new long [re.parenCount] : null;

    gData.backTrackStackTop = null;
    gData.stateStackTop = null;

    gData.multiline = multiline;
    gData.regexp = re;
    gData.lastParen = 0;

    int anchorCh = gData.regexp.anchorCh;

    // The position just past the last character is a candidate too, so
    // that end-of-input/line conditions can be detected.
    int pos = start;
    while (pos <= end) {
        if (anchorCh >= 0) {
            // The pattern begins with a literal: advance to the next
            // occurrence of it, or give up if the input runs out first.
            while (true) {
                if (pos == end) {
                    return false;
                }
                char candidate = chars [pos];
                if (candidate == anchorCh) {
                    break;
                }
                if ((gData.regexp.flags & JSREG_FOLD) != 0
                    && upcase (candidate) == upcase ((char)anchorCh)) {
                    break;
                }
                pos++;
            }
        }

        gData.cp = pos;

        // Mark every capture as unset before this attempt.
        for (int group = 0; group < re.parenCount; group++) {
            gData.set_parens (group, -1, 0);
        }

        bool matched = executeREBytecode (gData, chars, end);

        // Drop whatever the attempt left on the two stacks.
        gData.backTrackStackTop = null;
        gData.stateStackTop = null;

        if (matched) {
            gData.skipped = pos - start;
            return true;
        }

        pos++;
    }

    return false;
}
/// <summary>
/// Interprets the program in <c>gData.regexp.program</c> against
/// <paramref name="chars"/>, beginning at the input position already
/// stored in <c>gData.cp</c>.
/// </summary>
/// <param name="gData">Match state: current position, captures, and the
/// program-state and backtrack stacks.</param>
/// <param name="chars">Input characters.</param>
/// <param name="end">Position treated as end of input; no character at or
/// beyond it is consumed.</param>
/// <returns>
/// <c>true</c> when <c>REOP_END</c> is executed; <c>false</c> when a step
/// fails and the backtrack stack is empty.
/// </returns>
private static bool executeREBytecode(REGlobalData gData, char [] chars, int end)
{
    sbyte [] code = gData.regexp.program;
    int pc = 0;
    bool result = false;

    // The continuation is the (op, pc) pair that REOP_ENDCHILD resumes at.
    int contPc = 0;
    int contOp = REOP_END;

    if (debug) {
        System.Console.Out.WriteLine ("Input = \"" + new string (chars) + "\", start at " + gData.cp);
    }

    int op = code [pc++];
    while (true) {
        if (debug) {
            System.Console.Out.WriteLine ("Testing at " + gData.cp + ", op = " + op);
        }

        // Inside the switch: "break" falls through to the success/backtrack
        // handling below; "continue" dispatches the already-loaded op.
        switch (op) {
        case REOP_EMPTY:
            result = true;
            break;

        case REOP_BOL: {
            if (gData.cp == 0) {
                result = true;
            } else {
                bool multi = gData.multiline || ((gData.regexp.flags & JSREG_MULTILINE) != 0);
                result = multi && isLineTerm (chars [gData.cp - 1]);
            }
            break;
        }

        case REOP_EOL: {
            if (gData.cp == end) {
                result = true;
            } else {
                bool multi = gData.multiline || ((gData.regexp.flags & JSREG_MULTILINE) != 0);
                result = multi && isLineTerm (chars [gData.cp]);
            }
            break;
        }

        case REOP_WBDRY: {
            bool noWordBefore = (gData.cp == 0 || !isWord (chars [gData.cp - 1]));
            bool wordAfter = ((gData.cp < end) && isWord (chars [gData.cp]));
            result = noWordBefore ^ !wordAfter;
            break;
        }

        case REOP_WNONBDRY: {
            bool noWordBefore = (gData.cp == 0 || !isWord (chars [gData.cp - 1]));
            bool wordAfter = ((gData.cp < end) && isWord (chars [gData.cp]));
            result = noWordBefore ^ wordAfter;
            break;
        }

        // Single-character tests: consume one character on success.
        case REOP_DOT:
            if (gData.cp != end && !isLineTerm (chars [gData.cp])) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;

        case REOP_DIGIT:
            if (gData.cp != end && isDigit (chars [gData.cp])) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;

        case REOP_NONDIGIT:
            if (gData.cp != end && !isDigit (chars [gData.cp])) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;

        case REOP_SPACE:
            if (gData.cp != end && isREWhiteSpace (chars [gData.cp])) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;

        case REOP_NONSPACE:
            if (gData.cp != end && !isREWhiteSpace (chars [gData.cp])) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;

        case REOP_ALNUM:
            if (gData.cp != end && isWord (chars [gData.cp])) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;

        case REOP_NONALNUM:
            if (gData.cp != end && !isWord (chars [gData.cp])) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;

        case REOP_FLAT: {
            // Operands: <offset> <length>
            int litOffset = getIndex (code, pc);
            int litLength = getIndex (code, pc + INDEX_LEN);
            pc += 2 * INDEX_LEN;
            result = flatNMatcher (gData, litOffset, litLength, chars, end);
            break;
        }

        case REOP_FLATi: {
            // Operands: <offset> <length>
            int litOffset = getIndex (code, pc);
            int litLength = getIndex (code, pc + INDEX_LEN);
            pc += 2 * INDEX_LEN;
            result = flatNIMatcher (gData, litOffset, litLength, chars, end);
            break;
        }

        case REOP_FLAT1: {
            char literal = (char)(code [pc++] & 0xFF);
            if (gData.cp != end && chars [gData.cp] == literal) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;
        }

        case REOP_FLAT1i: {
            char literal = (char)(code [pc++] & 0xFF);
            if (gData.cp != end && upcase (chars [gData.cp]) == upcase (literal)) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;
        }

        case REOP_UCFLAT1: {
            char literal = (char)getIndex (code, pc);
            pc += INDEX_LEN;
            if (gData.cp != end && chars [gData.cp] == literal) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;
        }

        case REOP_UCFLAT1i: {
            char literal = (char)getIndex (code, pc);
            pc += INDEX_LEN;
            if (gData.cp != end && upcase (chars [gData.cp]) == upcase (literal)) {
                gData.cp++;
                result = true;
            } else {
                result = false;
            }
            break;
        }

        case REOP_ALT: {
            // Save the continuation, register the other alternative as a
            // backtrack point, then run the first alternative.
            pushProgState (gData, 0, 0, null, contPc, contOp);
            int altPc = pc + getOffset (code, pc);
            sbyte altOp = code [altPc++];
            pushBackTrackState (gData, altOp, altPc);
            pc += INDEX_LEN;
            op = code [pc++];
            continue;
        }

        case REOP_JUMP: {
            REProgState saved = popProgState (gData);
            contPc = saved.continuation_pc;
            contOp = saved.continuation_op;
            pc += getOffset (code, pc);
            op = code [pc++];
            continue;
        }

        case REOP_LPAREN: {
            int groupIndex = getIndex (code, pc);
            pc += INDEX_LEN;
            // Record where the group starts; its length is set by REOP_RPAREN.
            gData.set_parens (groupIndex, gData.cp, 0);
            op = code [pc++];
            continue;
        }

        case REOP_RPAREN: {
            int groupIndex = getIndex (code, pc);
            pc += INDEX_LEN;
            int groupStart = gData.parens_index (groupIndex);
            gData.set_parens (groupIndex, groupStart, gData.cp - groupStart);
            if (groupIndex > gData.lastParen) {
                gData.lastParen = groupIndex;
            }
            op = code [pc++];
            continue;
        }

        case REOP_BACKREF: {
            int groupIndex = getIndex (code, pc);
            pc += INDEX_LEN;
            result = backrefMatcher (gData, groupIndex, chars, end);
            break;
        }

        case REOP_CLASS: {
            int classIndex = getIndex (code, pc);
            pc += INDEX_LEN;
            result = gData.cp != end
                && classMatcher (gData, gData.regexp.classList [classIndex], chars [gData.cp]);
            if (result) {
                gData.cp++;
            }
            break;
        }

        case REOP_ASSERT:
        case REOP_ASSERT_NOT: {
            sbyte testOp;
            // The current backtrack stack top is saved with the state so the
            // matching *TEST op can restore it.
            pushProgState (gData, 0, 0, gData.backTrackStackTop, contPc, contOp);
            if (op != REOP_ASSERT) {
                testOp = REOP_ASSERTNOTTEST;
            } else {
                testOp = REOP_ASSERTTEST;
            }
            pushBackTrackState (gData, testOp, pc + getOffset (code, pc));
            pc += INDEX_LEN;
            op = code [pc++];
            continue;
        }

        case REOP_ASSERTTEST:
        case REOP_ASSERTNOTTEST: {
            REProgState saved = popProgState (gData);
            gData.cp = saved.index;
            gData.backTrackStackTop = saved.backTrack;
            contPc = saved.continuation_pc;
            contOp = saved.continuation_op;
            // ASSERTTEST keeps the outcome as is; ASSERTNOTTEST inverts it.
            if (op != REOP_ASSERTTEST) {
                result = !result;
            }
            break;
        }

        case REOP_STAR:
        case REOP_PLUS:
        case REOP_OPT:
        case REOP_QUANT:
        case REOP_MINIMALSTAR:
        case REOP_MINIMALPLUS:
        case REOP_MINIMALOPT:
        case REOP_MINIMALQUANT: {
            int min, max;
            bool greedy;
            switch (op) {
            case REOP_STAR:
            case REOP_MINIMALSTAR:
                greedy = (op == REOP_STAR);
                min = 0;
                max = -1;
                break;
            case REOP_PLUS:
            case REOP_MINIMALPLUS:
                greedy = (op == REOP_PLUS);
                min = 1;
                max = -1;
                break;
            case REOP_OPT:
            case REOP_MINIMALOPT:
                greedy = (op == REOP_OPT);
                min = 0;
                max = 1;
                break;
            case REOP_QUANT:
            case REOP_MINIMALQUANT:
                greedy = (op == REOP_QUANT);
                min = getOffset (code, pc);
                pc += INDEX_LEN;
                // See comments in emitREBytecode for " - 1" reason
                max = getOffset (code, pc) - 1;
                pc += INDEX_LEN;
                break;
            default:
                throw Context.CodeBug ();
            }

            pushProgState (gData, min, max, null, contPc, contOp);
            if (greedy) {
                contOp = REOP_REPEAT;
                contPc = pc;
                pushBackTrackState (gData, REOP_REPEAT, pc);
                // Step over <parencount>, <parenindex> & <next>
                pc += 3 * INDEX_LEN;
                op = code [pc++];
            } else if (min != 0) {
                contOp = REOP_MINIMALREPEAT;
                contPc = pc;
                // Step over <parencount>, <parenindex> & <next>
                pc += 3 * INDEX_LEN;
                op = code [pc++];
            } else {
                pushBackTrackState (gData, REOP_MINIMALREPEAT, pc);
                popProgState (gData);
                pc += 2 * INDEX_LEN; // <parencount> & <parenindex>
                pc = pc + getOffset (code, pc);
                op = code [pc++];
            }
            continue;
        }

        case REOP_ENDCHILD:
            // Use the current continuation.
            pc = contPc;
            op = contOp;
            continue;

        case REOP_REPEAT: {
            REProgState saved = popProgState (gData);

            if (!result) {
                // There's been a failure, see if we have enough children.
                if (saved.min == 0) {
                    result = true;
                }
                contPc = saved.continuation_pc;
                contOp = saved.continuation_op;
                pc += 2 * INDEX_LEN; // <parencount> & <parenindex>
                pc = pc + getOffset (code, pc);
                break;
            }

            if (saved.min == 0 && gData.cp == saved.index) {
                // Matched an empty string, that'll get us nowhere.
                result = false;
                contPc = saved.continuation_pc;
                contOp = saved.continuation_op;
                pc += 2 * INDEX_LEN;
                pc = pc + getOffset (code, pc);
                break;
            }

            // One more child matched: count it off (-1 max means unbounded).
            int remainingMin = (saved.min != 0) ? saved.min - 1 : 0;
            int remainingMax = (saved.max != -1) ? saved.max - 1 : -1;

            if (remainingMax == 0) {
                result = true;
                contPc = saved.continuation_pc;
                contOp = saved.continuation_op;
                pc += 2 * INDEX_LEN;
                pc = pc + getOffset (code, pc);
                break;
            }

            pushProgState (gData, remainingMin, remainingMax, null, saved.continuation_pc, saved.continuation_op);
            contOp = REOP_REPEAT;
            contPc = pc;
            pushBackTrackState (gData, REOP_REPEAT, pc);

            int parenCount = getIndex (code, pc);
            pc += INDEX_LEN;
            int parenIndex = getIndex (code, pc);
            pc += 2 * INDEX_LEN;
            op = code [pc++];

            // Unset the captures listed in the operands before re-running the child.
            for (int k = 0; k < parenCount; k++) {
                gData.set_parens (parenIndex + k, -1, 0);
            }
            continue;
        }

        case REOP_MINIMALREPEAT: {
            REProgState saved = popProgState (gData);

            if (!result) {
                // Non-greedy failure - try to consume another child.
                if (saved.max == -1 || saved.max > 0) {
                    pushProgState (gData, saved.min, saved.max, null, saved.continuation_pc, saved.continuation_op);
                    contOp = REOP_MINIMALREPEAT;
                    contPc = pc;

                    int parenCount = getIndex (code, pc);
                    pc += INDEX_LEN;
                    int parenIndex = getIndex (code, pc);
                    pc += 2 * INDEX_LEN;
                    for (int k = 0; k < parenCount; k++) {
                        gData.set_parens (parenIndex + k, -1, 0);
                    }
                    op = code [pc++];
                    continue;
                }

                // Don't need to adjust pc since we're going to pop.
                contPc = saved.continuation_pc;
                contOp = saved.continuation_op;
                break;
            }

            if (saved.min == 0 && gData.cp == saved.index) {
                // Matched an empty string, that'll get us nowhere.
                result = false;
                contPc = saved.continuation_pc;
                contOp = saved.continuation_op;
                break;
            }

            int remainingMin = (saved.min != 0) ? saved.min - 1 : 0;
            int remainingMax = (saved.max != -1) ? saved.max - 1 : -1;

            pushProgState (gData, remainingMin, remainingMax, null, saved.continuation_pc, saved.continuation_op);
            if (remainingMin != 0) {
                // Minimum not yet reached: run the child again.
                contOp = REOP_MINIMALREPEAT;
                contPc = pc;

                int parenCount = getIndex (code, pc);
                pc += INDEX_LEN;
                int parenIndex = getIndex (code, pc);
                pc += 2 * INDEX_LEN;
                for (int k = 0; k < parenCount; k++) {
                    gData.set_parens (parenIndex + k, -1, 0);
                }
                op = code [pc++];
            } else {
                // Minimum reached: go on past the loop, leaving a backtrack
                // point that re-enters REOP_MINIMALREPEAT.
                contPc = saved.continuation_pc;
                contOp = saved.continuation_op;
                pushBackTrackState (gData, REOP_MINIMALREPEAT, pc);
                popProgState (gData);
                pc += 2 * INDEX_LEN;
                pc = pc + getOffset (code, pc);
                op = code [pc++];
            }
            continue;
        }

        case REOP_END:
            return true;

        default:
            throw Context.CodeBug ();
        }

        // The step succeeded: move on to the next instruction.
        if (result) {
            op = code [pc++];
            continue;
        }

        // The step failed: if there's a backtrack option, take it.
        // Otherwise this is a complete and utter failure.
        REBackTrackData frame = gData.backTrackStackTop;
        if (frame == null) {
            return false;
        }

        gData.backTrackStackTop = frame.previous;
        gData.lastParen = frame.lastParen;

        // TODO: If the backtrack frame will no longer be used, then
        // TODO: there is no need to copy its parens array.
        if (frame.parens != null) {
            gData.parens = new long [frame.parens.Length];
            frame.parens.CopyTo (gData.parens, 0);
        }

        gData.cp = frame.cp;
        gData.stateStackTop = frame.stateStackTop;

        contOp = gData.stateStackTop.continuation_op;
        contPc = gData.stateStackTop.continuation_pc;

        pc = frame.continuation_pc;
        op = frame.continuation_op;
    }
}