/*
 * Runs this regular expression against str and publishes the outcome in res.
 *
 * indexp is assumed to be an array of length 1. On entry indexp [0] holds
 * the position at which matching starts (clamped to the input length); on a
 * successful match it is overwritten with the position just past the match.
 *
 * Return value:
 *   - no match:  Undefined.Value when matchType is PREFIX, otherwise null;
 *   - match:     true when matchType is TEST, otherwise a freshly created
 *                "Array" object describing the match.
 *
 * On a successful match res.parens, res.lastParen, res.lastMatch,
 * res.leftContext and res.rightContext are updated as well.
 */
internal virtual object executeRegExp(Context cx, IScriptable scopeObj, RegExpImpl res, string str, int [] indexp, int matchType)
{
    REGlobalData gData = new REGlobalData ();

    char [] input = str.ToCharArray ();
    int inputEnd = input.Length;

    // Never start searching beyond the end of the input.
    int start = indexp [0];
    if (start > inputEnd) {
        start = inputEnd;
    }

    // The recursive matcher does the real work.
    if (!matchRegExp (gData, re, input, start, inputEnd, res.multiline)) {
        if (matchType == PREFIX) {
            return Undefined.Value;
        }
        return null;
    }

    // gData.cp is left just past the matched text; gData.skipped is the
    // number of characters passed over between start and the match.
    int matchEnd = gData.cp;
    indexp [0] = matchEnd;
    int matchStart = start + gData.skipped;
    int matchLength = matchEnd - matchStart;

    bool testOnly = (matchType == TEST);
    object result;
    IScriptable matchArray;
    if (!testOnly) {
        /*
         * The array returned on match has element 0 bound to the matched
         * string, elements 1 through re.parenCount bound to the paren
         * matches, an index property telling the length of the left
         * context, and an input property referring to the input string.
         */
        IScriptable topScope = GetTopLevelScope (scopeObj);
        result = ScriptRuntime.NewObject (cx, topScope, "Array", null);
        matchArray = (IScriptable)result;
        matchArray.Put (0, matchArray, new string (input, matchStart, matchLength));
    }
    else {
        /*
         * Only testing for a match and updating the regexp statics: do not
         * allocate an array object, just report success.
         */
        result = true;
        matchArray = null;
    }

    if (re.parenCount != 0) {
        // Stays null when no group took part in the match.
        SubString lastCaptured = null;
        res.parens = new SubString [re.parenCount];
        for (int group = 0; group < re.parenCount; group++) {
            int captureStart = gData.parens_index (group);
            if (captureStart == -1) {
                // Group did not participate: its array slot is undefined.
                if (!testOnly) {
                    matchArray.Put (group + 1, matchArray, Undefined.Value);
                }
                continue;
            }

            int captureLength = gData.parens_length (group);
            lastCaptured = new SubString (input, captureStart, captureLength);
            res.parens [group] = lastCaptured;
            if (!testOnly) {
                matchArray.Put (group + 1, matchArray, lastCaptured.ToString ());
            }
        }
        res.lastParen = lastCaptured;
    }
    else {
        res.parens = null;
        res.lastParen = SubString.EmptySubString;
    }

    if (!testOnly) {
        /*
         * Define the index and input properties last for better for/in
         * loop order (so they come after the elements).
         */
        matchArray.Put ("index", matchArray, (object)matchStart);
        matchArray.Put ("input", matchArray, str);
    }

    // Create the three context substrings the first time a match is recorded.
    if (res.lastMatch == null) {
        res.lastMatch = new SubString ();
        res.leftContext = new SubString ();
        res.rightContext = new SubString ();
    }

    res.lastMatch.charArray = input;
    res.lastMatch.index = matchStart;
    res.lastMatch.length = matchLength;

    res.leftContext.charArray = input;
    if (cx.Version != Context.Versions.JS1_2) {
        /*
         * For JS1.3 and ECMAv2, emulate Perl5 exactly:
         *
         * js1.3        "hi", "hi there"      "hihitherehi therebye"
         */
        res.leftContext.index = 0;
        res.leftContext.length = matchStart;
    }
    else {
        /*
         * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps
         * used in scalar contexts, and unintentionally for the
         * string.match "list" pseudo-context. On "hi there bye", the
         * following would result:
         *
         * Language     while(/ /g){print("$`");}   s/ /$`/g
         * perl4.036    "hi", "there"               "hihitherehi therebye"
         * perl5        "hi", "hi there"            "hihitherehi therebye"
         * js1.2        "hi", "there"               "hihitheretherebye"
         *
         * Insofar as JS1.2 always defined $` as "left context from the
         * last match" for global regexps, it was more consistent than
         * perl4.
         */
        res.leftContext.index = start;
        res.leftContext.length = gData.skipped;
    }

    res.rightContext.charArray = input;
    res.rightContext.index = matchEnd;
    res.rightContext.length = inputEnd - matchEnd;

    return result;
}
/*
 * Matches a back reference (\n) at the current position gData.cp.
 *
 * ECMA algorithm this implements:
 *
 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
 * 2. If E is not a character then go to step 6.
 * 3. Let ch be E's character.
 * 4. Let A be a one-element RECharSet containing the character ch.
 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
 * 6. E must be an integer. Let n be that integer.
 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
 * 8. Return an internal Matcher closure that takes two arguments, a State x
 *    and a Continuation c, and performs the following:
 *     1. Let cap be x's captures internal array.
 *     2. Let s be cap[n].
 *     3. If s is undefined, then call c(x) and return its result.
 *     4. Let e be x's endIndex.
 *     5. Let len be s's length.
 *     6. Let f be e+len.
 *     7. If f>InputLength, return failure.
 *     8. If there exists an integer i between 0 (inclusive) and len
 *        (exclusive) such that Canonicalize(s[i]) is not the same character
 *        as Canonicalize(Input [e+i]), then return failure.
 *     9. Let y be the State (f, cap).
 *    10. Call c(y) and return its result.
 *
 * Returns true (advancing gData.cp past the repeated text) when the input
 * at gData.cp repeats the text captured by group parenIndex, or when that
 * group has no capture; returns false otherwise and leaves gData.cp as is.
 */
private static bool backrefMatcher(REGlobalData gData, int parenIndex, char [] chars, int end)
{
    int captureStart = gData.parens_index (parenIndex);
    if (captureStart == -1) {
        // An unset capture matches trivially without consuming input.
        return true;
    }

    int captureLength = gData.parens_length (parenIndex);
    if ((gData.cp + captureLength) > end) {
        // Not enough input left to hold a copy of the capture.
        return false;
    }

    bool foldCase = (gData.regexp.flags & JSREG_FOLD) != 0;
    for (int offset = 0; offset < captureLength; offset++) {
        bool sameChar;
        if (foldCase) {
            sameChar = upcase (chars [captureStart + offset]) == upcase (chars [gData.cp + offset]);
        }
        else {
            sameChar = chars [captureStart + offset] == chars [gData.cp + offset];
        }
        if (!sameChar) {
            return false;
        }
    }

    gData.cp += captureLength;
    return true;
}
/*
 * Interprets the compiled regexp program (gData.regexp.program) against
 * chars, starting at the input position already stored in gData.cp.
 *
 * gData - match state: current position (cp), capture data, the program
 *         state stack and the backtrack stack; all are mutated while matching.
 * chars - the input being matched.
 * end   - index one past the last input character that may be examined.
 *
 * Returns true when the REOP_END opcode is reached, and false when an
 * opcode fails while the backtrack stack is empty.
 *
 * Control flow: simple opcodes set "result" and "break" out of the switch;
 * the code after the switch then either fetches the next opcode (success)
 * or restores the most recent backtrack state (failure). Opcodes that pick
 * the next opcode themselves use "continue" to skip that common tail.
 */
private static bool executeREBytecode(REGlobalData gData, char [] chars, int end)
{
    int pc = 0;
    sbyte [] program = gData.regexp.program;
    // The "current continuation" is the opcode/pc pair that REOP_ENDCHILD
    // resumes at; it starts out as REOP_END.
    int currentContinuation_op;
    int currentContinuation_pc;
    bool result = false;

    currentContinuation_pc = 0;
    currentContinuation_op = REOP_END;
    if (debug) {
        System.Console.Out.WriteLine ("Input = \"" + new string (chars) + "\", start at " + gData.cp);
    }
    int op = program [pc++];
    for (; ; ) {
        if (debug) {
            System.Console.Out.WriteLine ("Testing at " + gData.cp + ", op = " + op);
        }
        switch (op) {

            case REOP_EMPTY:
                result = true;
                break;

            case REOP_BOL:
                // Succeeds at position 0, or (in multiline mode) right after
                // a line terminator.
                if (gData.cp != 0) {
                    if (gData.multiline || ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
                        if (!isLineTerm (chars [gData.cp - 1])) {
                            result = false;
                            break;
                        }
                    }
                    else {
                        result = false;
                        break;
                    }
                }
                result = true;
                break;

            case REOP_EOL:
                // Succeeds at "end", or (in multiline mode) right before a
                // line terminator.
                if (gData.cp != end) {
                    if (gData.multiline || ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
                        if (!isLineTerm (chars [gData.cp])) {
                            result = false;
                            break;
                        }
                    }
                    else {
                        result = false;
                        break;
                    }
                }
                result = true;
                break;

            case REOP_WBDRY:
                // True when exactly one of the previous/current characters
                // is a word character (input edges count as non-word).
                result = ((gData.cp == 0 || !isWord (chars [gData.cp - 1])) ^ !((gData.cp < end) && isWord (chars [gData.cp])));
                break;

            case REOP_WNONBDRY:
                // Negation of REOP_WBDRY.
                result = ((gData.cp == 0 || !isWord (chars [gData.cp - 1])) ^ ((gData.cp < end) && isWord (chars [gData.cp])));
                break;

            // Single-character class tests: each one fails at "end" and
            // consumes one character when it succeeds.
            case REOP_DOT:
                result = (gData.cp != end && !isLineTerm (chars [gData.cp]));
                if (result) {
                    gData.cp++;
                }
                break;

            case REOP_DIGIT:
                result = (gData.cp != end && isDigit (chars [gData.cp]));
                if (result) {
                    gData.cp++;
                }
                break;

            case REOP_NONDIGIT:
                result = (gData.cp != end && !isDigit (chars [gData.cp]));
                if (result) {
                    gData.cp++;
                }
                break;

            case REOP_SPACE:
                result = (gData.cp != end && isREWhiteSpace (chars [gData.cp]));
                if (result) {
                    gData.cp++;
                }
                break;

            case REOP_NONSPACE:
                result = (gData.cp != end && !isREWhiteSpace (chars [gData.cp]));
                if (result) {
                    gData.cp++;
                }
                break;

            case REOP_ALNUM:
                result = (gData.cp != end && isWord (chars [gData.cp]));
                if (result) {
                    gData.cp++;
                }
                break;

            case REOP_NONALNUM:
                result = (gData.cp != end && !isWord (chars [gData.cp]));
                if (result) {
                    gData.cp++;
                }
                break;

            // Literal runs: operands are <offset> <length>, handed to the
            // flat matchers (the "i" variant calls the flatNIMatcher helper).
            case REOP_FLAT: {
                    int offset = getIndex (program, pc);
                    pc += INDEX_LEN;
                    int length = getIndex (program, pc);
                    pc += INDEX_LEN;
                    result = flatNMatcher (gData, offset, length, chars, end);
                }
                break;

            case REOP_FLATi: {
                    int offset = getIndex (program, pc);
                    pc += INDEX_LEN;
                    int length = getIndex (program, pc);
                    pc += INDEX_LEN;
                    result = flatNIMatcher (gData, offset, length, chars, end);
                }
                break;

            // Single literal character stored in one program byte; the
            // "& 0xFF" keeps the sbyte from being sign-extended.
            case REOP_FLAT1: {
                    char matchCh = (char)(program [pc++] & 0xFF);
                    result = (gData.cp != end && chars [gData.cp] == matchCh);
                    if (result) {
                        gData.cp++;
                    }
                }
                break;

            case REOP_FLAT1i: {
                    char matchCh = (char)(program [pc++] & 0xFF);
                    result = (gData.cp != end && upcase (chars [gData.cp]) == upcase (matchCh));
                    if (result) {
                        gData.cp++;
                    }
                }
                break;

            // Single literal character stored as an INDEX_LEN-wide operand.
            case REOP_UCFLAT1: {
                    char matchCh = (char)getIndex (program, pc);
                    pc += INDEX_LEN;
                    result = (gData.cp != end && chars [gData.cp] == matchCh);
                    if (result) {
                        gData.cp++;
                    }
                }
                break;

            case REOP_UCFLAT1i: {
                    char matchCh = (char)getIndex (program, pc);
                    pc += INDEX_LEN;
                    result = (gData.cp != end && upcase (chars [gData.cp]) == upcase (matchCh));
                    if (result) {
                        gData.cp++;
                    }
                }
                break;

            case REOP_ALT: {
                    // Save the current continuation, register the code at
                    // pc + <offset> as a backtrack point, then carry on with
                    // the code that directly follows the offset operand.
                    int nextpc;
                    sbyte nextop;
                    pushProgState (gData, 0, 0, null, currentContinuation_pc, currentContinuation_op);
                    nextpc = pc + getOffset (program, pc);
                    nextop = program [nextpc++];
                    pushBackTrackState (gData, nextop, nextpc);
                    pc += INDEX_LEN;
                    op = program [pc++];
                }
                continue;

            case REOP_JUMP: {
                    // Restore the continuation saved on the program state
                    // stack and jump by the relative offset operand.
                    int offset;
                    REProgState state = popProgState (gData);
                    currentContinuation_pc = state.continuation_pc;
                    currentContinuation_op = state.continuation_op;
                    offset = getOffset (program, pc);
                    pc += offset;
                    op = program [pc++];
                }
                continue;

            case REOP_LPAREN: {
                    // Open a capture: record its start with length 0.
                    int parenIndex = getIndex (program, pc);
                    pc += INDEX_LEN;
                    gData.set_parens (parenIndex, gData.cp, 0);
                    op = program [pc++];
                }
                continue;

            case REOP_RPAREN: {
                    // Close a capture: its length is the distance from the
                    // recorded start to the current position.
                    int cap_index;
                    int parenIndex = getIndex (program, pc);
                    pc += INDEX_LEN;
                    cap_index = gData.parens_index (parenIndex);
                    gData.set_parens (parenIndex, cap_index, gData.cp - cap_index);
                    if (parenIndex > gData.lastParen)
                        gData.lastParen = parenIndex;
                    op = program [pc++];
                }
                continue;

            case REOP_BACKREF: {
                    int parenIndex = getIndex (program, pc);
                    pc += INDEX_LEN;
                    result = backrefMatcher (gData, parenIndex, chars, end);
                }
                break;

            case REOP_CLASS: {
                    // Character class: the operand indexes gData.regexp.classList.
                    int index = getIndex (program, pc);
                    pc += INDEX_LEN;
                    if (gData.cp != end) {
                        if (classMatcher (gData, gData.regexp.classList [index], chars [gData.cp])) {
                            gData.cp++;
                            result = true;
                            break;
                        }
                    }
                    result = false;
                }
                break;

            case REOP_ASSERT:
            case REOP_ASSERT_NOT: {
                    // Remember the backtrack stack top and continuation, then
                    // register the matching *TEST opcode at pc + <offset> as
                    // a backtrack point before running the asserted code.
                    sbyte testOp;
                    pushProgState (gData, 0, 0, gData.backTrackStackTop, currentContinuation_pc, currentContinuation_op);
                    if (op == REOP_ASSERT) {
                        testOp = REOP_ASSERTTEST;
                    }
                    else {
                        testOp = REOP_ASSERTNOTTEST;
                    }
                    pushBackTrackState (gData, testOp, pc + getOffset (program, pc));
                    pc += INDEX_LEN;
                    op = program [pc++];
                }
                continue;

            case REOP_ASSERTTEST:
            case REOP_ASSERTNOTTEST: {
                    // Restore the input position, backtrack stack and
                    // continuation saved by REOP_ASSERT/REOP_ASSERT_NOT, then
                    // turn the asserted code's result into the assertion's
                    // result (inverted for REOP_ASSERTNOTTEST).
                    REProgState state = popProgState (gData);
                    gData.cp = state.index;
                    gData.backTrackStackTop = state.backTrack;
                    currentContinuation_pc = state.continuation_pc;
                    currentContinuation_op = state.continuation_op;
                    if (result) {
                        if (op == REOP_ASSERTTEST) {
                            result = true;
                        }
                        else {
                            result = false;
                        }
                    }
                    else {
                        if (op == REOP_ASSERTTEST) {
                            // Do nothing
                        }
                        else {
                            result = true;
                        }
                    }
                }
                break;

            case REOP_STAR:
            case REOP_PLUS:
            case REOP_OPT:
            case REOP_QUANT:
            case REOP_MINIMALSTAR:
            case REOP_MINIMALPLUS:
            case REOP_MINIMALOPT:
            case REOP_MINIMALQUANT: {
                    // Quantifier entry. Work out min/max (max == -1 is never
                    // decremented by the REPEAT opcodes, i.e. it acts as "no
                    // upper bound") and whether the quantifier is greedy.
                    int min, max;
                    bool greedy = false;
                    switch (op) {

                        case REOP_STAR:
                            greedy = true;
                            // fallthrough
                            goto case REOP_MINIMALSTAR;

                        case REOP_MINIMALSTAR:
                            min = 0;
                            max = -1;
                            break;

                        case REOP_PLUS:
                            greedy = true;
                            // fallthrough
                            goto case REOP_MINIMALPLUS;

                        case REOP_MINIMALPLUS:
                            min = 1;
                            max = -1;
                            break;

                        case REOP_OPT:
                            greedy = true;
                            // fallthrough
                            goto case REOP_MINIMALOPT;

                        case REOP_MINIMALOPT:
                            min = 0;
                            max = 1;
                            break;

                        case REOP_QUANT:
                            greedy = true;
                            // fallthrough
                            goto case REOP_MINIMALQUANT;

                        case REOP_MINIMALQUANT:
                            min = getOffset (program, pc);
                            pc += INDEX_LEN;
                            // See comments in emitREBytecode for " - 1" reason
                            max = getOffset (program, pc) - 1;
                            pc += INDEX_LEN;
                            break;

                        default:
                            throw Context.CodeBug ();
                    }
                    pushProgState (gData, min, max, null, currentContinuation_pc, currentContinuation_op);
                    if (greedy) {
                        // Greedy: run the child right away, with REOP_REPEAT
                        // both as its continuation and as a backtrack point.
                        currentContinuation_op = REOP_REPEAT;
                        currentContinuation_pc = pc;
                        pushBackTrackState (gData, REOP_REPEAT, pc);
                        /* Step over <parencount>, <parenindex> & <next> */
                        pc += 3 * INDEX_LEN;
                        op = program [pc++];
                    }
                    else {
                        if (min != 0) {
                            // Non-greedy with a required minimum: the child
                            // has to run at least once.
                            currentContinuation_op = REOP_MINIMALREPEAT;
                            currentContinuation_pc = pc;
                            /* <parencount> <parenindex> & <next> */
                            pc += 3 * INDEX_LEN;
                            op = program [pc++];
                        }
                        else {
                            // Non-greedy with min == 0: skip the child first
                            // and leave REOP_MINIMALREPEAT as a backtrack
                            // point in case the rest of the program fails.
                            pushBackTrackState (gData, REOP_MINIMALREPEAT, pc);
                            popProgState (gData);
                            pc += 2 * INDEX_LEN; // <parencount> & <parenindex>
                            pc = pc + getOffset (program, pc);
                            op = program [pc++];
                        }
                    }
                }
                continue;

            case REOP_ENDCHILD:
                // Use the current continuation.
                pc = currentContinuation_pc;
                op = currentContinuation_op;
                continue;

            case REOP_REPEAT: {
                    // Greedy repeat step, reached after the child has run
                    // (result is the child's outcome) or via backtracking.
                    REProgState state = popProgState (gData);
                    if (!result) {
                        //
                        // There's been a failure, see if we have enough
                        // children.
                        //
                        if (state.min == 0)
                            result = true;
                        currentContinuation_pc = state.continuation_pc;
                        currentContinuation_op = state.continuation_op;
                        pc += 2 * INDEX_LEN; /* <parencount> & <parenindex> */
                        pc = pc + getOffset (program, pc);
                        break;
                    }
                    else {
                        if (state.min == 0 && gData.cp == state.index) {
                            // matched an empty string, that'll get us nowhere
                            result = false;
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            pc += 2 * INDEX_LEN;
                            pc = pc + getOffset (program, pc);
                            break;
                        }
                        int new_min = state.min, new_max = state.max;
                        if (new_min != 0)
                            new_min--;
                        if (new_max != -1)
                            new_max--;
                        if (new_max == 0) {
                            // Upper bound reached: leave the loop successfully.
                            result = true;
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            pc += 2 * INDEX_LEN;
                            pc = pc + getOffset (program, pc);
                            break;
                        }
                        // Try another iteration with the decremented counts.
                        pushProgState (gData, new_min, new_max, null, state.continuation_pc, state.continuation_op);
                        currentContinuation_op = REOP_REPEAT;
                        currentContinuation_pc = pc;
                        pushBackTrackState (gData, REOP_REPEAT, pc);
                        int parenCount = getIndex (program, pc);
                        pc += INDEX_LEN;
                        int parenIndex = getIndex (program, pc);
                        pc += 2 * INDEX_LEN;
                        op = program [pc++];
                        // Reset the captures covered by this quantifier
                        // (parenCount groups starting at parenIndex).
                        for (int k = 0; k < parenCount; k++) {
                            gData.set_parens (parenIndex + k, -1, 0);
                        }
                    }
                }
                continue;

            case REOP_MINIMALREPEAT: {
                    // Non-greedy repeat step.
                    REProgState state = popProgState (gData);
                    if (!result) {
                        //
                        // Non-greedy failure - try to consume another child.
                        //
                        if (state.max == -1 || state.max > 0) {
                            pushProgState (gData, state.min, state.max, null, state.continuation_pc, state.continuation_op);
                            currentContinuation_op = REOP_MINIMALREPEAT;
                            currentContinuation_pc = pc;
                            int parenCount = getIndex (program, pc);
                            pc += INDEX_LEN;
                            int parenIndex = getIndex (program, pc);
                            pc += 2 * INDEX_LEN;
                            // Reset the captures covered by this quantifier.
                            for (int k = 0; k < parenCount; k++) {
                                gData.set_parens (parenIndex + k, -1, 0);
                            }
                            op = program [pc++];
                            continue;
                        }
                        else {
                            // Don't need to adjust pc since we're going to pop.
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            break;
                        }
                    }
                    else {
                        if (state.min == 0 && gData.cp == state.index) {
                            // Matched an empty string, that'll get us nowhere.
                            result = false;
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            break;
                        }
                        int new_min = state.min, new_max = state.max;
                        if (new_min != 0)
                            new_min--;
                        if (new_max != -1)
                            new_max--;
                        pushProgState (gData, new_min, new_max, null, state.continuation_pc, state.continuation_op);
                        if (new_min != 0) {
                            // Minimum not yet met: run the child again.
                            currentContinuation_op = REOP_MINIMALREPEAT;
                            currentContinuation_pc = pc;
                            int parenCount = getIndex (program, pc);
                            pc += INDEX_LEN;
                            int parenIndex = getIndex (program, pc);
                            pc += 2 * INDEX_LEN;
                            for (int k = 0; k < parenCount; k++) {
                                gData.set_parens (parenIndex + k, -1, 0);
                            }
                            op = program [pc++];
                        }
                        else {
                            // Minimum met: go on with the code after the loop,
                            // keeping REOP_MINIMALREPEAT as a backtrack point.
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            pushBackTrackState (gData, REOP_MINIMALREPEAT, pc);
                            popProgState (gData);
                            pc += 2 * INDEX_LEN;
                            pc = pc + getOffset (program, pc);
                            op = program [pc++];
                        }
                        continue;
                    }
                }

            case REOP_END:
                return true;

            default:
                throw Context.CodeBug ();
        }
        /*
         * If the match failed and there's a backtrack option, take it.
         * Otherwise this is a complete and utter failure.
         */
        if (!result) {
            REBackTrackData backTrackData = gData.backTrackStackTop;
            if (backTrackData != null) {
                // Pop the backtrack entry and restore the state it recorded:
                // captures, input position, program state stack, continuation.
                gData.backTrackStackTop = backTrackData.previous;
                gData.lastParen = backTrackData.lastParen;
                // TODO: If backTrackData will no longer be used, then
                // TODO: there is no need to clone backTrackData.parens
                if (backTrackData.parens != null) {
                    gData.parens = new long [backTrackData.parens.Length];
                    backTrackData.parens.CopyTo (gData.parens, 0);
                }
                gData.cp = backTrackData.cp;
                gData.stateStackTop = backTrackData.stateStackTop;
                currentContinuation_op = gData.stateStackTop.continuation_op;
                currentContinuation_pc = gData.stateStackTop.continuation_pc;
                pc = backTrackData.continuation_pc;
                op = backTrackData.continuation_op;
                continue;
            }
            else
                return false;
        }
        // Success: fetch the next opcode.
        op = program [pc++];
    }
}