예제 #1
0
        /*
        * indexp is assumed to be an array of length 1
        */
        /*
        * Matches the regexp held in the re member against str, beginning at
        * indexp [0] (clamped to the length of str), and records the outcome in
        * res: parens, lastParen, lastMatch, leftContext and rightContext.
        *
        * On a successful match indexp [0] is overwritten with the position just
        * past the match (gData.cp as left by matchRegExp).
        *
        * Returns:
        *   - null when nothing matched and matchType is not PREFIX;
        *   - Undefined.Value when nothing matched and matchType is PREFIX;
        *   - true when something matched and matchType is TEST;
        *   - otherwise a new "Array" object whose element 0 is the matched text,
        *     elements 1..re.parenCount are the captures (Undefined.Value for an
        *     unset capture), followed by "index" and "input" properties.
        */
        internal virtual object executeRegExp(Context cx, IScriptable scopeObj, RegExpImpl res, string str, int [] indexp, int matchType)
        {
            REGlobalData matchState = new REGlobalData ();

            int requestedStart = indexp [0];
            char [] input = str.ToCharArray ();
            int inputLength = input.Length;
            int start = (requestedStart > inputLength) ? inputLength : requestedStart;

            // The matcher does the real work; the rest of this method only
            // publishes what it found.
            if (!matchRegExp (matchState, re, input, start, inputLength, res.multiline)) {
                if (matchType == PREFIX) {
                    return Undefined.Value;
                }
                return null;
            }

            int matchEnd = matchState.cp;
            indexp [0] = matchEnd;
            int matchLength = matchEnd - (start + matchState.skipped);
            int matchStart = matchEnd - matchLength;

            // TEST callers only need a yes/no answer, so no array is allocated
            // for them and matchArray stays null.
            bool buildArray = matchType != TEST;
            object result;
            IScriptable matchArray = null;
            if (buildArray) {
                result = ScriptRuntime.NewObject (cx, GetTopLevelScope (scopeObj), "Array", null);
                matchArray = (IScriptable)result;
                matchArray.Put (0, matchArray, new string (input, matchStart, matchLength));
            }
            else {
                result = true;
            }

            if (re.parenCount == 0) {
                res.parens = null;
                res.lastParen = SubString.EmptySubString;
            }
            else {
                // Remains null when no group captured anything.
                SubString lastCapture = null;
                res.parens = new SubString [re.parenCount];
                for (int group = 0; group < re.parenCount; group++) {
                    int captureStart = matchState.parens_index (group);
                    if (captureStart == -1) {
                        // Unset group: res.parens [group] stays null, the array
                        // slot (if any) is filled with Undefined.Value.
                        if (buildArray) {
                            matchArray.Put (group + 1, matchArray, Undefined.Value);
                        }
                        continue;
                    }

                    lastCapture = new SubString (input, captureStart, matchState.parens_length (group));
                    res.parens [group] = lastCapture;
                    if (buildArray) {
                        matchArray.Put (group + 1, matchArray, lastCapture.ToString ());
                    }
                }
                res.lastParen = lastCapture;
            }

            if (buildArray) {
                // "index" and "input" are added after the elements so that a
                // for/in loop lists them last.
                matchArray.Put ("index", matchArray, (object)(start + matchState.skipped));
                matchArray.Put ("input", matchArray, str);
            }

            // The three context holders are created together the first time.
            if (res.lastMatch == null) {
                res.lastMatch = new SubString ();
                res.leftContext = new SubString ();
                res.rightContext = new SubString ();
            }

            SubString lastMatch = res.lastMatch;
            lastMatch.charArray = input;
            lastMatch.index = matchStart;
            lastMatch.length = matchLength;

            /*
            * Left context depends on the language version.
            *
            * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
            * in scalar contexts, and unintentionally for the string.match "list"
            * pseudo-context: $` is the text between the previous match and this
            * one. On "hi there bye":
            *
            * Language     while(/ /g){print("$`");}   s/ /$`/g
            * perl4.036    "hi", "there"               "hihitherehi therebye"
            * perl5        "hi", "hi there"            "hihitherehi therebye"
            * js1.2        "hi", "there"               "hihitheretherebye"
            * js1.3        "hi", "hi there"            "hihitherehi therebye"
            *
            * Every other version (JS1.3, ECMAv2) follows Perl5: $` is everything
            * from the start of the input up to the match.
            */
            SubString left = res.leftContext;
            left.charArray = input;
            bool js12 = cx.Version == Context.Versions.JS1_2;
            left.index = js12 ? start : 0;
            left.length = js12 ? matchState.skipped : start + matchState.skipped;

            SubString right = res.rightContext;
            right.charArray = input;
            right.index = matchEnd;
            right.length = inputLength - matchEnd;

            return result;
        }
예제 #2
0
        /*
        1. Evaluate DecimalEscape to obtain an EscapeValue E.
        2. If E is not a character then go to step 6.
        3. Let ch be E's character.
        4. Let A be a one-element RECharSet containing the character ch.
        5. Call CharacterSetMatcher(A, false) and return its Matcher result.
        6. E must be an integer. Let n be that integer.
        7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
        8. Return an internal Matcher closure that takes two arguments, a State x
           and a Continuation c, and performs the following:
            1. Let cap be x's captures internal array.
            2. Let s be cap[n].
            3. If s is undefined, then call c(x) and return its result.
            4. Let e be x's endIndex.
            5. Let len be s's length.
            6. Let f be e+len.
            7. If f>InputLength, return failure.
            8. If there exists an integer i between 0 (inclusive) and len (exclusive)
               such that Canonicalize(s[i]) is not the same character as
               Canonicalize(Input[e+i]), then return failure.
            9. Let y be the State (f, cap).
            10. Call c(y) and return its result.
        */
        /*
        * Matches a back-reference to capture group parenIndex at the current
        * position gData.cp.
        *
        * An unset group (parens_index returns -1) matches without consuming
        * anything. Otherwise the captured text must reappear at gData.cp, compared
        * through upcase () when the regexp has the JSREG_FOLD flag and exactly
        * otherwise. On success gData.cp is advanced past the repeated text; on
        * failure gData.cp is left unchanged.
        */
        private static bool backrefMatcher(REGlobalData gData, int parenIndex, char [] chars, int end)
        {
            int captureStart = gData.parens_index (parenIndex);
            if (captureStart == -1) {
                return true;
            }

            int captureLength = gData.parens_length (parenIndex);
            int inputStart = gData.cp;
            if ((inputStart + captureLength) > end) {
                // Not enough input left to hold the captured text.
                return false;
            }

            bool foldCase = (gData.regexp.flags & JSREG_FOLD) != 0;
            for (int k = 0; k < captureLength; k++) {
                bool differs = foldCase
                    ? upcase (chars [captureStart + k]) != upcase (chars [inputStart + k])
                    : chars [captureStart + k] != chars [inputStart + k];
                if (differs) {
                    return false;
                }
            }

            gData.cp = inputStart + captureLength;
            return true;
        }
예제 #3
0
        /*
        * Interprets the compiled program in gData.regexp.program against chars,
        * starting at gData.cp and using end as the input limit.
        *
        * Each pass of the loop executes one opcode. Matching opcodes set result
        * and leave the switch with break; control opcodes load the next op
        * themselves and continue the loop. After a break, a false result
        * restores the newest entry on gData.backTrackStackTop and resumes there;
        * if there is no entry the method returns false. A true result simply
        * fetches the next opcode.
        *
        * Returns true when REOP_END is executed. Throws the exception produced by
        * Context.CodeBug () for an opcode the switch does not handle.
        */
        private static bool executeREBytecode(REGlobalData gData, char [] chars, int end)
        {
            int pc = 0;
            sbyte [] program = gData.regexp.program;
            // Where REOP_ENDCHILD resumes: the opcode to run and the pc to run it at.
            int currentContinuation_op;
            int currentContinuation_pc;
            bool result = false;

            // Initial continuation is REOP_END, i.e. finishing the outermost
            // child ends the match successfully.
            currentContinuation_pc = 0;
            currentContinuation_op = REOP_END;
            if (debug) {
                System.Console.Out.WriteLine ("Input = \"" + new string (chars) + "\", start at " + gData.cp);
            }
            int op = program [pc++];
            for (; ; ) {
                if (debug) {
                    System.Console.Out.WriteLine ("Testing at " + gData.cp + ", op = " + op);
                }
                switch (op) {

                    // Always succeeds; consumes nothing.
                    case REOP_EMPTY:
                        result = true;
                        break;

                    // Succeeds at position 0, or - when gData.multiline or the
                    // JSREG_MULTILINE flag is set - right after a character for
                    // which isLineTerm is true. Consumes nothing.
                    case REOP_BOL:
                        if (gData.cp != 0) {
                            if (gData.multiline || ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
                                if (!isLineTerm (chars [gData.cp - 1])) {
                                    result = false;
                                    break;
                                }
                            }
                            else {
                                result = false;
                                break;
                            }
                        }
                        result = true;
                        break;

                    // Mirror of REOP_BOL: succeeds at end, or in multiline mode
                    // right before a character for which isLineTerm is true.
                    case REOP_EOL:
                        if (gData.cp != end) {
                            if (gData.multiline || ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
                                if (!isLineTerm (chars [gData.cp])) {
                                    result = false;
                                    break;
                                }
                            }
                            else {
                                result = false;
                                break;
                            }
                        }
                        result = true;
                        break;

                    // True when exactly one of "no word char before cp" and
                    // "no word char at cp" holds. Consumes nothing.
                    case REOP_WBDRY:
                        result = ((gData.cp == 0 || !isWord (chars [gData.cp - 1])) ^ !((gData.cp < end) && isWord (chars [gData.cp])));
                        break;

                    // Logical complement of REOP_WBDRY (second operand is not negated).
                    case REOP_WNONBDRY:
                        result = ((gData.cp == 0 || !isWord (chars [gData.cp - 1])) ^ ((gData.cp < end) && isWord (chars [gData.cp])));
                        break;

                    // The single-character opcodes below share one shape: fail
                    // at end of input, otherwise test chars [gData.cp] with a
                    // predicate and advance cp by one on success.

                    // Any character for which isLineTerm is false.
                    case REOP_DOT:
                        result = (gData.cp != end && !isLineTerm (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_DIGIT:
                        result = (gData.cp != end && isDigit (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_NONDIGIT:
                        result = (gData.cp != end && !isDigit (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_SPACE:
                        result = (gData.cp != end && isREWhiteSpace (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_NONSPACE:
                        result = (gData.cp != end && !isREWhiteSpace (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_ALNUM:
                        result = (gData.cp != end && isWord (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_NONALNUM:
                        result = (gData.cp != end && !isWord (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    // Operands: <offset> <length>, each INDEX_LEN wide; the
                    // comparison itself is delegated to flatNMatcher.
                    // NOTE(review): offset presumably indexes the regexp source
                    // text - confirm in flatNMatcher.
                    case REOP_FLAT: {
                            int offset = getIndex (program, pc);
                            pc += INDEX_LEN;
                            int length = getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = flatNMatcher (gData, offset, length, chars, end);
                        }
                        break;

                    // Same operands as REOP_FLAT, delegated to flatNIMatcher.
                    case REOP_FLATi: {
                            int offset = getIndex (program, pc);
                            pc += INDEX_LEN;
                            int length = getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = flatNIMatcher (gData, offset, length, chars, end);
                        }
                        break;

                    // Operand: one program byte, masked to 0..255, used as the
                    // literal character to match.
                    case REOP_FLAT1: {
                            char matchCh = (char)(program [pc++] & 0xFF);
                            result = (gData.cp != end && chars [gData.cp] == matchCh);
                            if (result) {
                                gData.cp++;
                            }
                        }
                        break;

                    // As REOP_FLAT1, but both sides are passed through upcase
                    // before comparing.
                    case REOP_FLAT1i: {
                            char matchCh = (char)(program [pc++] & 0xFF);
                            result = (gData.cp != end && upcase (chars [gData.cp]) == upcase (matchCh));
                            if (result) {
                                gData.cp++;
                            }
                        }
                        break;

                    // Operand: the literal character, read with getIndex and
                    // occupying INDEX_LEN program entries.
                    case REOP_UCFLAT1: {
                            char matchCh = (char)getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = (gData.cp != end && chars [gData.cp] == matchCh);
                            if (result) {
                                gData.cp++;
                            }
                        }
                        break;

                    // As REOP_UCFLAT1, compared through upcase.
                    case REOP_UCFLAT1i: {
                            char matchCh = (char)getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = (gData.cp != end && upcase (chars [gData.cp]) == upcase (matchCh));
                            if (result) {
                                gData.cp++;
                            }
                        }
                        break;

                    // Alternation. Saves the current continuation on the state
                    // stack, registers the op found at pc + <offset> as the
                    // backtrack target, then runs the op that follows the
                    // operand.
                    case REOP_ALT: {
                            int nextpc;
                            sbyte nextop;
                            pushProgState (gData, 0, 0, null, currentContinuation_pc, currentContinuation_op);
                            nextpc = pc + getOffset (program, pc);
                            nextop = program [nextpc++];
                            pushBackTrackState (gData, nextop, nextpc);
                            pc += INDEX_LEN;
                            op = program [pc++];
                        }
                        continue;

                    // Pops a state, restores the continuation it saved, and
                    // jumps by <offset> measured from the operand's own position.
                    case REOP_JUMP: {
                            int offset;
                            REProgState state = popProgState (gData);
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            offset = getOffset (program, pc);
                            pc += offset;
                            op = program [pc++];
                        }
                        continue;

                    // Opens capture <parenIndex>: start = current cp, length 0.
                    case REOP_LPAREN: {
                            int parenIndex = getIndex (program, pc);
                            pc += INDEX_LEN;
                            gData.set_parens (parenIndex, gData.cp, 0);
                            op = program [pc++];
                        }
                        continue;

                    // Closes capture <parenIndex>: keeps the recorded start and
                    // sets the length to cp - start; raises gData.lastParen if
                    // this index is higher.
                    case REOP_RPAREN: {
                            int cap_index;
                            int parenIndex = getIndex (program, pc);
                            pc += INDEX_LEN;
                            cap_index = gData.parens_index (parenIndex);
                            gData.set_parens (parenIndex, cap_index, gData.cp - cap_index);
                            if (parenIndex > gData.lastParen)
                                gData.lastParen = parenIndex;
                            op = program [pc++];
                        }
                        continue;

                    // Operand: <parenIndex>; matching is delegated to backrefMatcher.
                    case REOP_BACKREF: {
                            int parenIndex = getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = backrefMatcher (gData, parenIndex, chars, end);
                        }
                        break;

                    // Operand: <index> into gData.regexp.classList. Consumes one
                    // character when classMatcher accepts it.
                    case REOP_CLASS: {
                            int index = getIndex (program, pc);
                            pc += INDEX_LEN;
                            if (gData.cp != end) {
                                if (classMatcher (gData, gData.regexp.classList [index], chars [gData.cp])) {
                                    gData.cp++;
                                    result = true;
                                    break;
                                }
                            }
                            result = false;
                        }
                        break;

                    // Start of an assertion. The pushed state remembers the
                    // current backtrack stack top and continuation; the pushed
                    // backtrack entry targets the matching *TEST opcode at
                    // pc + <offset>. Execution then proceeds into the
                    // assertion body.
                    case REOP_ASSERT:
                    case REOP_ASSERT_NOT: {
                            sbyte testOp;
                            pushProgState (gData, 0, 0, gData.backTrackStackTop, currentContinuation_pc, currentContinuation_op);
                            if (op == REOP_ASSERT) {
                                testOp = REOP_ASSERTTEST;
                            }
                            else {
                                testOp = REOP_ASSERTNOTTEST;
                            }
                            pushBackTrackState (gData, testOp, pc + getOffset (program, pc));
                            pc += INDEX_LEN;
                            op = program [pc++];
                        }
                        continue;

                    // End of an assertion. Restores cp, the backtrack stack top
                    // and the continuation from the popped state, so the
                    // assertion body consumes nothing. REOP_ASSERTTEST keeps
                    // result as it is; REOP_ASSERTNOTTEST inverts it.
                    // NOTE(review): state.index is presumably the cp captured by
                    // pushProgState - confirm there.
                    case REOP_ASSERTTEST:
                    case REOP_ASSERTNOTTEST: {
                            REProgState state = popProgState (gData);
                            gData.cp = state.index;
                            gData.backTrackStackTop = state.backTrack;
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            if (result) {
                                if (op == REOP_ASSERTTEST) {
                                    result = true;
                                }
                                else {
                                    result = false;
                                }
                            }
                            else {
                                if (op == REOP_ASSERTTEST) {
                                    // Do nothing
                                }
                                else {
                                    result = true;
                                }
                            }
                        }
                        break;

                    // Quantifier entry. The inner switch derives (greedy, min,
                    // max) from the opcode; max == -1 is the "no upper limit"
                    // sentinel (it is never decremented by the repeat opcodes).
                    // The operands that follow are <parencount> <parenindex>
                    // <next>, then the child.
                    case REOP_STAR:
                    case REOP_PLUS:
                    case REOP_OPT:
                    case REOP_QUANT:
                    case REOP_MINIMALSTAR:
                    case REOP_MINIMALPLUS:
                    case REOP_MINIMALOPT:
                    case REOP_MINIMALQUANT: {
                            int min, max;
                            bool greedy = false;
                            switch (op) {

                                // C# has no implicit fall-through, hence the
                                // explicit goto case after setting greedy.
                                case REOP_STAR:
                                    greedy = true;
                                    // fallthrough
                                    goto case REOP_MINIMALSTAR;

                                case REOP_MINIMALSTAR:
                                    min = 0;
                                    max = -1;
                                    break;

                                case REOP_PLUS:
                                    greedy = true;
                                    // fallthrough
                                    goto case REOP_MINIMALPLUS;

                                case REOP_MINIMALPLUS:
                                    min = 1;
                                    max = -1;
                                    break;

                                case REOP_OPT:
                                    greedy = true;
                                    // fallthrough
                                    goto case REOP_MINIMALOPT;

                                case REOP_MINIMALOPT:
                                    min = 0;
                                    max = 1;
                                    break;

                                case REOP_QUANT:
                                    greedy = true;
                                    // fallthrough
                                    goto case REOP_MINIMALQUANT;

                                // Explicit {min,max}: both bounds are read from
                                // the program.
                                case REOP_MINIMALQUANT:
                                    min = getOffset (program, pc);
                                    pc += INDEX_LEN;
                                    // See comments in emitREBytecode for " - 1" reason
                                    max = getOffset (program, pc) - 1;
                                    pc += INDEX_LEN;
                                    break;

                                default:
                                    throw Context.CodeBug ();

                            }
                            pushProgState (gData, min, max, null, currentContinuation_pc, currentContinuation_op);
                            if (greedy) {
                                // Greedy: try the child first; REOP_REPEAT is
                                // both the continuation after the child and the
                                // backtrack target.
                                currentContinuation_op = REOP_REPEAT;
                                currentContinuation_pc = pc;
                                pushBackTrackState (gData, REOP_REPEAT, pc);
                                /* Step over <parencount>, <parenindex> & <next> */
                                pc += 3 * INDEX_LEN;
                                op = program [pc++];
                            }
                            else {
                                if (min != 0) {
                                    // Non-greedy with a required minimum: the
                                    // child must run at least once.
                                    currentContinuation_op = REOP_MINIMALREPEAT;
                                    currentContinuation_pc = pc;
                                    /* <parencount> <parenindex> & <next> */
                                    pc += 3 * INDEX_LEN;
                                    op = program [pc++];
                                }
                                else {
                                    // Non-greedy with min == 0: skip the child
                                    // for now by following <next>, leaving a
                                    // backtrack entry that re-enters at
                                    // REOP_MINIMALREPEAT. The state pushed above
                                    // is popped again after the backtrack entry
                                    // has been recorded.
                                    pushBackTrackState (gData, REOP_MINIMALREPEAT, pc);
                                    popProgState (gData);
                                    pc += 2 * INDEX_LEN; // <parencount> & <parenindex>
                                    pc = pc + getOffset (program, pc);
                                    op = program [pc++];
                                }
                            }
                        }
                        continue;

                    case REOP_ENDCHILD:
                        // Use the current continuation.
                        pc = currentContinuation_pc;
                        op = currentContinuation_op;
                        continue;

                    // Greedy repeat step. result carries the outcome of the
                    // child; the backtrack path enters here with result == false.
                    case REOP_REPEAT: {
                            REProgState state = popProgState (gData);
                            if (!result) {
                                //
                                // There's been a failure, see if we have enough
                                // children.
                                //
                                if (state.min == 0)
                                    result = true;
                                currentContinuation_pc = state.continuation_pc;
                                currentContinuation_op = state.continuation_op;
                                pc += 2 * INDEX_LEN; /* <parencount> & <parenindex> */
                                // Follow <next> to the code after the quantified child.
                                pc = pc + getOffset (program, pc);
                                break;
                            }
                            else {
                                if (state.min == 0 && gData.cp == state.index) {
                                    // matched an empty string, that'll get us nowhere
                                    result = false;
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    pc += 2 * INDEX_LEN;
                                    pc = pc + getOffset (program, pc);
                                    break;
                                }
                                // One more child matched: count it against both
                                // bounds (min stops at 0, max == -1 is left alone).
                                int new_min = state.min, new_max = state.max;
                                if (new_min != 0)
                                    new_min--;
                                if (new_max != -1)
                                    new_max--;
                                if (new_max == 0) {
                                    // Upper bound reached: stop repeating and
                                    // carry on after the quantifier.
                                    result = true;
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    pc += 2 * INDEX_LEN;
                                    pc = pc + getOffset (program, pc);
                                    break;
                                }
                                // Otherwise try the child again with the
                                // updated counts.
                                pushProgState (gData, new_min, new_max, null, state.continuation_pc, state.continuation_op);
                                currentContinuation_op = REOP_REPEAT;
                                currentContinuation_pc = pc;
                                pushBackTrackState (gData, REOP_REPEAT, pc);
                                int parenCount = getIndex (program, pc);
                                pc += INDEX_LEN;
                                int parenIndex = getIndex (program, pc);
                                // Skip <parenindex> and <next> to reach the child.
                                pc += 2 * INDEX_LEN;
                                op = program [pc++];
                                // Reset the captures parenIndex .. parenIndex +
                                // parenCount - 1 to unset before the next iteration.
                                for (int k = 0; k < parenCount; k++) {
                                    gData.set_parens (parenIndex + k, -1, 0);
                                }
                            }
                        }
                        continue;

                    // Non-greedy repeat step. Every path through this case ends
                    // in break or continue, so control never runs into the
                    // REOP_END label below.
                    case REOP_MINIMALREPEAT: {
                            REProgState state = popProgState (gData);
                            if (!result) {
                                //
                                // Non-greedy failure - try to consume another child.
                                //
                                if (state.max == -1 || state.max > 0) {
                                    pushProgState (gData, state.min, state.max, null, state.continuation_pc, state.continuation_op);
                                    currentContinuation_op = REOP_MINIMALREPEAT;
                                    currentContinuation_pc = pc;
                                    int parenCount = getIndex (program, pc);
                                    pc += INDEX_LEN;
                                    int parenIndex = getIndex (program, pc);
                                    pc += 2 * INDEX_LEN;
                                    // Reset this child's captures to unset
                                    // before running it again.
                                    for (int k = 0; k < parenCount; k++) {
                                        gData.set_parens (parenIndex + k, -1, 0);
                                    }
                                    op = program [pc++];
                                    continue;
                                }
                                else {
                                    // Don't need to adjust pc since we're going to pop.
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    break;
                                }
                            }
                            else {
                                if (state.min == 0 && gData.cp == state.index) {
                                    // Matched an empty string, that'll get us nowhere.
                                    result = false;
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    break;
                                }
                                int new_min = state.min, new_max = state.max;
                                if (new_min != 0)
                                    new_min--;
                                if (new_max != -1)
                                    new_max--;
                                pushProgState (gData, new_min, new_max, null, state.continuation_pc, state.continuation_op);
                                if (new_min != 0) {
                                    // Minimum not yet satisfied: the child has
                                    // to run again.
                                    currentContinuation_op = REOP_MINIMALREPEAT;
                                    currentContinuation_pc = pc;
                                    int parenCount = getIndex (program, pc);
                                    pc += INDEX_LEN;
                                    int parenIndex = getIndex (program, pc);
                                    pc += 2 * INDEX_LEN;
                                    for (int k = 0; k < parenCount; k++) {
                                        gData.set_parens (parenIndex + k, -1, 0);
                                    }
                                    op = program [pc++];
                                }
                                else {
                                    // Minimum satisfied: prefer to stop here
                                    // and follow <next>, leaving a backtrack
                                    // entry that can come back for another
                                    // child. The state pushed just above is
                                    // popped again once that entry is recorded.
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    pushBackTrackState (gData, REOP_MINIMALREPEAT, pc);
                                    popProgState (gData);
                                    pc += 2 * INDEX_LEN;
                                    pc = pc + getOffset (program, pc);
                                    op = program [pc++];
                                }
                                continue;
                            }
                        }

                    // End of program: the whole pattern matched.
                    case REOP_END:
                        return true;

                    default:
                        throw Context.CodeBug ();

                }
                /*
                *  If the match failed and there's a backtrack option, take it.
                *  Otherwise this is a complete and utter failure.
                */
                if (!result) {
                    REBackTrackData backTrackData = gData.backTrackStackTop;
                    if (backTrackData != null) {
                        // Pop the entry and restore everything it recorded:
                        // lastParen, captures, cp and the state stack top.
                        gData.backTrackStackTop = backTrackData.previous;

                        gData.lastParen = backTrackData.lastParen;

                        // TODO: If backTrackData will no longer be used, then
                        // TODO: there is no need to clone backTrackData.parens
                        if (backTrackData.parens != null) {
                            gData.parens = new long [backTrackData.parens.Length];
                            backTrackData.parens.CopyTo (gData.parens, 0);
                        }

                        gData.cp = backTrackData.cp;

                        gData.stateStackTop = backTrackData.stateStackTop;

                        // The continuation comes from the restored state stack
                        // top; the op and pc to resume at come from the
                        // backtrack entry. result stays false for that op.
                        currentContinuation_op = gData.stateStackTop.continuation_op;
                        currentContinuation_pc = gData.stateStackTop.continuation_pc;
                        pc = backTrackData.continuation_pc;
                        op = backTrackData.continuation_op;
                        continue;
                    }
                    else
                        return false;
                }

                // Success: move on to the next opcode in sequence.
                op = program [pc++];
            }
        }