예제 #1
0
        /// <summary>
        /// Runs the compiled expression against <paramref name="chars"/>, trying every
        /// candidate start position from <paramref name="start"/> up to and including
        /// <paramref name="end"/>, and stops at the first position that matches.
        /// </summary>
        /// <param name="gData">Match state; it is reset here and filled in by the match attempt.</param>
        /// <param name="re">The compiled expression to execute.</param>
        /// <param name="chars">The input characters.</param>
        /// <param name="start">First candidate start position.</param>
        /// <param name="end">Position just past the last input character; also tried as a start position.</param>
        /// <param name="multiline">Stored in <c>gData.multiline</c> for the duration of the match.</param>
        /// <returns>
        /// <c>true</c> if some start position matched, in which case <c>gData.skipped</c>
        /// holds the distance from <paramref name="start"/> to that position;
        /// otherwise <c>false</c>.
        /// </returns>
        private static bool matchRegExp(REGlobalData gData, RECompiled re, char [] chars, int start, int end, bool multiline)
        {
            // Start from a clean slate: fresh capture slots and empty stacks.
            gData.parens = (re.parenCount != 0) ? new long [re.parenCount] : null;
            gData.backTrackStackTop = null;
            gData.stateStackTop = null;
            gData.multiline = multiline;
            gData.regexp = re;
            gData.lastParen = 0;

            int anchor = gData.regexp.anchorCh;

            // The position one past the last character is a legal start
            // position too; end-of-input/line conditions are detected there.
            for (int pos = start; pos <= end; ++pos) {
                if (anchor >= 0) {
                    // A non-negative anchor is the character the match must
                    // begin with: skip ahead to its next occurrence, and give
                    // up if the input runs out first.
                    while (true) {
                        if (pos == end) {
                            return false;
                        }
                        char candidate = chars [pos];
                        if (candidate == anchor) {
                            break;
                        }
                        if ((gData.regexp.flags & JSREG_FOLD) != 0 && upcase (candidate) == upcase ((char)anchor)) {
                            break;
                        }
                        ++pos;
                    }
                }

                gData.cp = pos;

                // Clear every capture before each attempt.
                for (int paren = 0; paren < re.parenCount; paren++) {
                    gData.set_parens (paren, -1, 0);
                }

                bool matched = executeREBytecode (gData, chars, end);

                // Drop both stacks whether or not the attempt succeeded.
                gData.backTrackStackTop = null;
                gData.stateStackTop = null;

                if (!matched) {
                    continue;
                }

                gData.skipped = pos - start;
                return true;
            }

            return false;
        }
예제 #2
0
        /// <summary>
        /// Interprets the compiled program in <c>gData.regexp.program</c> against
        /// <paramref name="chars"/>, beginning at input position <c>gData.cp</c>.
        /// </summary>
        /// <remarks>
        /// Each opcode handler either sets <c>result</c> and <c>break</c>s out of the
        /// switch (after which a failure triggers back-tracking and a success fetches
        /// the next opcode), or loads the next opcode itself and <c>continue</c>s the loop.
        /// An unrecognized opcode throws the value returned by <c>Context.CodeBug()</c>.
        /// </remarks>
        /// <param name="gData">
        /// Match state: the current input position (<c>cp</c>), the capture slots, the
        /// back-track stack and the program-state stack. All of these are mutated here.
        /// </param>
        /// <param name="chars">The input characters.</param>
        /// <param name="end">
        /// Input limit; handlers treat <c>gData.cp == end</c> as "no more input".
        /// </param>
        /// <returns>
        /// <c>true</c> when <c>REOP_END</c> is reached; <c>false</c> when an opcode fails
        /// and the back-track stack is empty.
        /// </returns>
        private static bool executeREBytecode(REGlobalData gData, char [] chars, int end)
        {
            int pc = 0;
            sbyte [] program = gData.regexp.program;
            // The "continuation" is the opcode (and its pc) that REOP_ENDCHILD
            // transfers control to.
            int currentContinuation_op;
            int currentContinuation_pc;
            bool result = false;

            currentContinuation_pc = 0;
            currentContinuation_op = REOP_END;
            if (debug) {
                System.Console.Out.WriteLine ("Input = \"" + new string (chars) + "\", start at " + gData.cp);
            }
            int op = program [pc++];
            for (; ; ) {
                if (debug) {
                    System.Console.Out.WriteLine ("Testing at " + gData.cp + ", op = " + op);
                }
                switch (op) {

                    case REOP_EMPTY:
                        // Always succeeds without consuming input.
                        result = true;
                        break;

                    case REOP_BOL:
                        // Succeeds at position 0; elsewhere only in multiline mode
                        // and only when the previous character is a line terminator.
                        if (gData.cp != 0) {
                            if (gData.multiline || ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
                                if (!isLineTerm (chars [gData.cp - 1])) {
                                    result = false;
                                    break;
                                }
                            }
                            else {
                                result = false;
                                break;
                            }
                        }
                        result = true;
                        break;

                    case REOP_EOL:
                        // Succeeds at `end`; elsewhere only in multiline mode and
                        // only when the current character is a line terminator.
                        if (gData.cp != end) {
                            if (gData.multiline || ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
                                if (!isLineTerm (chars [gData.cp])) {
                                    result = false;
                                    break;
                                }
                            }
                            else {
                                result = false;
                                break;
                            }
                        }
                        result = true;
                        break;

                    case REOP_WBDRY:
                        // Left operand: "no word character before cp".
                        // Right operand: "no word character at cp".
                        // The XOR is true when exactly one side is a word character.
                        result = ((gData.cp == 0 || !isWord (chars [gData.cp - 1])) ^ !((gData.cp < end) && isWord (chars [gData.cp])));
                        break;

                    case REOP_WNONBDRY:
                        // Same left operand as above, but the right operand is
                        // "word character at cp", so the XOR is true when both
                        // sides agree (both word characters or both not).
                        result = ((gData.cp == 0 || !isWord (chars [gData.cp - 1])) ^ ((gData.cp < end) && isWord (chars [gData.cp])));
                        break;

                    // The single-character tests below all follow one pattern:
                    // fail at end of input, otherwise test chars[cp] and consume
                    // it on success.

                    case REOP_DOT:
                        result = (gData.cp != end && !isLineTerm (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_DIGIT:
                        result = (gData.cp != end && isDigit (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_NONDIGIT:
                        result = (gData.cp != end && !isDigit (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_SPACE:
                        result = (gData.cp != end && isREWhiteSpace (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_NONSPACE:
                        result = (gData.cp != end && !isREWhiteSpace (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_ALNUM:
                        result = (gData.cp != end && isWord (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_NONALNUM:
                        result = (gData.cp != end && !isWord (chars [gData.cp]));
                        if (result) {
                            gData.cp++;
                        }
                        break;

                    case REOP_FLAT: {
                            // Operands: two index values passed to flatNMatcher as
                            // (offset, length).
                            // NOTE(review): presumably these locate the literal text
                            // in the pattern source - confirm against flatNMatcher.
                            int offset = getIndex (program, pc);
                            pc += INDEX_LEN;
                            int length = getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = flatNMatcher (gData, offset, length, chars, end);
                        }
                        break;

                    case REOP_FLATi: {
                            // Same operand layout as REOP_FLAT, delegated to
                            // flatNIMatcher instead.
                            int offset = getIndex (program, pc);
                            pc += INDEX_LEN;
                            int length = getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = flatNIMatcher (gData, offset, length, chars, end);
                        }
                        break;

                    case REOP_FLAT1: {
                            // Operand: one program byte, masked to 0..255, used as
                            // the character to match.
                            char matchCh = (char)(program [pc++] & 0xFF);
                            result = (gData.cp != end && chars [gData.cp] == matchCh);
                            if (result) {
                                gData.cp++;
                            }
                        }
                        break;

                    case REOP_FLAT1i: {
                            // As REOP_FLAT1, but both characters go through upcase
                            // before being compared.
                            char matchCh = (char)(program [pc++] & 0xFF);
                            result = (gData.cp != end && upcase (chars [gData.cp]) == upcase (matchCh));
                            if (result) {
                                gData.cp++;
                            }
                        }
                        break;

                    case REOP_UCFLAT1: {
                            // Operand: an index-sized value cast to char.
                            char matchCh = (char)getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = (gData.cp != end && chars [gData.cp] == matchCh);
                            if (result) {
                                gData.cp++;
                            }
                        }
                        break;

                    case REOP_UCFLAT1i: {
                            // As REOP_UCFLAT1, compared after upcase on both sides.
                            char matchCh = (char)getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = (gData.cp != end && upcase (chars [gData.cp]) == upcase (matchCh));
                            if (result) {
                                gData.cp++;
                            }
                        }
                        break;

                    case REOP_ALT: {
                            // Save the current continuation, register the opcode at
                            // the operand's offset target as a back-track entry, then
                            // carry on with the opcode that follows the operand.
                            int nextpc;
                            sbyte nextop;
                            pushProgState (gData, 0, 0, null, currentContinuation_pc, currentContinuation_op);
                            nextpc = pc + getOffset (program, pc);
                            nextop = program [nextpc++];
                            pushBackTrackState (gData, nextop, nextpc);
                            pc += INDEX_LEN;
                            op = program [pc++];
                        }
                        continue;

                    case REOP_JUMP: {
                            // Pop a program state, restore its continuation, and
                            // jump by the offset stored at pc.
                            int offset;
                            REProgState state = popProgState (gData);
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            offset = getOffset (program, pc);
                            pc += offset;
                            op = program [pc++];
                        }
                        continue;

                    case REOP_LPAREN: {
                            // Record the current position as the start of capture
                            // parenIndex, with length 0 for now.
                            int parenIndex = getIndex (program, pc);
                            pc += INDEX_LEN;
                            gData.set_parens (parenIndex, gData.cp, 0);
                            op = program [pc++];
                        }
                        continue;

                    case REOP_RPAREN: {
                            // Close capture parenIndex: keep its start and set its
                            // length to the distance from that start to cp.
                            int cap_index;
                            int parenIndex = getIndex (program, pc);
                            pc += INDEX_LEN;
                            cap_index = gData.parens_index (parenIndex);
                            gData.set_parens (parenIndex, cap_index, gData.cp - cap_index);
                            // lastParen tracks the highest capture index closed so far.
                            if (parenIndex > gData.lastParen)
                                gData.lastParen = parenIndex;
                            op = program [pc++];
                        }
                        continue;

                    case REOP_BACKREF: {
                            // Operand: the capture index handed to backrefMatcher.
                            int parenIndex = getIndex (program, pc);
                            pc += INDEX_LEN;
                            result = backrefMatcher (gData, parenIndex, chars, end);
                        }
                        break;

                    case REOP_CLASS: {
                            // Operand: an index into regexp.classList. Consumes one
                            // character when classMatcher accepts it; fails at end
                            // of input.
                            int index = getIndex (program, pc);
                            pc += INDEX_LEN;
                            if (gData.cp != end) {
                                if (classMatcher (gData, gData.regexp.classList [index], chars [gData.cp])) {
                                    gData.cp++;
                                    result = true;
                                    break;
                                }
                            }
                            result = false;
                        }
                        break;

                    case REOP_ASSERT:
                    case REOP_ASSERT_NOT: {
                            // Save the continuation together with the current
                            // back-track stack top, so the matching *TEST opcode can
                            // restore it, then register that *TEST opcode as a
                            // back-track entry at the operand's offset target.
                            sbyte testOp;
                            pushProgState (gData, 0, 0, gData.backTrackStackTop, currentContinuation_pc, currentContinuation_op);
                            if (op == REOP_ASSERT) {
                                testOp = REOP_ASSERTTEST;
                            }
                            else {
                                testOp = REOP_ASSERTNOTTEST;
                            }
                            pushBackTrackState (gData, testOp, pc + getOffset (program, pc));
                            pc += INDEX_LEN;
                            op = program [pc++];
                        }
                        continue;

                    case REOP_ASSERTTEST:
                    case REOP_ASSERTNOTTEST: {
                            // Restore the position, back-track stack and continuation
                            // from the saved state.
                            // NOTE(review): state.index is presumably the input
                            // position recorded by pushProgState - confirm there.
                            REProgState state = popProgState (gData);
                            gData.cp = state.index;
                            gData.backTrackStackTop = state.backTrack;
                            currentContinuation_pc = state.continuation_pc;
                            currentContinuation_op = state.continuation_op;
                            // REOP_ASSERTTEST leaves `result` unchanged;
                            // REOP_ASSERTNOTTEST inverts it.
                            if (result) {
                                if (op == REOP_ASSERTTEST) {
                                    result = true;
                                }
                                else {
                                    result = false;
                                }
                            }
                            else {
                                if (op == REOP_ASSERTTEST) {
                                    // Do nothing
                                }
                                else {
                                    result = true;
                                }
                            }
                        }
                        break;

                    case REOP_STAR:
                    case REOP_PLUS:
                    case REOP_OPT:
                    case REOP_QUANT:
                    case REOP_MINIMALSTAR:
                    case REOP_MINIMALPLUS:
                    case REOP_MINIMALOPT:
                    case REOP_MINIMALQUANT: {
                            // Work out the (min, max) repetition bounds and whether
                            // the quantifier is greedy. A max of -1 is the value the
                            // REPEAT handlers below treat as "no upper limit".
                            int min, max;
                            bool greedy = false;
                            switch (op) {

                                case REOP_STAR:
                                    greedy = true;
                                    // fallthrough
                                    goto case REOP_MINIMALSTAR;

                                case REOP_MINIMALSTAR:
                                    min = 0;
                                    max = -1;
                                    break;

                                case REOP_PLUS:
                                    greedy = true;
                                    // fallthrough
                                    goto case REOP_MINIMALPLUS;

                                case REOP_MINIMALPLUS:
                                    min = 1;
                                    max = -1;
                                    break;

                                case REOP_OPT:
                                    greedy = true;
                                    // fallthrough
                                    goto case REOP_MINIMALOPT;

                                case REOP_MINIMALOPT:
                                    min = 0;
                                    max = 1;
                                    break;

                                case REOP_QUANT:
                                    greedy = true;
                                    // fallthrough
                                    goto case REOP_MINIMALQUANT;

                                case REOP_MINIMALQUANT:
                                    // Explicit bounds are stored as two operands.
                                    min = getOffset (program, pc);
                                    pc += INDEX_LEN;
                                    // See comments in emitREBytecode for " - 1" reason
                                    max = getOffset (program, pc) - 1;
                                    pc += INDEX_LEN;
                                    break;

                                default:
                                    throw Context.CodeBug ();

                            }
                            pushProgState (gData, min, max, null, currentContinuation_pc, currentContinuation_op);
                            if (greedy) {
                                // Greedy: run the child first, with REOP_REPEAT as
                                // both its continuation and its back-track target.
                                currentContinuation_op = REOP_REPEAT;
                                currentContinuation_pc = pc;
                                pushBackTrackState (gData, REOP_REPEAT, pc);
                                /* Step over <parencount>, <parenindex> & <next> */
                                pc += 3 * INDEX_LEN;
                                op = program [pc++];
                            }
                            else {
                                if (min != 0) {
                                    // Non-greedy with a required minimum: the child
                                    // has to run at least once before what follows.
                                    currentContinuation_op = REOP_MINIMALREPEAT;
                                    currentContinuation_pc = pc;
                                    /* <parencount> <parenindex> & <next> */
                                    pc += 3 * INDEX_LEN;
                                    op = program [pc++];
                                }
                                else {
                                    // Non-greedy with no minimum: try what follows the
                                    // quantifier first (via the <next> offset); the
                                    // back-track entry tries the child if that fails.
                                    pushBackTrackState (gData, REOP_MINIMALREPEAT, pc);
                                    popProgState (gData);
                                    pc += 2 * INDEX_LEN; // <parencount> & <parenindex>
                                    pc = pc + getOffset (program, pc);
                                    op = program [pc++];
                                }
                            }
                        }
                        continue;

                    case REOP_ENDCHILD:
                        // Use the current continuation.
                        pc = currentContinuation_pc;
                        op = currentContinuation_op;
                        continue;

                    case REOP_REPEAT: {
                            // Reached with `result` describing the child's latest
                            // attempt; pc points at the quantifier's
                            // <parencount>, <parenindex>, <next> operands.
                            REProgState state = popProgState (gData);
                            if (!result) {
                                //
                                // There's been a failure, see if we have enough
                                // children.
                                //
                                if (state.min == 0)
                                    result = true;
                                currentContinuation_pc = state.continuation_pc;
                                currentContinuation_op = state.continuation_op;
                                pc += 2 * INDEX_LEN; /* <parencount> & <parenindex> */
                                pc = pc + getOffset (program, pc);
                                break;
                            }
                            else {
                                if (state.min == 0 && gData.cp == state.index) {
                                    // matched an empty string, that'll get us nowhere
                                    result = false;
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    pc += 2 * INDEX_LEN;
                                    pc = pc + getOffset (program, pc);
                                    break;
                                }
                                // One more child matched: decrement the remaining
                                // bounds, leaving min at 0 and an unbounded max (-1)
                                // untouched.
                                int new_min = state.min, new_max = state.max;
                                if (new_min != 0)
                                    new_min--;
                                if (new_max != -1)
                                    new_max--;
                                if (new_max == 0) {
                                    // Upper bound reached: the quantifier is done,
                                    // continue after it.
                                    result = true;
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    pc += 2 * INDEX_LEN;
                                    pc = pc + getOffset (program, pc);
                                    break;
                                }
                                // Otherwise try another iteration of the child.
                                pushProgState (gData, new_min, new_max, null, state.continuation_pc, state.continuation_op);
                                currentContinuation_op = REOP_REPEAT;
                                currentContinuation_pc = pc;
                                pushBackTrackState (gData, REOP_REPEAT, pc);
                                int parenCount = getIndex (program, pc);
                                pc += INDEX_LEN;
                                int parenIndex = getIndex (program, pc);
                                pc += 2 * INDEX_LEN;
                                op = program [pc++];
                                // Clear the captures inside the child before the
                                // next iteration.
                                for (int k = 0; k < parenCount; k++) {
                                    gData.set_parens (parenIndex + k, -1, 0);
                                }
                            }
                        }
                        continue;

                    case REOP_MINIMALREPEAT: {
                            // Every path through this case ends in `break` or
                            // `continue`, so control never runs off its end.
                            REProgState state = popProgState (gData);
                            if (!result) {
                                //
                                // Non-greedy failure - try to consume another child.
                                //
                                if (state.max == -1 || state.max > 0) {
                                    pushProgState (gData, state.min, state.max, null, state.continuation_pc, state.continuation_op);
                                    currentContinuation_op = REOP_MINIMALREPEAT;
                                    currentContinuation_pc = pc;
                                    int parenCount = getIndex (program, pc);
                                    pc += INDEX_LEN;
                                    int parenIndex = getIndex (program, pc);
                                    pc += 2 * INDEX_LEN;
                                    // Clear the captures inside the child before
                                    // running it again.
                                    for (int k = 0; k < parenCount; k++) {
                                        gData.set_parens (parenIndex + k, -1, 0);
                                    }
                                    op = program [pc++];
                                    continue;
                                }
                                else {
                                    // Don't need to adjust pc since we're going to pop.
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    break;
                                }
                            }
                            else {
                                if (state.min == 0 && gData.cp == state.index) {
                                    // Matched an empty string, that'll get us nowhere.
                                    result = false;
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    break;
                                }
                                // A child matched: decrement the remaining bounds,
                                // leaving min at 0 and an unbounded max (-1) untouched.
                                int new_min = state.min, new_max = state.max;
                                if (new_min != 0)
                                    new_min--;
                                if (new_max != -1)
                                    new_max--;
                                pushProgState (gData, new_min, new_max, null, state.continuation_pc, state.continuation_op);
                                if (new_min != 0) {
                                    // Minimum not yet met: the child must run again.
                                    currentContinuation_op = REOP_MINIMALREPEAT;
                                    currentContinuation_pc = pc;
                                    int parenCount = getIndex (program, pc);
                                    pc += INDEX_LEN;
                                    int parenIndex = getIndex (program, pc);
                                    pc += 2 * INDEX_LEN;
                                    for (int k = 0; k < parenCount; k++) {
                                        gData.set_parens (parenIndex + k, -1, 0);
                                    }
                                    op = program [pc++];
                                }
                                else {
                                    // Minimum met: try what follows the quantifier,
                                    // leaving a back-track entry that comes back here
                                    // if that fails.
                                    currentContinuation_pc = state.continuation_pc;
                                    currentContinuation_op = state.continuation_op;
                                    pushBackTrackState (gData, REOP_MINIMALREPEAT, pc);
                                    popProgState (gData);
                                    pc += 2 * INDEX_LEN;
                                    pc = pc + getOffset (program, pc);
                                    op = program [pc++];
                                }
                                continue;
                            }
                        }

                    case REOP_END:
                        // Reaching the end of the program means the match succeeded.
                        return true;

                    default:
                        throw Context.CodeBug ();

                }
                /*
                *  If the match failed and there's a backtrack option, take it.
                *  Otherwise this is a complete and utter failure.
                */
                if (!result) {
                    REBackTrackData backTrackData = gData.backTrackStackTop;
                    if (backTrackData != null) {
                        // Pop the entry and restore everything it saved:
                        // lastParen, captures, input position and state stack.
                        gData.backTrackStackTop = backTrackData.previous;

                        gData.lastParen = backTrackData.lastParen;

                        // TODO: If backTrackData will no longer be used, then
                        // TODO: there is no need to clone backTrackData.parens
                        if (backTrackData.parens != null) {
                            gData.parens = new long [backTrackData.parens.Length];
                            backTrackData.parens.CopyTo (gData.parens, 0);
                        }

                        gData.cp = backTrackData.cp;

                        gData.stateStackTop = backTrackData.stateStackTop;

                        // Resume at the saved opcode, with the continuation taken
                        // from the restored state stack top.
                        // NOTE(review): this dereferences stateStackTop without a
                        // null check - presumably every back-track entry is pushed
                        // while a program state exists; confirm in
                        // pushBackTrackState's callers.
                        currentContinuation_op = gData.stateStackTop.continuation_op;
                        currentContinuation_pc = gData.stateStackTop.continuation_pc;
                        pc = backTrackData.continuation_pc;
                        op = backTrackData.continuation_op;
                        continue;
                    }
                    else
                        return false;
                }

                // The opcode succeeded: fetch the next one.
                op = program [pc++];
            }
        }