예제 #1
0
 /// <summary>
 /// Re-initializes this regexp object, either by copying the compiled state of
 /// another <c>BuiltinRegExp</c> or by compiling a pattern/flags pair.
 /// </summary>
 /// <param name="cx">Current context (not used by this method).</param>
 /// <param name="scope">Current scope (not used by this method).</param>
 /// <param name="args">
 /// Call arguments. <c>args[0]</c> is either a <c>BuiltinRegExp</c> to copy from or a
 /// value converted to the pattern string (an empty pattern is used when no
 /// arguments are given). <c>args[1]</c>, when present and not
 /// <c>Undefined.Value</c>, is converted to the flag string.
 /// </param>
 /// <returns>This instance.</returns>
 internal virtual IScriptable compile(Context cx, IScriptable scope, object [] args)
 {
     // True when a second argument was supplied and is not the undefined value.
     bool flagsSupplied = args.Length > 1 && args [1] != Undefined.Value;

     BuiltinRegExp template = args.Length > 0 ? args [0] as BuiltinRegExp : null;
     if (template != null) {
         // Copying from an existing regexp does not accept a flags argument.
         if (flagsSupplied) {
             throw ScriptRuntime.TypeErrorById ("msg.bad.regexp.compile");
         }
         this.re = template.re;
         this.lastIndex = template.lastIndex;
         return this;
     }

     // Convert the pattern first and the flags second, in argument order.
     string pattern = "";
     if (args.Length != 0) {
         pattern = ScriptConvert.ToString (args [0]);
     }
     string flagText = null;
     if (flagsSupplied) {
         flagText = ScriptConvert.ToString (args [1]);
     }

     this.re = (RECompiled)compileRE (pattern, flagText, false);
     this.lastIndex = 0;
     return this;
 }
예제 #2
0
        /// <summary>
        /// Compiles a pattern string and an optional flag string into an
        /// <c>RECompiled</c> holding the source characters, flag bits, bytecode
        /// program, character-class list, paren count and anchor character.
        /// </summary>
        /// <param name="str">Pattern source text.</param>
        /// <param name="global">
        /// Flag characters: 'g', 'i' and 'm' set JSREG_GLOB, JSREG_FOLD and
        /// JSREG_MULTILINE respectively; any other character is passed to
        /// <c>reportError</c>. May be null, meaning no flags.
        /// </param>
        /// <param name="flat">
        /// When true and the pattern is non-empty, the whole pattern becomes a
        /// single REOP_FLAT node instead of being parsed.
        /// </param>
        /// <returns>
        /// The populated <c>RECompiled</c>, or null when <c>parseDisjunction</c>
        /// returns false.
        /// </returns>
        internal static object compileRE(string str, string global, bool flat)
        {
            RECompiled compiled = new RECompiled ();
            compiled.source = str.ToCharArray ();
            int patternLength = str.Length;

            // Translate each flag character into its bit.
            int flagBits = 0;
            if (global != null) {
                foreach (char flagChar in global) {
                    switch (flagChar) {
                        case 'g':
                            flagBits |= JSREG_GLOB;
                            break;
                        case 'i':
                            flagBits |= JSREG_FOLD;
                            break;
                        case 'm':
                            flagBits |= JSREG_MULTILINE;
                            break;
                        default:
                            reportError ("msg.invalid.re.flag", Convert.ToString (flagChar));
                            break;
                    }
                }
            }
            compiled.flags = flagBits;

            CompilerState state = new CompilerState (compiled.source, patternLength, flagBits);
            if (flat && patternLength > 0) {
                if (debug) {
                    System.Console.Out.WriteLine ("flat = \"" + str + "\"");
                }
                // One literal node spanning the entire source, starting at index 0.
                state.result = new RENode (REOP_FLAT);
                state.result.chr = state.cpbegin [0];
                state.result.length = patternLength;
                state.result.flatIndex = 0;
                state.progLength += 5;
            }
            else if (!parseDisjunction (state)) {
                return null;
            }

            // One extra byte is reserved for the trailing REOP_END.
            compiled.program = new sbyte [state.progLength + 1];
            if (state.classCount != 0) {
                compiled.classList = new RECharSet [state.classCount];
                compiled.classCount = state.classCount;
            }
            int programEnd = emitREBytecode (state, compiled, 0, state.result);
            compiled.program [programEnd++] = REOP_END;

            if (debug) {
                System.Console.Out.WriteLine ("Prog. length = " + programEnd);
                for (int pc = 0; pc < programEnd; pc++) {
                    if (pc > 0) {
                        System.Console.Out.Write (", ");
                    }
                    System.Console.Out.Write (DebugNameOp ((sbyte)compiled.program [pc]));
                }
                System.Console.Out.WriteLine ();
            }
            compiled.parenCount = state.parenCount;

            // When the program begins with a literal opcode, record its first
            // character as the anchor character.
            sbyte firstOp = compiled.program [0];
            if (firstOp == REOP_UCFLAT1 || firstOp == REOP_UCFLAT1i) {
                compiled.anchorCh = (char)getIndex (compiled.program, 1);
            }
            else if (firstOp == REOP_FLAT1 || firstOp == REOP_FLAT1i) {
                compiled.anchorCh = (char)(compiled.program [1] & 0xFF);
            }
            else if (firstOp == REOP_FLAT || firstOp == REOP_FLATi) {
                // The operand is an index into the source characters.
                compiled.anchorCh = compiled.source [getIndex (compiled.program, 1)];
            }

            if (debug && compiled.anchorCh >= 0) {
                System.Console.Out.WriteLine ("Anchor ch = '" + (char)compiled.anchorCh + "'");
            }
            return compiled;
        }
예제 #3
0
 /// <summary>
 /// Creates a regexp object around an already compiled program.
 /// </summary>
 /// <param name="scope">
 /// Scope handed to <c>ScriptRuntime.setObjectProtoAndParent</c> for this object.
 /// </param>
 /// <param name="regexpCompiled">
 /// The compiled regexp; it is cast to <c>RECompiled</c>, so any other runtime
 /// type makes the cast fail.
 /// </param>
 internal BuiltinRegExp(IScriptable scope, object regexpCompiled)
 {
     RECompiled compiled = (RECompiled)regexpCompiled;
     this.re = compiled;
     // A new object starts with lastIndex at zero.
     this.lastIndex = 0;
     ScriptRuntime.setObjectProtoAndParent (this, scope);
 }
예제 #4
0
        /// <summary>
        /// Tries the compiled program at successive positions of
        /// <paramref name="chars"/>, from <paramref name="start"/> up to and
        /// including <paramref name="end"/>, and stops at the first position where
        /// <c>executeREBytecode</c> succeeds.
        /// </summary>
        /// <param name="gData">Match state; reset here and filled in by the attempt.</param>
        /// <param name="re">Compiled regexp to run.</param>
        /// <param name="chars">Input characters.</param>
        /// <param name="start">First position to try.</param>
        /// <param name="end">Position just past the last input character.</param>
        /// <param name="multiline">Value stored into <c>gData.multiline</c>.</param>
        /// <returns>
        /// True when a match was found, in which case <c>gData.skipped</c> holds the
        /// distance from <paramref name="start"/> to the match position; false otherwise.
        /// </returns>
        private static bool matchRegExp(REGlobalData gData, RECompiled re, char [] chars, int start, int end, bool multiline)
        {
            gData.parens = (re.parenCount != 0) ? new long [re.parenCount] : null;

            gData.backTrackStackTop = null;
            gData.stateStackTop = null;

            gData.multiline = multiline;
            gData.regexp = re;
            gData.lastParen = 0;

            int anchor = gData.regexp.anchorCh;

            // The position beyond the last character is attempted as well so that
            // end-of-input/line conditions can be detected.
            int pos = start;
            while (pos <= end) {
                if (anchor >= 0) {
                    // The program begins with a literal: advance to the next
                    // occurrence of that character, or fail if none remains.
                    while (true) {
                        if (pos == end) {
                            return false;
                        }
                        char candidate = chars [pos];
                        if (candidate == anchor) {
                            break;
                        }
                        if ((gData.regexp.flags & JSREG_FOLD) != 0 && upcase (candidate) == upcase ((char)anchor)) {
                            break;
                        }
                        ++pos;
                    }
                }

                gData.cp = pos;
                // Reset every paren slot before each attempt.
                for (int slot = 0; slot < re.parenCount; slot++) {
                    gData.set_parens (slot, -1, 0);
                }
                bool matched = executeREBytecode (gData, chars, end);

                gData.backTrackStackTop = null;
                gData.stateStackTop = null;
                if (matched) {
                    gData.skipped = pos - start;
                    return true;
                }
                ++pos;
            }
            return false;
        }
예제 #5
0
        /// <summary>
        /// Writes the bytecode for the node list beginning at <paramref name="t"/>
        /// into <c>re.program</c>, starting at offset <paramref name="pc"/>, and
        /// recurses into child nodes where an opcode has them.
        /// </summary>
        /// <param name="state">Compiler state; its flags select the case-folding opcode variants.</param>
        /// <param name="re">Compiled regexp whose <c>program</c> and <c>classList</c> are filled in.</param>
        /// <param name="pc">Offset in the program at which to start writing.</param>
        /// <param name="t">First node of the list to emit; may be null.</param>
        /// <returns>The program offset just past the last byte written.</returns>
        private static int emitREBytecode(CompilerState state, RECompiled re, int pc, RENode t)
        {
            sbyte [] code = re.program;

            for (RENode node = t; node != null; node = node.next) {
                // Write the node's opcode first; several cases below overwrite
                // this slot with a more specific opcode.
                int opSlot = pc;
                code [pc++] = node.op;

                switch (node.op) {

                    case REOP_EMPTY: {
                        // Rewind so the slot is reused by whatever is emitted next.
                        pc = opSlot;
                        break;
                    }

                    case REOP_ALT: {
                        RENode secondBranch = node.kid2;
                        int secondBranchSlot = pc; /* address of next alternate */
                        pc += OFFSET_LEN;
                        pc = emitREBytecode (state, re, pc, node.kid);
                        code [pc++] = REOP_JUMP;
                        int firstExitSlot = pc; /* address of following term */
                        pc += OFFSET_LEN;
                        resolveForwardJump (code, secondBranchSlot, pc);
                        pc = emitREBytecode (state, re, pc, secondBranch);

                        code [pc++] = REOP_JUMP;
                        int secondExitSlot = pc;
                        pc += OFFSET_LEN;

                        // Both jumps are resolved against the same final offset.
                        resolveForwardJump (code, firstExitSlot, pc);
                        resolveForwardJump (code, secondExitSlot, pc);
                        break;
                    }

                    case REOP_FLAT: {
                        bool hasSourceIndex = node.flatIndex != -1;
                        if (hasSourceIndex) {
                            // Absorb following FLAT nodes that continue exactly where
                            // this one ends in the source.
                            while (node.next != null
                                   && node.next.op == REOP_FLAT
                                   && node.flatIndex + node.length == node.next.flatIndex) {
                                node.length += node.next.length;
                                node.next = node.next.next;
                            }
                        }

                        bool foldCase = (state.flags & JSREG_FOLD) != 0;
                        if (hasSourceIndex && node.length > 1) {
                            // Multi-character literal: operands are source index and length.
                            code [opSlot] = foldCase ? REOP_FLATi : REOP_FLAT;
                            pc = addIndex (code, pc, node.flatIndex);
                            pc = addIndex (code, pc, node.length);
                        }
                        else if (node.chr < 256) {
                            // Single character that fits in one byte.
                            code [opSlot] = foldCase ? REOP_FLAT1i : REOP_FLAT1;
                            code [pc++] = (sbyte)(node.chr);
                        }
                        else {
                            // Single character stored through addIndex.
                            code [opSlot] = foldCase ? REOP_UCFLAT1i : REOP_UCFLAT1;
                            pc = addIndex (code, pc, node.chr);
                        }
                        break;
                    }

                    case REOP_LPAREN: {
                        pc = addIndex (code, pc, node.parenIndex);
                        pc = emitREBytecode (state, re, pc, node.kid);
                        code [pc++] = REOP_RPAREN;
                        pc = addIndex (code, pc, node.parenIndex);
                        break;
                    }

                    case REOP_BACKREF: {
                        pc = addIndex (code, pc, node.parenIndex);
                        break;
                    }

                    case REOP_ASSERT:
                    case REOP_ASSERT_NOT: {
                        int followingTermSlot = pc;
                        pc += OFFSET_LEN;
                        pc = emitREBytecode (state, re, pc, node.kid);
                        // The closing test opcode mirrors the kind of assertion.
                        code [pc++] = (node.op == REOP_ASSERT) ? REOP_ASSERTTEST : REOP_ASSERTNOTTEST;
                        resolveForwardJump (code, followingTermSlot, pc);
                        break;
                    }

                    case REOP_QUANT: {
                        bool minIsZero = node.min == 0;
                        bool maxIsOpen = node.max == -1;
                        if (minIsZero && maxIsOpen) {
                            code [opSlot] = node.greedy ? REOP_STAR : REOP_MINIMALSTAR;
                        }
                        else if (minIsZero && node.max == 1) {
                            code [opSlot] = node.greedy ? REOP_OPT : REOP_MINIMALOPT;
                        }
                        else if (node.min == 1 && maxIsOpen) {
                            code [opSlot] = node.greedy ? REOP_PLUS : REOP_MINIMALPLUS;
                        }
                        else {
                            if (!node.greedy) {
                                code [opSlot] = REOP_MINIMALQUANT;
                            }
                            pc = addIndex (code, pc, node.min);
                            // max can be -1 which addIndex does not accept
                            pc = addIndex (code, pc, node.max + 1);
                        }
                        pc = addIndex (code, pc, node.parenCount);
                        pc = addIndex (code, pc, node.parenIndex);
                        int followingTermSlot = pc;
                        pc += OFFSET_LEN;
                        pc = emitREBytecode (state, re, pc, node.kid);
                        code [pc++] = REOP_ENDCHILD;
                        resolveForwardJump (code, followingTermSlot, pc);
                        break;
                    }

                    case REOP_CLASS: {
                        pc = addIndex (code, pc, node.index);
                        re.classList [node.index] = new RECharSet (node.bmsize, node.startIndex, node.kidlen);
                        break;
                    }

                    default:
                        // Remaining opcodes take no operands.
                        break;
                }
            }
            return pc;
        }