/// <summary>
/// Re-initializes this object either by copying the state of another
/// <c>BuiltinRegExp</c> or by compiling a pattern/flags pair.
/// </summary>
/// <param name="cx">Not read by this method.</param>
/// <param name="scope">Not read by this method.</param>
/// <param name="args">
/// <c>args[0]</c> is either a <c>BuiltinRegExp</c> to copy from or a value that is
/// converted to the pattern string (a missing value yields the empty pattern).
/// <c>args[1]</c>, when present and not <c>Undefined.Value</c>, is converted to the
/// flags string.
/// </param>
/// <returns>This instance.</returns>
internal virtual IScriptable compile (Context cx, IScriptable scope, object [] args)
{
    // Copy path: the first argument is itself a regexp object.
    BuiltinRegExp donor = args.Length > 0 ? args [0] as BuiltinRegExp : null;
    if (donor != null) {
        bool flagsSupplied = args.Length > 1 && args [1] != Undefined.Value;
        if (flagsSupplied) {
            // Flags may not be combined with an existing regexp object.
            throw ScriptRuntime.TypeErrorById ("msg.bad.regexp.compile");
        }
        this.re = donor.re;
        this.lastIndex = donor.lastIndex;
        return this;
    }

    // Compile path: stringify the pattern first, then the optional flags.
    string pattern = "";
    if (args.Length != 0) {
        pattern = ScriptConvert.ToString (args [0]);
    }

    string flagText = null;
    if (args.Length > 1 && args [1] != Undefined.Value) {
        flagText = ScriptConvert.ToString (args [1]);
    }

    this.re = (RECompiled)compileRE (pattern, flagText, false);
    this.lastIndex = 0;
    return this;
}
/// <summary>
/// Compiles a pattern string into an <c>RECompiled</c> program.
/// </summary>
/// <param name="str">The pattern text; it is copied into the result's <c>source</c>.</param>
/// <param name="global">
/// Optional flag characters. 'g', 'i' and 'm' set <c>JSREG_GLOB</c>,
/// <c>JSREG_FOLD</c> and <c>JSREG_MULTILINE</c>; any other character is passed to
/// <c>reportError</c>. May be null for no flags.
/// </param>
/// <param name="flat">
/// When true and the pattern is non-empty, the whole pattern becomes a single
/// <c>REOP_FLAT</c> node instead of being parsed.
/// </param>
/// <returns>
/// The populated <c>RECompiled</c>, or null when <c>parseDisjunction</c> returns false.
/// </returns>
internal static object compileRE (string str, string global, bool flat)
{
    RECompiled compiled = new RECompiled ();
    compiled.source = str.ToCharArray ();
    int patternLength = str.Length;

    // Translate the flag characters into the flag bit set.
    int flagBits = 0;
    if (global != null) {
        foreach (char flagChar in global) {
            switch (flagChar) {
                case 'g':
                    flagBits |= JSREG_GLOB;
                    break;
                case 'i':
                    flagBits |= JSREG_FOLD;
                    break;
                case 'm':
                    flagBits |= JSREG_MULTILINE;
                    break;
                default:
                    reportError ("msg.invalid.re.flag", Convert.ToString (flagChar));
                    break;
            }
        }
    }
    compiled.flags = flagBits;

    CompilerState state = new CompilerState (compiled.source, patternLength, flagBits);
    if (flat && patternLength > 0) {
        if (debug) {
            System.Console.Out.WriteLine ("flat = \"" + str + "\"");
        }
        // The entire pattern is one literal run starting at source index 0.
        RENode literal = new RENode (REOP_FLAT);
        literal.chr = state.cpbegin [0];
        literal.length = patternLength;
        literal.flatIndex = 0;
        state.result = literal;
        // NOTE(review): 5 is presumably one opcode byte plus two encoded
        // indices - confirm against addIndex.
        state.progLength += 5;
    } else if (!parseDisjunction (state)) {
        return null;
    }

    // One extra slot is reserved for the trailing REOP_END.
    compiled.program = new sbyte [state.progLength + 1];
    if (state.classCount != 0) {
        compiled.classList = new RECharSet [state.classCount];
        compiled.classCount = state.classCount;
    }

    int endPC = emitREBytecode (state, compiled, 0, state.result);
    compiled.program [endPC++] = REOP_END;

    if (debug) {
        System.Console.Out.WriteLine ("Prog. length = " + endPC);
        for (int i = 0; i < endPC; i++) {
            if (i > 0) {
                System.Console.Out.Write (", ");
            }
            System.Console.Out.Write (DebugNameOp ((sbyte)compiled.program [i]));
        }
        System.Console.Out.WriteLine ();
    }

    compiled.parenCount = state.parenCount;

    // When the program opens with a literal, record its first character as
    // the anchor character.
    sbyte firstOp = compiled.program [0];
    if (firstOp == REOP_UCFLAT1 || firstOp == REOP_UCFLAT1i) {
        compiled.anchorCh = (char)getIndex (compiled.program, 1);
    } else if (firstOp == REOP_FLAT1 || firstOp == REOP_FLAT1i) {
        compiled.anchorCh = (char)(compiled.program [1] & 0xFF);
    } else if (firstOp == REOP_FLAT || firstOp == REOP_FLATi) {
        int sourceIndex = getIndex (compiled.program, 1);
        compiled.anchorCh = compiled.source [sourceIndex];
    }

    if (debug && compiled.anchorCh >= 0) {
        System.Console.Out.WriteLine ("Anchor ch = '" + (char)compiled.anchorCh + "'");
    }

    return compiled;
}
/// <summary>
/// Creates a regexp object around an already compiled program.
/// </summary>
/// <param name="scope">Scope handed to <c>ScriptRuntime.setObjectProtoAndParent</c>.</param>
/// <param name="regexpCompiled">The compiled regexp; it is cast to <c>RECompiled</c>.</param>
internal BuiltinRegExp (IScriptable scope, object regexpCompiled)
{
    RECompiled compiled = (RECompiled)regexpCompiled;
    this.re = compiled;
    this.lastIndex = 0;

    // Wire up prototype and parent only after the fields are initialized.
    ScriptRuntime.setObjectProtoAndParent (this, scope);
}
/// <summary>
/// Tries to match <paramref name="re"/> against <paramref name="chars"/>, attempting
/// each start position from <paramref name="start"/> up to and including
/// <paramref name="end"/>.
/// </summary>
/// <param name="gData">Match state; it is reset here and filled in by the matcher.</param>
/// <param name="re">The compiled regexp to run.</param>
/// <param name="chars">The input characters.</param>
/// <param name="start">First position at which a match is attempted.</param>
/// <param name="end">Position just past the last input character.</param>
/// <param name="multiline">Stored into <c>gData.multiline</c>.</param>
/// <returns>
/// True when <c>executeREBytecode</c> succeeds at some position; in that case
/// <c>gData.skipped</c> holds that position's offset from <paramref name="start"/>.
/// </returns>
private static bool matchRegExp (REGlobalData gData, RECompiled re, char [] chars, int start, int end, bool multiline)
{
    gData.parens = re.parenCount != 0 ? new long [re.parenCount] : null;

    gData.backTrackStackTop = null;
    gData.stateStackTop = null;

    gData.multiline = multiline;
    gData.regexp = re;
    gData.lastParen = 0;

    int anchorCh = gData.regexp.anchorCh;

    // The position one past the final character is tried as well, so that
    // end-of-input / end-of-line conditions can be detected.
    for (int pos = start; pos <= end; ++pos) {
        if (anchorCh >= 0) {
            // The program begins with a literal: advance to the next place
            // where that character occurs, or give up if it never does.
            while (true) {
                if (pos == end) {
                    return false;
                }
                char candidate = chars [pos];
                if (candidate == anchorCh) {
                    break;
                }
                if ((gData.regexp.flags & JSREG_FOLD) != 0
                    && upcase (candidate) == upcase ((char)anchorCh)) {
                    break;
                }
                ++pos;
            }
        }

        gData.cp = pos;
        for (int group = 0; group < re.parenCount; group++) {
            gData.set_parens (group, -1, 0);
        }

        bool matched = executeREBytecode (gData, chars, end);

        gData.backTrackStackTop = null;
        gData.stateStackTop = null;

        if (matched) {
            gData.skipped = pos - start;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Writes the bytecode for the node chain starting at <paramref name="t"/> into
/// <c>re.program</c>, beginning at offset <paramref name="pc"/>. Child nodes are
/// emitted recursively.
/// </summary>
/// <param name="state">Compiler state; only its <c>flags</c> are read here.</param>
/// <param name="re">Receives the bytecode and, for class nodes, the <c>classList</c> entries.</param>
/// <param name="pc">Offset in the program at which emission starts.</param>
/// <param name="t">First node of the sibling chain to emit; may be null.</param>
/// <returns>The program offset just past the last byte written.</returns>
private static int emitREBytecode (CompilerState state, RECompiled re, int pc, RENode t)
{
    sbyte [] program = re.program;

    for (RENode node = t; node != null; node = node.next) {
        // Emit the node's opcode first; some cases below overwrite this slot
        // with a more specific opcode.
        program [pc++] = node.op;
        int opSlot = pc - 1;

        switch (node.op) {

            case REOP_EMPTY:
                // An empty node emits nothing: take back the opcode slot.
                --pc;
                break;

            case REOP_ALT: {
                RENode secondBranch = node.kid2;

                // Slot for the offset of the second alternative.
                int altFixup = pc;
                pc += OFFSET_LEN;
                pc = emitREBytecode (state, re, pc, node.kid);

                program [pc++] = REOP_JUMP;
                // Slot for the offset of whatever follows the alternation.
                int joinFixup = pc;
                pc += OFFSET_LEN;
                resolveForwardJump (program, altFixup, pc);

                pc = emitREBytecode (state, re, pc, secondBranch);

                program [pc++] = REOP_JUMP;
                int tailFixup = pc;
                pc += OFFSET_LEN;
                resolveForwardJump (program, joinFixup, pc);
                resolveForwardJump (program, tailFixup, pc);
                break;
            }

            case REOP_FLAT: {
                bool indexed = node.flatIndex != -1;
                bool foldCase = (state.flags & JSREG_FOLD) != 0;

                if (indexed) {
                    // Absorb following FLAT nodes whose source range directly
                    // continues this one.
                    while ((node.next != null)
                           && (node.next.op == REOP_FLAT)
                           && ((node.flatIndex + node.length) == node.next.flatIndex)) {
                        node.length += node.next.length;
                        node.next = node.next.next;
                    }
                }

                if (indexed && (node.length > 1)) {
                    // Multi-character literal: source index plus length.
                    if (foldCase) {
                        program [opSlot] = REOP_FLATi;
                    } else {
                        program [opSlot] = REOP_FLAT;
                    }
                    pc = addIndex (program, pc, node.flatIndex);
                    pc = addIndex (program, pc, node.length);
                } else if (node.chr < 256) {
                    // Single character that fits into one byte.
                    if (foldCase) {
                        program [opSlot] = REOP_FLAT1i;
                    } else {
                        program [opSlot] = REOP_FLAT1;
                    }
                    program [pc++] = (sbyte)(node.chr);
                } else {
                    // Single character that needs a full index operand.
                    if (foldCase) {
                        program [opSlot] = REOP_UCFLAT1i;
                    } else {
                        program [opSlot] = REOP_UCFLAT1;
                    }
                    pc = addIndex (program, pc, node.chr);
                }
                break;
            }

            case REOP_LPAREN:
                pc = addIndex (program, pc, node.parenIndex);
                pc = emitREBytecode (state, re, pc, node.kid);
                program [pc++] = REOP_RPAREN;
                pc = addIndex (program, pc, node.parenIndex);
                break;

            case REOP_BACKREF:
                pc = addIndex (program, pc, node.parenIndex);
                break;

            case REOP_ASSERT:
            case REOP_ASSERT_NOT: {
                bool negated = node.op == REOP_ASSERT_NOT;

                int afterFixup = pc;
                pc += OFFSET_LEN;
                pc = emitREBytecode (state, re, pc, node.kid);
                if (negated) {
                    program [pc++] = REOP_ASSERTNOTTEST;
                } else {
                    program [pc++] = REOP_ASSERTTEST;
                }
                resolveForwardJump (program, afterFixup, pc);
                break;
            }

            case REOP_QUANT: {
                bool star = (node.min == 0) && (node.max == -1);
                bool optional = (node.min == 0) && (node.max == 1);
                bool plus = (node.min == 1) && (node.max == -1);

                if (star) {
                    if (node.greedy) {
                        program [opSlot] = REOP_STAR;
                    } else {
                        program [opSlot] = REOP_MINIMALSTAR;
                    }
                } else if (optional) {
                    if (node.greedy) {
                        program [opSlot] = REOP_OPT;
                    } else {
                        program [opSlot] = REOP_MINIMALOPT;
                    }
                } else if (plus) {
                    if (node.greedy) {
                        program [opSlot] = REOP_PLUS;
                    } else {
                        program [opSlot] = REOP_MINIMALPLUS;
                    }
                } else {
                    // General {min,max}: keep the QUANT opcode (or its minimal
                    // form) and emit the bounds explicitly.
                    if (!node.greedy) {
                        program [opSlot] = REOP_MINIMALQUANT;
                    }
                    pc = addIndex (program, pc, node.min);
                    // max may be -1, which addIndex does not accept, so it is
                    // stored shifted up by one.
                    pc = addIndex (program, pc, node.max + 1);
                }

                pc = addIndex (program, pc, node.parenCount);
                pc = addIndex (program, pc, node.parenIndex);

                int afterFixup = pc;
                pc += OFFSET_LEN;
                pc = emitREBytecode (state, re, pc, node.kid);
                program [pc++] = REOP_ENDCHILD;
                resolveForwardJump (program, afterFixup, pc);
                break;
            }

            case REOP_CLASS:
                pc = addIndex (program, pc, node.index);
                re.classList [node.index] = new RECharSet (node.bmsize, node.startIndex, node.kidlen);
                break;

            default:
                // Every other opcode has no operands.
                break;
        }
    }

    return pc;
}