Beispiel #1
0
		/// <summary>
		/// Compiles regular expression source text into an RECompiled program.
		/// </summary>
		/// <param name="cx">Context handed to the CompilerState that drives the compilation.</param>
		/// <param name="str">Pattern source text.</param>
		/// <param name="global">
		/// Flag characters: 'g', 'i' and 'm' are recognised. May be null, meaning no flags.
		/// An unknown flag, or a flag that appears more than once, is reported through
		/// ReportError with the "msg.invalid.re.flag" message id.
		/// </param>
		/// <param name="flat">
		/// When true and the pattern is non-empty, the whole pattern is emitted as a single
		/// literal (REOP_FLAT) node instead of being parsed.
		/// </param>
		/// <returns>The compiled expression, or null when ParseDisjunction reports failure.</returns>
		internal static RECompiled CompileRE(Context cx, string str, string global, bool flat)
		{
			RECompiled regexp = new RECompiled(str);
			int length = str.Length;
			int flags = 0;
			if (global != null)
			{
				foreach (char c in global)
				{
					int flag = 0;
					switch (c)
					{
						case 'g':
						{
							flag = JSREG_GLOB;
							break;
						}

						case 'i':
						{
							flag = JSREG_FOLD;
							break;
						}

						case 'm':
						{
							flag = JSREG_MULTILINE;
							break;
						}

						default:
						{
							ReportError("msg.invalid.re.flag", c.ToString());
							break;
						}
					}
					// ECMAScript requires a SyntaxError when the same flag is given twice
					// (e.g. "gg"); previously duplicates were silently accepted.
					if ((flags & flag) != 0)
					{
						ReportError("msg.invalid.re.flag", c.ToString());
					}
					flags |= flag;
				}
			}
			regexp.flags = flags;
			CompilerState state = new CompilerState(cx, regexp.source, length, flags);
			if (flat && length > 0)
			{
				// Treat the entire source as one literal run starting at index 0.
				state.result = new RENode(REOP_FLAT);
				state.result.chr = state.cpbegin[0];
				state.result.length = length;
				state.result.flatIndex = 0;
				state.progLength += 5;
			}
			else
			{
				if (!ParseDisjunction(state))
				{
					return null;
				}
			}
			// One extra byte is reserved for the trailing REOP_END.
			regexp.program = new byte[state.progLength + 1];
			if (state.classCount != 0)
			{
				regexp.classList = new RECharSet[state.classCount];
				regexp.classCount = state.classCount;
			}
			int endPC = EmitREBytecode(state, regexp, 0, state.result);
			regexp.program[endPC++] = REOP_END;
			regexp.parenCount = state.parenCount;
			// Derive anchorCh from the first opcode: a leading literal or a
			// beginning-of-line assertion is recorded there.
			switch (regexp.program[0])
			{
				case REOP_UCFLAT1:
				case REOP_UCFLAT1i:
				{
					// The literal is stored as an index-sized operand after the opcode.
					regexp.anchorCh = (char)GetIndex(regexp.program, 1);
					break;
				}

				case REOP_FLAT1:
				case REOP_FLAT1i:
				{
					// The literal is stored as a single operand byte after the opcode.
					regexp.anchorCh = (char)(regexp.program[1] & 0xFF);
					break;
				}

				case REOP_FLAT:
				case REOP_FLATi:
				{
					// The operand is an index into the source; the anchor is the character found there.
					int k = GetIndex(regexp.program, 1);
					regexp.anchorCh = regexp.source[k];
					break;
				}

				case REOP_BOL:
				{
					regexp.anchorCh = ANCHOR_BOL;
					break;
				}

				case REOP_ALT:
				{
					// An alternation is BOL-anchored only when both branches start with BOL.
					RENode n = state.result;
					if (n.kid.op == REOP_BOL && n.kid2.op == REOP_BOL)
					{
						regexp.anchorCh = ANCHOR_BOL;
					}
					break;
				}
			}
			return regexp;
		}
Beispiel #2
0
		/// <summary>
		/// Creates a RegExp object around an already compiled expression, with lastIndex set to 0.
		/// </summary>
		/// <param name="scope">Scope passed to ScriptRuntime.SetBuiltinProtoAndParent together with the RegExp builtin.</param>
		/// <param name="regexpCompiled">Compiled expression stored as this object's re.</param>
		internal NativeRegExp(Scriptable scope, RECompiled regexpCompiled)
		{
			re = regexpCompiled;
			lastIndex = 0;
			ScriptRuntime.SetBuiltinProtoAndParent(this, scope, TopLevel.Builtins.RegExp);
		}
Beispiel #3
0
		/// <summary>
		/// Runs the compiled expression against <paramref name="input"/>, trying candidate
		/// start positions from <paramref name="start"/> up to and including <paramref name="end"/>.
		/// </summary>
		/// <param name="gData">Match state; its parens, stacks, multiline, regexp, cp and skipped members are (re)initialised here.</param>
		/// <param name="re">Compiled expression to execute.</param>
		/// <param name="input">Text being searched.</param>
		/// <param name="start">First candidate start position.</param>
		/// <param name="end">Position one past the last character to consider.</param>
		/// <param name="multiline">Forces multiline mode in addition to the expression's own JSREG_MULTILINE flag.</param>
		/// <returns>true when ExecuteREBytecode succeeds at some position; otherwise false.</returns>
		private static bool MatchRegExp(REGlobalData gData, RECompiled re, string input, int start, int end, bool multiline)
		{
			gData.parens = re.parenCount != 0 ? new long[re.parenCount] : null;
			gData.backTrackStackTop = null;
			gData.stateStackTop = null;
			gData.multiline = multiline || (re.flags & JSREG_MULTILINE) != 0;
			gData.regexp = re;
			int anchor = gData.regexp.anchorCh;
			// The position just past the last character is tried as well, so that
			// end-of-input / end-of-line conditions can be detected.
			for (int pos = start; pos <= end; ++pos)
			{
				if (anchor >= 0)
				{
					// The expression starts with a literal: advance to the next occurrence
					// of that character, or give up when the input is exhausted.
					while (true)
					{
						if (pos == end)
						{
							return false;
						}
						char candidate = input[pos];
						if (candidate == anchor)
						{
							break;
						}
						if ((gData.regexp.flags & JSREG_FOLD) != 0 && Upcase(candidate) == Upcase((char)anchor))
						{
							break;
						}
						++pos;
					}
				}
				gData.cp = pos;
				gData.skipped = pos - start;
				// Reset every capture slot to -1 before each attempt.
				for (int slot = 0; slot < re.parenCount; slot++)
				{
					gData.parens[slot] = -1L;
				}
				bool matched = ExecuteREBytecode(gData, input, end);
				gData.backTrackStackTop = null;
				gData.stateStackTop = null;
				if (matched)
				{
					return true;
				}
				if (anchor == ANCHOR_BOL && !gData.multiline)
				{
					// BOL-anchored and not multiline: no later start position is tried.
					gData.skipped = end;
					return false;
				}
				// Continue from whatever skip count gData holds after the attempt.
				pos = start + gData.skipped;
			}
			return false;
		}
Beispiel #4
0
		/// <summary>
		/// Writes bytecode for the node list beginning at <paramref name="t"/> into re.program,
		/// starting at offset <paramref name="pc"/>. Child nodes are emitted recursively.
		/// </summary>
		/// <param name="state">Compiler state; its flags decide between case-folding and plain literal opcodes.</param>
		/// <param name="re">Compiled expression whose program and classList are filled in.</param>
		/// <param name="pc">Offset in the program at which to start writing.</param>
		/// <param name="t">First node of the list to emit; may be null.</param>
		/// <returns>The program offset just past the last byte written.</returns>
		private static int EmitREBytecode(CompilerState state, RECompiled re, int pc, RENode t)
		{
			byte[] code = re.program;
			for (RENode node = t; node != null; node = node.next)
			{
				// Write the node's opcode first; several cases below overwrite code[pc - 1]
				// with a more specific opcode.
				code[pc++] = node.op;
				switch (node.op)
				{
					case REOP_EMPTY:
					{
						// Back up over the opcode just written so the next write replaces it.
						--pc;
						break;
					}

					case REOP_ALTPREREQ:
					case REOP_ALTPREREQi:
					case REOP_ALTPREREQ2:
					{
						bool ignoreCase = node.op == REOP_ALTPREREQi;
						int firstOperand = ignoreCase ? Upcase(node.chr) : node.chr;
						AddIndex(code, pc, firstOperand);
						pc += INDEX_LEN;
						int secondOperand = ignoreCase ? Upcase((char)node.index) : node.index;
						AddIndex(code, pc, secondOperand);
						pc += INDEX_LEN;
						// The remainder is emitted exactly like a plain alternation.
						goto case REOP_ALT;
					}

					case REOP_ALT:
					{
						RENode secondBranch = node.kid2;
						// Slot for the jump to the second alternative.
						int altSlot = pc;
						pc = EmitREBytecode(state, re, pc + INDEX_LEN, node.kid);
						code[pc++] = REOP_JUMP;
						// Slot for the jump taken after the first alternative.
						int termSlot = pc;
						pc += INDEX_LEN;
						ResolveForwardJump(code, altSlot, pc);
						pc = EmitREBytecode(state, re, pc, secondBranch);
						code[pc++] = REOP_JUMP;
						int tailSlot = pc;
						pc += INDEX_LEN;
						ResolveForwardJump(code, termSlot, pc);
						ResolveForwardJump(code, tailSlot, pc);
						break;
					}

					case REOP_FLAT:
					{
						bool indexed = node.flatIndex != -1;
						if (indexed)
						{
							// Merge following FLAT nodes whose source ranges are contiguous with this one.
							RENode following = node.next;
							while (following != null && following.op == REOP_FLAT && node.flatIndex + node.length == following.flatIndex)
							{
								node.length += following.length;
								node.next = following.next;
								following = node.next;
							}
						}
						bool fold = (state.flags & JSREG_FOLD) != 0;
						if (indexed && node.length > 1)
						{
							// Multi-character literal: operands are source index and length.
							code[pc - 1] = fold ? REOP_FLATi : REOP_FLAT;
							pc = AddIndex(code, pc, node.flatIndex);
							pc = AddIndex(code, pc, node.length);
						}
						else if (node.chr < 256)
						{
							// Single character that fits in one byte.
							code[pc - 1] = fold ? REOP_FLAT1i : REOP_FLAT1;
							code[pc++] = unchecked((byte)(node.chr));
						}
						else
						{
							// Single character that needs an index-sized operand.
							code[pc - 1] = fold ? REOP_UCFLAT1i : REOP_UCFLAT1;
							pc = AddIndex(code, pc, node.chr);
						}
						break;
					}

					case REOP_LPAREN:
					{
						pc = AddIndex(code, pc, node.parenIndex);
						pc = EmitREBytecode(state, re, pc, node.kid);
						code[pc++] = REOP_RPAREN;
						pc = AddIndex(code, pc, node.parenIndex);
						break;
					}

					case REOP_BACKREF:
					{
						pc = AddIndex(code, pc, node.parenIndex);
						break;
					}

					case REOP_ASSERT:
					{
						int assertSlot = pc;
						pc = EmitREBytecode(state, re, pc + INDEX_LEN, node.kid);
						code[pc++] = REOP_ASSERTTEST;
						ResolveForwardJump(code, assertSlot, pc);
						break;
					}

					case REOP_ASSERT_NOT:
					{
						int assertNotSlot = pc;
						pc = EmitREBytecode(state, re, pc + INDEX_LEN, node.kid);
						code[pc++] = REOP_ASSERTNOTTEST;
						ResolveForwardJump(code, assertNotSlot, pc);
						break;
					}

					case REOP_QUANT:
					{
						// The common quantifier shapes get dedicated opcodes with no min/max operands.
						if (node.min == 0 && node.max == -1)
						{
							code[pc - 1] = node.greedy ? REOP_STAR : REOP_MINIMALSTAR;
						}
						else if (node.min == 0 && node.max == 1)
						{
							code[pc - 1] = node.greedy ? REOP_OPT : REOP_MINIMALOPT;
						}
						else if (node.min == 1 && node.max == -1)
						{
							code[pc - 1] = node.greedy ? REOP_PLUS : REOP_MINIMALPLUS;
						}
						else
						{
							if (!node.greedy)
							{
								code[pc - 1] = REOP_MINIMALQUANT;
							}
							pc = AddIndex(code, pc, node.min);
							// max may be -1, which AddIndex does not accept, so it is stored offset by one.
							pc = AddIndex(code, pc, node.max + 1);
						}
						pc = AddIndex(code, pc, node.parenCount);
						pc = AddIndex(code, pc, node.parenIndex);
						int quantSlot = pc;
						pc = EmitREBytecode(state, re, pc + INDEX_LEN, node.kid);
						code[pc++] = REOP_ENDCHILD;
						ResolveForwardJump(code, quantSlot, pc);
						break;
					}

					case REOP_CLASS:
					{
						if (!node.sense)
						{
							code[pc - 1] = REOP_NCLASS;
						}
						pc = AddIndex(code, pc, node.index);
						re.classList[node.index] = new RECharSet(node.bmsize, node.startIndex, node.kidlen, node.sense);
						break;
					}

					default:
					{
						// No operands are emitted for any other opcode.
						break;
					}
				}
			}
			return pc;
		}