Ejemplo n.º 1
0
		// private methods

		/// <summary>
		/// Parses the pattern from the current <c>ptr</c> position until a closing
		/// ')' or the end of the pattern, and appends the result to
		/// <paramref name="group"/>.
		/// </summary>
		/// <param name="group">
		/// Receives the parsed content. It is treated as the top-level group when it
		/// is a <c>RegularExpression</c>; in that case a ')' is an error, otherwise a
		/// missing ')' is an error.
		/// </param>
		/// <param name="options">
		/// Options in effect at the start of the group. Passed by value, so changes
		/// made by inline option constructs stay local to this call.
		/// </param>
		/// <param name="assertion">
		/// When non-null, the '|'-separated branches are stored in its
		/// <c>TrueExpression</c> and <c>FalseExpression</c> and the assertion itself
		/// is appended to <paramref name="group"/>; at most two branches are allowed.
		/// When null, branches are collected into an <c>Alternation</c>.
		/// </param>
		/// <remarks>
		/// Raises the exception built by <c>NewParseException</c> for a leading
		/// quantifier, unbalanced parentheses, or a third branch in a conditional.
		/// </remarks>
		private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {
			bool is_top_level = group is RegularExpression;

			Alternation alternation = null;
			// Run of plain characters not yet emitted; flushed as a single Literal.
			string literal = null;

			// Branch currently being built (a new one is started after each '|').
			Group current = new Group ();
			Expression expr = null;
			bool closed = false;

			while (true) {
				ConsumeWhitespace (IsIgnorePatternWhitespace (options));
				if (ptr >= pattern.Length)
					break;

				// (1) Parse for Expressions
				//
				// After the switch, either expr holds a parsed expression or
				// expr is null and ch is a plain literal character.

				char ch = pattern[ptr ++];

				switch (ch) {
				case '^': {
					Position pos =
						IsMultiline (options) ? Position.StartOfLine : Position.Start;
					expr = new PositionAssertion (pos);
					break;
				}

				case '$': {
					Position pos =
						IsMultiline (options) ? Position.EndOfLine : Position.End;
					expr = new PositionAssertion (pos);
					break;
				}

				case '.': {
					Category cat =
						IsSingleline (options) ? Category.AnySingleline : Category.Any;
					expr = new CharacterClass (cat, false);
					break;
				}

				case '\\': {
					// A non-negative result is used as the literal character.
					int c = ParseEscape (false);
					if (c >= 0)
						ch = (char)c;
					else {
						expr = ParseSpecial (options);

						// Neither helper produced anything: take the next
						// character literally.
						if (expr == null)
							ch = pattern[ptr ++];		// default escape
					}
					break;
				}

				case '[': {
					expr = ParseCharacterClass (options);
					break;
				}

				case '(': {
					// Remember the case-sensitivity before the construct gets a
					// chance to change the options.
					bool ignore = IsIgnoreCase (options);
					expr = ParseGroupingConstruct (ref options);
					if (expr == null) {
						// The construct produced no expression. If it flipped
						// IgnoreCase, the pending literal was read under the old
						// setting and must be flushed with that setting, not the
						// new one.
						if (literal != null && IsIgnoreCase (options) != ignore) {
							current.AppendExpression (new Literal (literal, ignore));
							literal = null;
						}

						continue;
					}
					break;
				}

				case ')': {
					closed = true;
					goto EndOfGroup;
				}

				case '|': {
					if (literal != null) {
						current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
						literal = null;
					}

					if (assertion != null) {
						// Conditional: first branch is the true part, second the
						// false part, a third '|' is an error.
						if (assertion.TrueExpression == null)
							assertion.TrueExpression = current;
						else if (assertion.FalseExpression == null)
							assertion.FalseExpression = current;
						else
							throw NewParseException ("Too many | in (?()|).");
					}
					else {
						if (alternation == null)
							alternation = new Alternation ();

						alternation.AddAlternative (current);
					}

					current = new Group ();
					continue;
				}

				case '*': case '+': case '?': {
					// A quantifier with nothing in front of it.
					throw NewParseException ("Bad quantifier.");
				}

				default:
					break;		// literal character
				}

				ConsumeWhitespace (IsIgnorePatternWhitespace (options));

				// (2) Check for Repetitions

				if (ptr < pattern.Length) {
					char k = pattern[ptr];
					int min = 0, max = 0;
					bool lazy = false;
					bool haveRep = false;


					if (k == '?' || k == '*' || k == '+') {
						++ ptr;
						haveRep = true;

						switch (k) {
						case '?': min = 0; max = 1; break;
						case '*': min = 0; max = 0x7fffffff; break;
						case '+': min = 1; max = 0x7fffffff; break;
						}
					} else if (k == '{' && ptr + 1 < pattern.Length) {
						// If the bounds do not parse, rewind so that '{' is
						// picked up as a literal on the next iteration.
						int saved_ptr = ptr;
						++ptr;
						haveRep = ParseRepetitionBounds (out min, out max, options);
						if (!haveRep)
							ptr = saved_ptr;
					}

					if (haveRep) {
						ConsumeWhitespace (IsIgnorePatternWhitespace (options));
						// A '?' directly after the quantifier makes it lazy.
						if (ptr < pattern.Length && pattern[ptr] == '?') {
							++ ptr;
							lazy = true;
						}

						//It doesn't make sense to assert a given position more than once.
						// For a position assertion the count is capped at one; when
						// it is required (min > 0) and greedy, the repetition wrapper
						// is dropped and the bare assertion is kept.
						bool ignore_repetition = false;
						if (expr is PositionAssertion) {
							ignore_repetition = min > 0 && !lazy;
							max = 1;
						}

						if (!ignore_repetition) {
							Repetition repetition = new Repetition (min, max, lazy);

							// The quantifier binds only to the single preceding
							// character, not to the whole pending literal.
							if (expr == null)
								repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));
							else
								repetition.Expression = expr;

							expr = repetition;
						}
					}
				}

				// (3) Append Expression and/or Literal

				if (expr == null) {
					if (literal == null)
						literal = "";
					literal += ch;
				}
				else {
					// Emit the pending literal first to preserve pattern order.
					if (literal != null) {
						current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
						literal = null;
					}

					current.AppendExpression (expr);
					expr = null;
				}

				if (is_top_level && ptr >= pattern.Length)
					goto EndOfGroup;
			}

		EndOfGroup:
			if (is_top_level && closed)
				throw NewParseException ("Too many )'s.");
			if (!is_top_level && !closed)
				throw NewParseException ("Not enough )'s.");


			// clean up literals and alternations

			if (literal != null)
				current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));

			if (assertion != null) {
				// The final branch fills whichever slot is still open.
				if (assertion.TrueExpression == null)
					assertion.TrueExpression = current;
				else
					assertion.FalseExpression = current;

				group.AppendExpression (assertion);
			}
			else if (alternation != null) {
				alternation.AddAlternative (current);
				group.AppendExpression (alternation);
			}
			else
				group.AppendExpression (current);
		}
Ejemplo n.º 2
0
        // private methods

        /// <summary>
        /// Parses the pattern from the current <c>ptr</c> position until a closing
        /// ')' or the end of the pattern, and appends the result to
        /// <paramref name="group"/>.
        /// </summary>
        /// <param name="group">
        /// Receives the parsed content. It is treated as the top-level group when it
        /// is a <c>RegularExpression</c>; in that case a ')' is an error, otherwise a
        /// missing ')' is an error.
        /// </param>
        /// <param name="options">
        /// Options in effect at the start of the group. Passed by value, so changes
        /// made by inline option constructs stay local to this call.
        /// </param>
        /// <param name="assertion">
        /// When non-null, the '|'-separated branches are stored in its
        /// <c>TrueExpression</c> and <c>FalseExpression</c> and the assertion itself
        /// is appended to <paramref name="group"/>; at most two branches are allowed.
        /// When null, branches are collected into an <c>Alternation</c>.
        /// </param>
        /// <remarks>
        /// Raises the exception built by <c>NewParseException</c> for a leading
        /// quantifier, unbalanced parentheses, or a third branch in a conditional.
        /// </remarks>
        private void ParseGroup(Group group, RegexOptions options, Assertion assertion)
        {
            bool is_top_level = group is RegularExpression;

            Alternation alternation = null;
            // Run of plain characters not yet emitted; flushed as a single Literal.
            string      literal     = null;

            // Branch currently being built (a new one is started after each '|').
            Group      current = new Group();
            Expression expr    = null;
            bool       closed  = false;

            while (true)
            {
                ConsumeWhitespace(IsIgnorePatternWhitespace(options));
                if (ptr >= pattern.Length)
                {
                    break;
                }

                // (1) Parse for Expressions
                //
                // After the switch, either expr holds a parsed expression or
                // expr is null and ch is a plain literal character.

                char ch = pattern[ptr++];

                switch (ch)
                {
                case '^': {
                    Position pos =
                        IsMultiline(options) ? Position.StartOfLine : Position.Start;
                    expr = new PositionAssertion(pos);
                    break;
                }

                case '$': {
                    Position pos =
                        IsMultiline(options) ? Position.EndOfLine : Position.End;
                    expr = new PositionAssertion(pos);
                    break;
                }

                case '.': {
                    Category cat =
                        IsSingleline(options) ? Category.AnySingleline : Category.Any;
                    expr = new CharacterClass(cat, false);
                    break;
                }

                case '\\': {
                    // A non-negative result is used as the literal character.
                    int c = ParseEscape();
                    if (c >= 0)
                    {
                        ch = (char)c;
                    }
                    else
                    {
                        expr = ParseSpecial(options);

                        // Neither helper produced anything: take the next
                        // character literally.
                        if (expr == null)
                        {
                            ch = pattern[ptr++];                                        // default escape
                        }
                    }
                    break;
                }

                case '[': {
                    expr = ParseCharacterClass(options);
                    break;
                }

                case '(': {
                    // Remember the case-sensitivity before the construct gets a
                    // chance to change the options.
                    bool ignore = IsIgnoreCase(options);
                    expr = ParseGroupingConstruct(ref options);
                    if (expr == null)
                    {
                        // The construct produced no expression. If it flipped
                        // IgnoreCase, the pending literal was read under the old
                        // setting and must be flushed with that setting, not the
                        // new one.
                        if (literal != null && IsIgnoreCase(options) != ignore)
                        {
                            current.AppendExpression(new Literal(literal, ignore));
                            literal = null;
                        }

                        continue;
                    }
                    break;
                }

                case ')': {
                    closed = true;
                    goto EndOfGroup;
                }

                case '|': {
                    if (literal != null)
                    {
                        current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
                        literal = null;
                    }

                    if (assertion != null)
                    {
                        // Conditional: first branch is the true part, second the
                        // false part, a third '|' is an error.
                        if (assertion.TrueExpression == null)
                        {
                            assertion.TrueExpression = current;
                        }
                        else if (assertion.FalseExpression == null)
                        {
                            assertion.FalseExpression = current;
                        }
                        else
                        {
                            throw NewParseException("Too many | in (?()|).");
                        }
                    }
                    else
                    {
                        if (alternation == null)
                        {
                            alternation = new Alternation();
                        }

                        alternation.AddAlternative(current);
                    }

                    current = new Group();
                    continue;
                }

                case '*':
                case '+':
                case '?': {
                    // A quantifier with nothing in front of it.
                    throw NewParseException("Bad quantifier.");
                }

                default:
                    break;                              // literal character
                }

                ConsumeWhitespace(IsIgnorePatternWhitespace(options));

                // (2) Check for Repetitions

                if (ptr < pattern.Length)
                {
                    char k = pattern[ptr];
                    int  min = 0, max = 0;
                    bool lazy    = false;
                    bool haveRep = false;


                    if (k == '?' || k == '*' || k == '+')
                    {
                        ++ptr;
                        haveRep = true;

                        switch (k)
                        {
                        case '?': min = 0; max = 1; break;

                        case '*': min = 0; max = 0xffff; break;

                        case '+': min = 1; max = 0xffff; break;
                        }
                    }
                    else if (k == '{' && ptr + 1 < pattern.Length)
                    {
                        // If the bounds do not parse, rewind so that '{' is
                        // picked up as a literal on the next iteration.
                        int saved_ptr = ptr;
                        ++ptr;
                        haveRep = ParseRepetitionBounds(out min, out max, options);
                        if (!haveRep)
                        {
                            ptr = saved_ptr;
                        }
                    }

                    if (haveRep)
                    {
                        ConsumeWhitespace(IsIgnorePatternWhitespace(options));
                        // A '?' directly after the quantifier makes it lazy.
                        if (ptr < pattern.Length && pattern[ptr] == '?')
                        {
                            ++ptr;
                            lazy = true;
                        }

                        Repetition repetition = new Repetition(min, max, lazy);

                        // The quantifier binds only to the single preceding
                        // character, not to the whole pending literal.
                        if (expr == null)
                        {
                            repetition.Expression = new Literal(ch.ToString(), IsIgnoreCase(options));
                        }
                        else
                        {
                            repetition.Expression = expr;
                        }

                        expr = repetition;
                    }
                }

                // (3) Append Expression and/or Literal

                if (expr == null)
                {
                    if (literal == null)
                    {
                        literal = "";
                    }
                    literal += ch;
                }
                else
                {
                    // Emit the pending literal first to preserve pattern order.
                    if (literal != null)
                    {
                        current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
                        literal = null;
                    }

                    current.AppendExpression(expr);
                    expr = null;
                }

                if (is_top_level && ptr >= pattern.Length)
                {
                    goto EndOfGroup;
                }
            }

EndOfGroup:
            if (is_top_level && closed)
            {
                throw NewParseException("Too many )'s.");
            }
            if (!is_top_level && !closed)
            {
                throw NewParseException("Not enough )'s.");
            }


            // clean up literals and alternations

            if (literal != null)
            {
                current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
            }

            if (assertion != null)
            {
                // The final branch fills whichever slot is still open.
                if (assertion.TrueExpression == null)
                {
                    assertion.TrueExpression = current;
                }
                else
                {
                    assertion.FalseExpression = current;
                }

                group.AppendExpression(assertion);
            }
            else if (alternation != null)
            {
                alternation.AddAlternative(current);
                group.AppendExpression(alternation);
            }
            else
            {
                group.AppendExpression(current);
            }
        }