Ejemplo n.º 1
0
        /// <summary>
        /// Read the next match from given iterator, failing when none can be produced.
        /// </summary>
        /// <param name="iterator">Iterator to read the next match from.</param>
        /// <returns>Match produced by <c>TryMatchNext</c>.</returns>
        /// <remarks>
        /// When <c>TryMatchNext</c> reports a failure, the exception built by
        /// <c>CreateException</c> for the current iterator position is thrown.
        /// </remarks>
        public static Match <Lexem> NextOrThrow(IMatchIterator <Lexem> iterator)
        {
            if (iterator.TryMatchNext(out var next))
            {
                return next;
            }

            throw CreateException("unrecognized character", iterator.Position);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Convert lexems into a sequence node, starting from given match and
        /// reading from the iterator until the end lexem is reached.
        /// </summary>
        /// <param name="iterator">Iterator used to read following lexems.</param>
        /// <param name="match">First lexem of the sequence.</param>
        /// <param name="atTopLevel">
        /// Whether reaching the end lexem is allowed; when false, reaching it
        /// raises an "unfinished sequence" error.
        /// </param>
        /// <returns>Sequence node built from every node read before the end lexem.</returns>
        public static Node MatchSequence(IMatchIterator <Lexem> iterator, Match <Lexem> match, bool atTopLevel)
        {
            var sequence = new List <Node>();

            // Each iteration turns the current lexem into one node, then moves
            // to the next lexem through the loop's iterator clause
            for (var current = match; ; current = NextOrThrow(iterator))
            {
                var type = current.Value.Type;

                // End of input either completes the sequence or is an error
                if (type == LexemType.End)
                {
                    if (!atTopLevel)
                    {
                        throw CreateException("unfinished sequence", iterator.Position);
                    }

                    return Node.CreateSequence(sequence);
                }

                if (type == LexemType.ClassBegin)
                {
                    // Character class: delegate to class parser, starting after the opening lexem
                    sequence.Add(MatchClass(iterator, NextOrThrow(iterator)));
                }
                else if (type == LexemType.Escape)
                {
                    // Escape sequence: use the replacement character carried by the lexem
                    sequence.Add(Node.CreateCharacter(current.Value.Replacement));
                }
                else if (type == LexemType.Wildcard)
                {
                    sequence.Add(Wildcard);
                }
                else if (type == LexemType.ZeroOrMore)
                {
                    // Unbounded repetition of the wildcard node (-1 is passed as maximum)
                    sequence.Add(Node.CreateRepeat(Wildcard, 0, -1));
                }
                else
                {
                    // Anything else is taken as a literal character
                    sequence.Add(Node.CreateCharacter(current.Capture[0]));
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Parse the inside of a repeat specifier, starting from given match (the
        /// lexem following the opening one) up to the closing lexem.
        /// </summary>
        /// <param name="iterator">Iterator used to read following lexems.</param>
        /// <param name="match">First lexem inside the repeat specifier.</param>
        /// <returns>
        /// Minimum and maximum repeat counts. A missing minimum yields 0, a
        /// missing maximum after a comma yields -1, and when there is no comma
        /// the maximum equals the minimum.
        /// </returns>
        /// <remarks>
        /// Bounds that do not fit in an <see cref="int"/> are reported through
        /// <c>CreateException</c> ("invalid repeat sequence") rather than letting
        /// an <see cref="System.OverflowException"/> escape from the parser.
        /// </remarks>
        private static (int min, int max) MatchRepeat(IMatchIterator <Lexem> iterator, Match <Lexem> match)
        {
            var buffer = new StringBuilder();

            // Consume consecutive digit lexems from current position and convert
            // them to a bound, or return fallback value when there is no digit
            int ReadBound(int fallback)
            {
                buffer.Clear();

                while (match.Value.Type == LexemType.Digit)
                {
                    buffer.Append(match.Capture[0]);

                    match = NextOrThrow(iterator);
                }

                if (buffer.Length == 0)
                {
                    return fallback;
                }

                // Buffer only holds digits, so parsing can only fail on overflow
                if (!int.TryParse(buffer.ToString(), System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out var bound))
                {
                    throw CreateException("invalid repeat sequence", iterator.Position);
                }

                return bound;
            }

            int max;
            var min = ReadBound(0);

            if (match.Value.Type == LexemType.Comma)
            {
                match = NextOrThrow(iterator);
                max   = ReadBound(-1);

                // Negative maximum stands for "no upper bound", otherwise it
                // must not be lower than minimum
                if (max >= 0 && max < min)
                {
                    throw CreateException("invalid repeat sequence", iterator.Position);
                }
            }
            else
            {
                max = min;
            }

            if (match.Value.Type != LexemType.RepeatEnd)
            {
                throw CreateException("expected end of repeat specifier", iterator.Position);
            }

            return(min, max);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Parse one or more sequences separated by alternative lexems and
        /// combine them into a single alternative node.
        /// </summary>
        /// <param name="iterator">Iterator used to read following lexems.</param>
        /// <param name="match">First lexem of the first sequence.</param>
        /// <param name="atTopLevel">Flag forwarded unchanged to <c>MatchSequence</c>.</param>
        /// <returns>
        /// Alternative node built from every parsed sequence, along with the
        /// first lexem returned by <c>MatchSequence</c> that is not an
        /// alternative lexem.
        /// </returns>
        public static (Node, Match <Lexem>) MatchAlternative(IMatchIterator <Lexem> iterator, Match <Lexem> match, bool atTopLevel)
        {
            var branches = new List <Node>();

            // Each iteration parses one branch; the iterator clause skips the
            // alternative lexem before parsing the next branch
            for (var current = match; ; current = NextOrThrow(iterator))
            {
                var (branch, following) = MatchSequence(iterator, current, atTopLevel);

                branches.Add(branch);

                // NOTE(review): any alternative lexem returned by MatchSequence
                // is taken as a branch separator for this call; confirm that
                // MatchSequence never returns one that belongs to an outer level
                if (following.Value.Type == LexemType.Alternative)
                {
                    continue;
                }

                return (Node.CreateAlternative(branches), following);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parse a sequence of atoms (literal characters, escapes, wildcards,
        /// character classes or parenthesized groups), each optionally followed
        /// by a repeat specifier, and combine them into a sequence node.
        /// </summary>
        /// <param name="iterator">Iterator used to read following lexems.</param>
        /// <param name="match">First lexem of the sequence.</param>
        /// <param name="atTopLevel">
        /// True when parsing outside of any parenthesis: the end lexem is then
        /// allowed and a closing parenthesis is taken as a literal character.
        /// False when parsing inside a group: the end lexem is then an error
        /// and a closing parenthesis stops the sequence.
        /// </param>
        /// <returns>
        /// Sequence node along with the lexem that stopped the sequence, which
        /// is left unconsumed: an alternative lexem, the end lexem (top level
        /// only) or the closing parenthesis (inside a group only).
        /// </returns>
        /// <remarks>
        /// The closing parenthesis of a group is returned as-is instead of being
        /// skipped here. Otherwise a group directly followed by an alternative
        /// lexem, as in "(a)|b", would hand that lexem to the enclosing
        /// <c>MatchAlternative</c> call, which would mistake it for a separator
        /// located inside the group. The closing parenthesis is consumed by the
        /// code that opened the group instead.
        /// </remarks>
        public static (Node, Match <Lexem>) MatchSequence(IMatchIterator <Lexem> iterator, Match <Lexem> match, bool atTopLevel)
        {
            var sequenceNodes = new List <Node>();

            while (true)
            {
                // Match literal character, character class or group
                Match <Lexem> nextMatch;
                Node          node;

                switch (match.Value.Type)
                {
                case LexemType.Alternative:
                    return(Node.CreateSequence(sequenceNodes), match);

                case LexemType.End:
                    if (!atTopLevel)
                    {
                        throw CreateException("unfinished parenthesis", iterator.Position);
                    }

                    return(Node.CreateSequence(sequenceNodes), match);

                case LexemType.ClassBegin:
                    node      = MatchClass(iterator, NextOrThrow(iterator));
                    nextMatch = NextOrThrow(iterator);

                    break;

                case LexemType.Escape:
                    node      = Node.CreateCharacter(match.Value.Replacement);
                    nextMatch = NextOrThrow(iterator);

                    break;

                case LexemType.SequenceBegin:
                    var(alternativeNode, alternativeNextMatch) = MatchAlternative(iterator, NextOrThrow(iterator), false);

                    // Group contents must stop on the closing parenthesis, which
                    // is still pending at this point and gets consumed below
                    if (alternativeNextMatch.Value.Type != LexemType.SequenceEnd)
                    {
                        throw CreateException("unfinished parenthesis", iterator.Position);
                    }

                    node      = alternativeNode;
                    nextMatch = NextOrThrow(iterator);

                    break;

                case LexemType.SequenceEnd:
                    // Inside a group, stop here and leave the closing parenthesis
                    // to the code that opened the group
                    if (!atTopLevel)
                    {
                        return(Node.CreateSequence(sequenceNodes), match);
                    }

                    // At top level there is no group to close, use a literal character
                    node      = Node.CreateCharacter(match.Capture[0]);
                    nextMatch = NextOrThrow(iterator);

                    break;

                case LexemType.Wildcard:
                    // Wildcard accepts the whole range of character values
                    node      = Node.CreateCharacter(new[] { new NodeRange(char.MinValue, char.MaxValue) });
                    nextMatch = NextOrThrow(iterator);

                    break;

                default:
                    node      = Node.CreateCharacter(match.Capture[0]);
                    nextMatch = NextOrThrow(iterator);

                    break;
                }

                // Match repeat specifier if any (-1 is passed as maximum for
                // specifiers without upper bound)
                int max;
                int min;

                switch (nextMatch.Value.Type)
                {
                case LexemType.OneOrMore:
                    (min, max) = (1, -1);
                    match      = NextOrThrow(iterator);

                    break;

                case LexemType.RepeatBegin:
                    (min, max) = MatchRepeat(iterator, NextOrThrow(iterator));
                    match      = NextOrThrow(iterator);

                    break;

                case LexemType.ZeroOrMore:
                    (min, max) = (0, -1);
                    match      = NextOrThrow(iterator);

                    break;

                case LexemType.ZeroOrOne:
                    (min, max) = (0, 1);
                    match      = NextOrThrow(iterator);

                    break;

                default:
                    // No specifier: atom occurs exactly once and the lexem just
                    // read becomes the start of next iteration
                    (min, max) = (1, 1);
                    match      = nextMatch;

                    break;
                }

                sequenceNodes.Add(Node.CreateRepeat(node, min, max));
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parse the contents of a character class, starting from given match
        /// (the lexem following the opening one), into a character node.
        /// </summary>
        /// <param name="iterator">Iterator used to read following lexems.</param>
        /// <param name="match">First lexem inside the character class.</param>
        /// <returns>
        /// Character node built from every range read before the closing
        /// lexem; no lexem is read past the closing one.
        /// </returns>
        private static Node MatchClass(IMatchIterator <Lexem> iterator, Match <Lexem> match)
        {
            var ranges = new List <NodeRange>();

            // A leading "negate class" lexem is recognized but not handled yet
            if (match.Value.Type == LexemType.Negate)
            {
                throw new NotImplementedException("negated character classes are not supported yet");
            }

            // A leading "end of class" lexem is taken as a literal character
            if (match.Value.Type == LexemType.ClassEnd)
            {
                var literal = match.Capture[0];

                ranges.Add(new NodeRange(literal, literal));

                match = NextOrThrow(iterator);
            }

            // Read single characters or ranges until class gets closed
            while (match.Value.Type != LexemType.ClassEnd)
            {
                if (match.Value.Type == LexemType.End)
                {
                    throw CreateException("unfinished characters class", iterator.Position);
                }

                // Current lexem is either a single character or the lower
                // bound of a range, depending on what follows
                char first = match.Value.Type == LexemType.Escape
                    ? match.Value.Replacement
                    : match.Capture[0];
                char last = first;

                match = NextOrThrow(iterator);

                // A range lexem (e.g. "a-z") means the lexem after it holds
                // the upper bound
                if (match.Value.Type == LexemType.Range)
                {
                    match = NextOrThrow(iterator);

                    if (match.Value.Type == LexemType.End)
                    {
                        throw CreateException("unfinished characters class", iterator.Position);
                    }

                    last = match.Value.Type == LexemType.Escape
                        ? match.Value.Replacement
                        : match.Capture[0];

                    match = NextOrThrow(iterator);
                }

                ranges.Add(new NodeRange(first, last));
            }

            return Node.CreateCharacter(ranges);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Compile regular pattern into graph of non-deterministic states leading to given value.
 /// </summary>
 /// <param name="iterator">
 /// Iterator over lexems; presumably those of the pattern to compile (to be
 /// confirmed against implementations).
 /// </param>
 /// <returns>
 /// Node produced by the implementation; presumably the root of the compiled
 /// graph (to be confirmed against implementations).
 /// </returns>
 protected abstract Node CreateGraph(IMatchIterator <Lexem> iterator);