// Example 1
        /// <summary>
        /// Drains <paramref name="tokenQueue"/>, building child nodes under
        /// <paramref name="targetNode"/> via the first rule whose token type
        /// matches each dequeued token. Stops at the first token with no
        /// matching rule, recording a <see cref="ParseFailureNode"/>.
        /// </summary>
        /// <param name="targetNode">Node that receives the built children.</param>
        /// <param name="tokenQueue">Tokens to consume; emptied as a side effect.</param>
        /// <param name="state">Shared builder state; its ProcessingState is replaced.</param>
        void Process(Node targetNode, Queue<Token> tokenQueue, TreeBuilderState state)
        {
            state.ProcessingState = new ProcessingState(targetNode, tokenQueue);

            while (tokenQueue.Count > 0)
            {
                var token = tokenQueue.Dequeue();
                var tokenType = token.Type;

                // Single-pass lookup (was Where(...).FirstOrDefault()).
                var rule = rules.FirstOrDefault(r => r.TokenType == tokenType);

                if (rule == null)
                {
                    targetNode.AddChild(new ParseFailureNode(token, "unexpected token"));
                    break;
                }

                var node = rule.NodeBuilder(token, state);

                // Builders may return null when they mutate the tree themselves
                // (e.g. quantifiers replace the previous child instead of adding one).
                if (node != null)
                    targetNode.AddChild(node);
            }
        }
        /// <summary>
        /// Collects the tokens that belong to a group (tracking nested groups so
        /// inner parentheses are kept) and queues the group's contents for later
        /// processing. Returns a failure node when no closing token is found.
        /// </summary>
        public static Node BuildGroupNode(Token startToken, TreeBuilderState state)
        {
            var innerTokens = new List<Token>();
            Token closingToken = null;
            var depth = 0;

            var pending = state.ProcessingState.Tokens;
            while (pending.Any())
            {
                var current = pending.Dequeue();

                if (current.Type == TokenType.GroupStart)
                    depth++;
                else if (current.Type == TokenType.GroupEnd)
                    depth--;

                if (depth < 0)
                {
                    // This GroupEnd closes the group opened by startToken.
                    closingToken = current;
                    break;
                }

                innerTokens.Add(current);
            }

            if (closingToken == null)
                return new ParseFailureNode(startToken, "group is never closed");

            var combinedData = Token.GetData(startToken, innerTokens, closingToken);

            var groupNode = new GroupNode(combinedData, startToken.StartIndex);

            // Defer processing of the group body until the current pass finishes.
            state.ProcessingQueue.Enqueue(new KeyValuePair<Node, IEnumerable<Token>>(
                groupNode,
                innerTokens));

            return groupNode;
        }
        /// <summary>
        /// Builds a quantifier node for the single-character quantifiers
        /// * (0..inf), + (1..inf) and ? (0..1).
        /// </summary>
        public static Node BuildBasicQuantifierNode(Token startToken, TreeBuilderState state)
        {
            int? min = null;
            int? max = null;

            if (startToken.Data == "*")
            {
                min = 0;
            }
            else if (startToken.Data == "+")
            {
                min = 1;
            }
            else if (startToken.Data == "?")
            {
                min = 0;
                max = 1;
            }
            else
            {
                return new ParseFailureNode(startToken, "Unrecognized quantifier sequence.");
            }

            return BuildQuantifierNode(new[] { startToken }, state, min, max);
        }
// Example 4
        /// <summary>
        /// Entry point: builds the parse tree for a token sequence by seeding the
        /// processing queue with the root expression and draining it until every
        /// queued node's tokens have been handled.
        /// </summary>
        internal Node BuildExpressionNode(IEnumerable<Token> tokens)
        {
            var state = new TreeBuilderState();

            var root = new ExpressionNode(Token.GetData(tokens), 0);

            // Seed the work queue with the root node and all of its tokens.
            state.ProcessingQueue.Enqueue(
                new KeyValuePair<Node, IEnumerable<Token>>(root, tokens));

            // Processing a step may enqueue further steps (e.g. group contents).
            while (state.ProcessingQueue.Any())
            {
                var step = state.ProcessingQueue.Dequeue();
                Process(step.Key, new Queue<Token>(step.Value), state);
            }

            return root;
        }
        /// <summary>
        /// Builds a node for a character escape sequence. Returns a
        /// <see cref="CharacterClassNode"/> when the escaped content maps to a
        /// known character class, otherwise an <see cref="EscapedCharacterNode"/>;
        /// a <see cref="ParseFailureNode"/> when the escape has no data.
        /// </summary>
        public static Node BuildCharacterNode(Token startToken, TreeBuilderState state)
        {
            var dataTokenTypes = new[]
            {
                TokenType.CharacterEscapeData,
                TokenType.CharacterEscapeControlMarker,
                TokenType.CharacterEscapeHexMarker,
                TokenType.CharacterEscapeUnicodeMarker
            };

            var dataTokens = state
                .ProcessingState
                .Tokens
                .DequeueWhile(t => dataTokenTypes.Contains(t.Type));

            if (dataTokens.None())
                return new ParseFailureNode(startToken, "Character escape with no data.");

            // Control/hex/unicode escape markers are tokenized but not yet handled.
            if (dataTokens.Any(t => t.Type != TokenType.CharacterEscapeData))
                throw new NotImplementedException();

            // string.Concat avoids the quadratic string churn of Aggregate with +.
            var escapedContent = string.Concat(dataTokens.Select(t => t.Data));

            var combinedData = Token.GetData(startToken, dataTokens);

            // Single dictionary lookup (was ContainsKey followed by the indexer).
            if (CharacterClassMappings.TryGetValue(escapedContent, out var characterClass))
                return new CharacterClassNode(combinedData, startToken.StartIndex, characterClass);

            return new EscapedCharacterNode(
                combinedData,
                startToken.StartIndex,
                escapedContent
            );
        }
        /// <summary>
        /// Builds a quantifier node for the {n}, {n,}, {,m} and {n,m} forms by
        /// pattern-matching the token sequence. Falls back to a literal node when
        /// the tokens do not form a recognized parametized quantifier.
        /// </summary>
        public static Node BuildParametizedQuantifierNode(Token startToken, TreeBuilderState state)
        {
            var acceptedQuantifierTokenTypes = new[]
            {
                TokenType.Number,
                TokenType.ParametizedQuantifierRangeSeparator,
                TokenType.ParametizedQuantifierEnd
            };

            var remainingQuantifierTokens = state.ProcessingState.Tokens
                .DequeueWhile(t => acceptedQuantifierTokenTypes.Contains(t.Type));

            // Materialize once: the lazy Concat sequence was re-enumerated by every
            // SequenceEqual, ElementAt and GetData call below.
            var quantifierTokens = new[] { startToken }
                .Concat(remainingQuantifierTokens)
                .ToList();

            var quantifierTokenTypes = quantifierTokens
                .Select(t => t.Type)
                .ToList();

            int? min;
            int? max;

            if (quantifierTokenTypes.SequenceEqual(new[]
                {
                    TokenType.ParametizedQuantifierStart,
                    TokenType.Number,
                    TokenType.ParametizedQuantifierRangeSeparator,
                    TokenType.Number,
                    TokenType.ParametizedQuantifierEnd
                }))
            {
                // {n,m}
                min = int.Parse(quantifierTokens[1].Data);
                max = int.Parse(quantifierTokens[3].Data);
            }
            else if (quantifierTokenTypes.SequenceEqual(new[]
                {
                    TokenType.ParametizedQuantifierStart,
                    TokenType.ParametizedQuantifierRangeSeparator,
                    TokenType.Number,
                    TokenType.ParametizedQuantifierEnd
                }))
            {
                // {,m}
                min = null;
                max = int.Parse(quantifierTokens[2].Data);
            }
            else if (quantifierTokenTypes.SequenceEqual(new[]
                {
                    TokenType.ParametizedQuantifierStart,
                    TokenType.Number,
                    TokenType.ParametizedQuantifierRangeSeparator,
                    TokenType.ParametizedQuantifierEnd
                }))
            {
                // {n,}
                min = int.Parse(quantifierTokens[1].Data);
                max = null;
            }
            else if (quantifierTokenTypes.SequenceEqual(new[]
                {
                    TokenType.ParametizedQuantifierStart,
                    TokenType.Number,
                    TokenType.ParametizedQuantifierEnd
                }))
            {
                // {n} — exact count
                min = int.Parse(quantifierTokens[1].Data);
                max = min;
            }
            else
            {
                // Not a valid quantifier; treat the consumed tokens as a literal.
                return new LiteralNode(
                    Token.GetData(quantifierTokens),
                    startToken.StartIndex
                );
            }

            return BuildQuantifierNode(quantifierTokens, state, min, max);
        }
        /// <summary>
        /// Wraps the node immediately preceding the quantifier in a
        /// <see cref="QuantifierNode"/>, splitting a multi-character literal so
        /// that only its final character is quantified. Mutates the target node's
        /// children in place and returns null, or returns a
        /// <see cref="ParseFailureNode"/> when nothing precedes the quantifier.
        /// </summary>
        static Node BuildQuantifierNode(IEnumerable<Token> quantifierTokens, TreeBuilderState state, int? min, int? max)
        {
            var target = state.ProcessingState.TargetNode;
            var existingChildren = target.Children;

            if (existingChildren.None())
                return new ParseFailureNode(quantifierTokens.First(), "Nothing preceeding the quantifier.");

            var priorNode = existingChildren.Last();

            Node quantified = priorNode;
            Node literalRemainder = null;

            // A multi-character literal is split: only its last character is
            // quantified, the rest stays as a separate literal before it.
            if (priorNode is LiteralNode && priorNode.Data.Length > 1)
            {
                var data = priorNode.Data;
                var lastCharIndex = data.Length - 1;

                quantified = new LiteralNode(
                    data.Substring(lastCharIndex),
                    priorNode.StartIndex + lastCharIndex);
                literalRemainder = new LiteralNode(
                    data.Substring(0, lastCharIndex),
                    priorNode.StartIndex);
            }

            var quantifierNode = new QuantifierNode(
                quantified.Data + Token.GetData(quantifierTokens),
                quantified.StartIndex,
                min,
                max,
                quantified
                );

            target.ReplaceLastChild(literalRemainder, quantifierNode);

            return null;
        }
        /// <summary>
        /// Inspects the tokens at the start of a group for a directive
        /// (named group, balancing group, non-capturing marker, lookarounds)
        /// and applies the first matching strategy to the target group node.
        /// Always returns null; the group node is mutated in place.
        /// </summary>
        public static Node ProcessGroupDirective(Token startToken, TreeBuilderState state)
        {
            var group = (GroupNode)state.ProcessingState.TargetNode;

            // An ordered array rather than a Dictionary: the strategies must be
            // tried in declaration order, and Dictionary enumeration order is not
            // guaranteed by its contract.
            var strategies = new[]
            {
                // Named group
                Tuple.Create<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
                    new[]
                    {
                        new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierStartOrLookBehindMarker, 1),
                        new PatternSegment<Token>(t => t.Type == TokenType.Literal),
                        new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierEnd, 1),
                    },
                    ProcessNamedGroupDirective),

                // Balancing group
                Tuple.Create<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
                    new[]
                    {
                        new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierStartOrLookBehindMarker, 1),
                        new PatternSegment<Token>(t => t.Type == TokenType.Literal),
                        new PatternSegment<Token>(t => t.Type == TokenType.BalancingGroupNamedIdentifierSeparator, 1),
                        new PatternSegment<Token>(t => t.Type == TokenType.Literal),
                        new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierEnd, 1),
                    },
                    ProcessBalancingGroupDirective),

                // Non-capturing
                Tuple.Create<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
                    new[]
                    {
                        new PatternSegment<Token>(t => t.Type == TokenType.NonCapturingGroupMarker, 1)
                    },
                    (g, t) => { g.GroupMode = GroupMode.NonCapturingGroup; }),

                // Positive lookahead
                Tuple.Create<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
                    new[]
                    {
                        new PatternSegment<Token>(t => t.Type == TokenType.PositiveLookAheadMarker, 1)
                    },
                    (g, t) => { g.GroupMode = GroupMode.PositiveLookAhead; }),

                // Negative lookahead
                Tuple.Create<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
                    new[]
                    {
                        new PatternSegment<Token>(t => t.Type == TokenType.NegativeLookAheadMarker, 1)
                    },
                    (g, t) => { g.GroupMode = GroupMode.NegativeLookAhead; }),

                // Positive lookbehind
                Tuple.Create<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
                    new[]
                    {
                        new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierStartOrLookBehindMarker, 1),
                        new PatternSegment<Token>(t => t.Type == TokenType.PositiveLookBehindMarker, 1)
                    },
                    (g, t) => { g.GroupMode = GroupMode.PositiveLookBehind; }),

                // Negative lookbehind
                Tuple.Create<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
                    new[]
                    {
                        new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierStartOrLookBehindMarker, 1),
                        new PatternSegment<Token>(t => t.Type == TokenType.NegativeLookBehindMarker, 1)
                    },
                    (g, t) => { g.GroupMode = GroupMode.NegativeLookBehind; }),
            };

            foreach (var strategy in strategies)
            {
                // DequeuePattern consumes the tokens only when the pattern matches.
                var tokens = state
                    .ProcessingState
                    .Tokens
                    .DequeuePattern(strategy.Item1);

                if (!tokens.Any()) continue;

                strategy.Item2(group, tokens);

                break;
            }

            return null;
        }