/// <summary>
/// Drains <paramref name="tokenQueue"/>, dispatching each token to the first rule
/// whose TokenType matches and attaching any resulting node to
/// <paramref name="targetNode"/>. Stops at the first token no rule handles,
/// recording a ParseFailureNode in its place.
/// </summary>
/// <param name="targetNode">Node that receives the children built from the tokens.</param>
/// <param name="tokenQueue">Tokens to consume; rule builders may dequeue further tokens via the state.</param>
/// <param name="state">Shared builder state; its ProcessingState is reset for this run.</param>
void Process(Node targetNode, Queue<Token> tokenQueue, TreeBuilderState state)
{
    // Rule builders read the current target node and queue through the shared
    // state, so it must be refreshed before each processing run.
    state.ProcessingState = new ProcessingState(targetNode, tokenQueue);

    // Count property instead of LINQ Any() — Queue<T> exposes it directly.
    while (tokenQueue.Count > 0)
    {
        var token = tokenQueue.Dequeue();
        var tokenType = token.Type;

        // Single-pass predicate overload instead of Where(...).FirstOrDefault().
        var rule = rules.FirstOrDefault(r => r.TokenType == tokenType);
        if (rule == null)
        {
            // No rule claims this token: record the failure and abandon the rest
            // of the queue rather than cascading further errors.
            targetNode.AddChild(new ParseFailureNode(token, "unexpected token"));
            break;
        }

        // Builders may return null when they mutate the tree themselves
        // (e.g. quantifiers wrap an existing child instead of adding a new one).
        var node = rule.NodeBuilder(token, state);
        if (node != null)
            targetNode.AddChild(node);
    }
}
/// <summary>
/// Builds a GroupNode from a GroupStart token by collecting every token up to the
/// matching GroupEnd (tracking nesting depth), then enqueues the collected
/// contents so they are later processed as the group's children.
/// </summary>
/// <param name="startToken">The GroupStart token that opened this group.</param>
/// <param name="state">Builder state supplying the remaining token stream and the processing queue.</param>
/// <returns>The new GroupNode, or a ParseFailureNode when no matching GroupEnd exists.</returns>
public static Node BuildGroupNode(Token startToken, TreeBuilderState state)
{
    var contentsTokens = new List<Token>();
    Token endToken = null;
    // Depth of nested groups relative to this one. It drops below zero exactly
    // when the GroupEnd that closes *this* group is consumed.
    var nestedGroupDepth = 0;
    while (state.ProcessingState.Tokens.Any())
    {
        var token = state.ProcessingState.Tokens.Dequeue();
        switch (token.Type)
        {
            case TokenType.GroupStart: nestedGroupDepth++; break;
            case TokenType.GroupEnd: nestedGroupDepth--; break;
        }
        if (nestedGroupDepth >= 0)
        {
            // Still inside this group (including nested GroupStart/GroupEnd
            // tokens, which are kept so the nested group can be rebuilt later).
            contentsTokens.Add(token);
        }
        else
        {
            // Depth went negative: this token is the closer for this group.
            endToken = token;
            break;
        }
    }
    // Ran out of tokens without the depth ever going negative — unbalanced input.
    if (endToken == null)
        return new ParseFailureNode(startToken, "group is never closed");
    var combinedData = Token.GetData(startToken, contentsTokens, endToken);
    var groupNode = new GroupNode(
        combinedData,
        startToken.StartIndex
    );
    // Queue the group contents for processing; the main loop will build the
    // group's children from these tokens on a later pass.
    state.ProcessingQueue.Enqueue(new KeyValuePair<Node, IEnumerable<Token>>(
        groupNode,
        contentsTokens
    ));
    return groupNode;
}
/// <summary>
/// Translates a shorthand quantifier token ("*", "+" or "?") into repetition
/// bounds and delegates to BuildQuantifierNode. A null bound means unbounded.
/// </summary>
/// <param name="startToken">The quantifier token; its Data selects the bounds.</param>
/// <param name="state">Builder state passed through to BuildQuantifierNode.</param>
/// <returns>Whatever BuildQuantifierNode returns, or a ParseFailureNode for an unknown symbol.</returns>
public static Node BuildBasicQuantifierNode(Token startToken, TreeBuilderState state)
{
    var symbol = startToken.Data;

    int? lowerBound;
    int? upperBound;
    if (symbol == "*")
    {
        lowerBound = 0;
        upperBound = null;
    }
    else if (symbol == "+")
    {
        lowerBound = 1;
        upperBound = null;
    }
    else if (symbol == "?")
    {
        lowerBound = 0;
        upperBound = 1;
    }
    else
    {
        return new ParseFailureNode(startToken, "Unrecognized quantifier sequence.");
    }

    return BuildQuantifierNode(new[] { startToken }, state, lowerBound, upperBound);
}
/// <summary>
/// Entry point for tree construction: wraps the full token stream in a root
/// ExpressionNode, then drains the processing queue until every queued
/// (node, tokens) pair has been processed. Handlers such as group builders
/// enqueue further work, so the loop continues until the whole tree is built.
/// </summary>
/// <param name="tokens">The complete token stream for the expression.</param>
/// <returns>The root ExpressionNode with its children populated.</returns>
internal Node BuildExpressionNode(IEnumerable<Token> tokens)
{
    var builderState = new TreeBuilderState();
    var rootNode = new ExpressionNode(Token.GetData(tokens), 0);

    builderState.ProcessingQueue.Enqueue(
        new KeyValuePair<Node, IEnumerable<Token>>(rootNode, tokens));

    while (builderState.ProcessingQueue.Any())
    {
        var step = builderState.ProcessingQueue.Dequeue();
        Process(step.Key, new Queue<Token>(step.Value), builderState);
    }

    return rootNode;
}
/// <summary>
/// Builds a node for a character escape sequence by dequeuing the escape's data
/// tokens. Known character-class escapes (per CharacterClassMappings) become
/// CharacterClassNodes; everything else becomes an EscapedCharacterNode.
/// </summary>
/// <param name="startToken">The token that began the escape sequence.</param>
/// <param name="state">Builder state supplying the remaining token stream.</param>
/// <returns>A CharacterClassNode or EscapedCharacterNode, or a ParseFailureNode when no data follows the escape.</returns>
/// <exception cref="NotImplementedException">Thrown for control/hex/unicode escape markers, which are tokenized but not yet built here.</exception>
public static Node BuildCharacterNode(Token startToken, TreeBuilderState state)
{
    var dataTokenTypes = new[]
    {
        TokenType.CharacterEscapeData,
        TokenType.CharacterEscapeControlMarker,
        TokenType.CharacterEscapeHexMarker,
        TokenType.CharacterEscapeUnicodeMarker
    };

    var dataTokens = state
        .ProcessingState
        .Tokens
        .DequeueWhile(t => dataTokenTypes.Contains(t.Type));

    if (dataTokens.None())
        return new ParseFailureNode(startToken, "Character escape with no data.");

    // Control/hex/unicode escape forms are recognized by the tokenizer but not
    // yet handled by this builder.
    if (dataTokens.Any(t => t.Type != TokenType.CharacterEscapeData))
        throw new NotImplementedException();

    // Single-pass join; the previous Aggregate re-concatenated the string on
    // every step (quadratic allocation).
    var escapedContent = string.Concat(dataTokens.Select(t => t.Data));

    var combinedData = Token.GetData(startToken, dataTokens);

    // Single dictionary lookup instead of ContainsKey followed by the indexer.
    if (CharacterClassMappings.TryGetValue(escapedContent, out var characterClass))
        return new CharacterClassNode(combinedData, startToken.StartIndex, characterClass);

    return new EscapedCharacterNode(
        combinedData,
        startToken.StartIndex,
        escapedContent
    );
}
/// <summary>
/// Builds a quantifier node from a parametized quantifier such as {n}, {n,},
/// {,m} or {n,m} by dequeuing the tokens up to the quantifier end and matching
/// their type sequence. An unrecognized shape falls back to a LiteralNode.
/// </summary>
/// <param name="startToken">The ParametizedQuantifierStart token.</param>
/// <param name="state">Builder state supplying the remaining token stream.</param>
/// <returns>The result of BuildQuantifierNode, or a LiteralNode for unrecognized sequences.</returns>
public static Node BuildParametizedQuantifierNode(Token startToken, TreeBuilderState state)
{
    var acceptedQuantifierTokenTypes = new[]
    {
        TokenType.Number,
        TokenType.ParametizedQuantifierRangeSeparator,
        TokenType.ParametizedQuantifierEnd
    };

    var remainingQuantifierTokens = state.ProcessingState.Tokens
        .DequeueWhile(t => acceptedQuantifierTokenTypes.Contains(t.Type));

    // Materialize once: the previous lazy Concat/Select chains were re-enumerated
    // by each SequenceEqual and ElementAt call below.
    var quantifierTokens = new[] { startToken }
        .Concat(remainingQuantifierTokens)
        .ToList();
    var quantifierTokenTypes = quantifierTokens
        .Select(t => t.Type)
        .ToList();

    int? min;
    int? max;

    // {min,max}
    if (quantifierTokenTypes.SequenceEqual(new[] {
        TokenType.ParametizedQuantifierStart,
        TokenType.Number,
        TokenType.ParametizedQuantifierRangeSeparator,
        TokenType.Number,
        TokenType.ParametizedQuantifierEnd }))
    {
        min = int.Parse(quantifierTokens[1].Data);
        max = int.Parse(quantifierTokens[3].Data);
    }
    // {,max}
    else if (quantifierTokenTypes.SequenceEqual(new[] {
        TokenType.ParametizedQuantifierStart,
        TokenType.ParametizedQuantifierRangeSeparator,
        TokenType.Number,
        TokenType.ParametizedQuantifierEnd }))
    {
        min = null;
        max = int.Parse(quantifierTokens[2].Data);
    }
    // {min,}
    else if (quantifierTokenTypes.SequenceEqual(new[] {
        TokenType.ParametizedQuantifierStart,
        TokenType.Number,
        TokenType.ParametizedQuantifierRangeSeparator,
        TokenType.ParametizedQuantifierEnd }))
    {
        min = int.Parse(quantifierTokens[1].Data);
        max = null;
    }
    // {count} — exact repetition
    else if (quantifierTokenTypes.SequenceEqual(new[] {
        TokenType.ParametizedQuantifierStart,
        TokenType.Number,
        TokenType.ParametizedQuantifierEnd }))
    {
        min = int.Parse(quantifierTokens[1].Data);
        max = min;
    }
    else
    {
        // Unrecognized sequence: keep the raw text as a literal instead of failing.
        return new LiteralNode(
            Token.GetData(quantifierTokens),
            startToken.StartIndex
        );
    }

    return BuildQuantifierNode(quantifierTokens, state, min, max);
}
/// <summary>
/// Wraps the most recently added child of the current target node in a
/// QuantifierNode with the given bounds. Mutates the tree in place via
/// ReplaceLastChild and therefore returns null on success (the caller adds
/// nothing new).
/// </summary>
/// <param name="quantifierTokens">The tokens making up the quantifier itself.</param>
/// <param name="state">Builder state; supplies the target node whose last child is quantified.</param>
/// <param name="min">Minimum repetitions, or null for no lower bound.</param>
/// <param name="max">Maximum repetitions, or null for unbounded.</param>
/// <returns>A ParseFailureNode when there is nothing to quantify; otherwise null.</returns>
static Node BuildQuantifierNode(IEnumerable<Token> quantifierTokens, TreeBuilderState state, int? min, int? max)
{
    var targetNode = state.ProcessingState.TargetNode;
    var previousNodes = targetNode.Children;
    // A quantifier with no preceding node is malformed input.
    if (previousNodes.None())
        return new ParseFailureNode(quantifierTokens.First(), "Nothing preceeding the quantifier.");
    var immediatelyPriorNode = previousNodes.Last();
    // If there's a multi-character literal then we need to split it up:
    // only the final character is quantified (e.g. in "ab+" the "+" binds to "b").
    Node nodeToQuantify;
    Node nodeToInsertBeforeQuantifier = null;
    if (immediatelyPriorNode is LiteralNode && immediatelyPriorNode.Data.Length > 1)
    {
        var originalLiteralData = immediatelyPriorNode.Data;
        // Last character becomes the node being quantified...
        nodeToQuantify = new LiteralNode(
            originalLiteralData.Substring(originalLiteralData.Length - 1),
            immediatelyPriorNode.StartIndex + originalLiteralData.Length - 1);
        // ...and the leading characters are preserved as a separate literal
        // re-inserted ahead of the quantifier.
        nodeToInsertBeforeQuantifier = new LiteralNode(
            originalLiteralData.Substring(0, originalLiteralData.Length - 1),
            immediatelyPriorNode.StartIndex);
    }
    else
    {
        nodeToQuantify = immediatelyPriorNode;
    }
    // The quantifier node's data covers both the quantified node's text and the
    // quantifier tokens, anchored at the quantified node's start index.
    var quantifierNode = new QuantifierNode(
        nodeToQuantify.Data + Token.GetData(quantifierTokens),
        nodeToQuantify.StartIndex,
        min,
        max,
        nodeToQuantify
    );
    // NOTE(review): assumes ReplaceLastChild tolerates a null first argument when
    // no literal split occurred — confirm against its implementation.
    targetNode.ReplaceLastChild(nodeToInsertBeforeQuantifier, quantifierNode);
    return null;
}
/// <summary>
/// Interprets the directive tokens at the start of a group (named group,
/// balancing group, non-capturing marker, lookahead/lookbehind markers) and
/// configures the current GroupNode accordingly. Strategies are tried in
/// declaration order; the first whose token pattern can be dequeued wins.
/// Returns null because the existing group node is mutated, not replaced.
/// </summary>
/// <param name="startToken">The token that triggered directive processing.</param>
/// <param name="state">Builder state; its TargetNode must be the GroupNode being configured.</param>
/// <returns>Always null.</returns>
public static Node ProcessGroupDirective(Token startToken, TreeBuilderState state)
{
    var group = (GroupNode)state.ProcessingState.TargetNode;

    // Ordered array rather than a Dictionary: Dictionary<TKey,TValue> enumeration
    // order is unspecified, but these patterns share prefixes (the named-group
    // opener also starts the balancing-group and lookbehind forms), so the
    // first-match loop below must attempt them in a fixed, declared order.
    var strategies = new[]
    {
        // Named group
        new KeyValuePair<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
            new[]
            {
                new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierStartOrLookBehindMarker, 1),
                new PatternSegment<Token>(t => t.Type == TokenType.Literal),
                new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierEnd, 1),
            },
            ProcessNamedGroupDirective),
        // Balancing group
        new KeyValuePair<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
            new[]
            {
                new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierStartOrLookBehindMarker, 1),
                new PatternSegment<Token>(t => t.Type == TokenType.Literal),
                new PatternSegment<Token>(t => t.Type == TokenType.BalancingGroupNamedIdentifierSeparator, 1),
                new PatternSegment<Token>(t => t.Type == TokenType.Literal),
                new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierEnd, 1),
            },
            ProcessBalancingGroupDirective),
        // Non-capturing
        new KeyValuePair<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
            new[] { new PatternSegment<Token>(t => t.Type == TokenType.NonCapturingGroupMarker, 1) },
            (g, t) => { g.GroupMode = GroupMode.NonCapturingGroup; }),
        // Positive lookahead
        new KeyValuePair<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
            new[] { new PatternSegment<Token>(t => t.Type == TokenType.PositiveLookAheadMarker, 1) },
            (g, t) => { g.GroupMode = GroupMode.PositiveLookAhead; }),
        // Negative lookahead
        new KeyValuePair<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
            new[] { new PatternSegment<Token>(t => t.Type == TokenType.NegativeLookAheadMarker, 1) },
            (g, t) => { g.GroupMode = GroupMode.NegativeLookAhead; }),
        // Positive lookbehind
        new KeyValuePair<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
            new[]
            {
                new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierStartOrLookBehindMarker, 1),
                new PatternSegment<Token>(t => t.Type == TokenType.PositiveLookBehindMarker, 1)
            },
            (g, t) => { g.GroupMode = GroupMode.PositiveLookBehind; }),
        // Negative lookbehind
        new KeyValuePair<IEnumerable<PatternSegment<Token>>, Action<GroupNode, IEnumerable<Token>>>(
            new[]
            {
                new PatternSegment<Token>(t => t.Type == TokenType.NamedIdentifierStartOrLookBehindMarker, 1),
                new PatternSegment<Token>(t => t.Type == TokenType.NegativeLookBehindMarker, 1)
            },
            (g, t) => { g.GroupMode = GroupMode.NegativeLookBehind; }),
    };

    foreach (var strategy in strategies)
    {
        var tokens = state
            .ProcessingState
            .Tokens
            .DequeuePattern(strategy.Key);

        // Empty result means the pattern did not match; try the next strategy.
        if (!tokens.Any())
            continue;

        strategy.Value(group, tokens);
        break;
    }

    return null;
}