/// <summary>
/// Parses a container starting at <paramref name="input"/>: either a parenthesized group (capturing,
/// non-capturing, look-ahead, look-behind, atomic or named) or a top-level alternation.
/// </summary>
/// <param name="input">The position in the pattern at which parsing starts.</param>
/// <returns>
/// A success carrying the parsed container and the remaining input, or a failure when the input is
/// exhausted, starts with a pipe, contains unbalanced parentheses, contains a group construct that
/// cannot be recognised, or contains a group/alternation with nothing to hold.
/// </returns>
private IResult<RegexNode> GetContainer(Input input)
{
    if (input.AtEnd)
    {
        return new Failure<RegexNode>(input, () => "Unexpected end of input reached", () => new[] { "GetContainer" });
    }

    if (input.Current == '|')
    {
        // It's possible that we're at a pipe that is creating a top-level alternation. Fail so a higher level
        // parser can deal with it.
        return new Failure<RegexNode>(input, () => "Pipe was first character parsed", () => new[] { "GetContainer" });
    }

    // Containers that have been opened but not yet closed, innermost on top.
    var containerStack = new Stack<ContainerInfo>();

    // Nodes collected so far for each open container, looked up by the container's Key.
    var candidateNodes = new Dictionary<Guid, List<RegexNode>>();

    while (!input.AtEnd)
    {
        var success = _nonContainer(input) as ISuccess<RegexNode>;
        if (success != null)
        {
            if (containerStack.Count == 0)
            {
                if (input.Position == 0)
                {
                    // Assume that we might have a top-level alternation - if it turns out we don't, we'll fail anyway
                    containerStack.Push(new ContainerInfo { Index = input.Position, ContainerType = ContainerType.Alternation });
                }
                else
                {
                    return new Failure<RegexNode>(input, () => "Last character of input reached - captures are not possible", () => new[] { "GetContainer" });
                }
            }

            // Attach the parsed node to whichever container is currently innermost.
            var key = containerStack.Peek().Key;
            List<RegexNode> siblings;
            if (!candidateNodes.TryGetValue(key, out siblings))
            {
                siblings = new List<RegexNode>();
                candidateNodes.Add(key, siblings);
            }

            siblings.Add(success.Result);
            input = success.Remainder;
        }
        else
        {
            switch (input.Current)
            {
                case ')':
                {
                    if (containerStack.Count == 0)
                    {
                        // A ')' with nothing open. Popping the empty stack used to throw
                        // InvalidOperationException; report a parse failure instead.
                        return new Failure<RegexNode>(input, () => "Unmatched parentheses", () => new[] { "GetContainer" });
                    }

                    var containerInfo = containerStack.Pop();
                    if (containerInfo.ContainerType == ContainerType.Alternation)
                    {
                        if (containerStack.Count == 0)
                        {
                            // The alternation just popped was the assumed top-level one, so there is no
                            // open paren for this ')' to close (e.g. "a)").
                            return new Failure<RegexNode>(input, () => "Unmatched parentheses", () => new[] { "GetContainer" });
                        }

                        // Collapse the alternation into a single node, which becomes the only child of
                        // the parens that enclose it.
                        var alternation = CreateAlternation(input, candidateNodes[containerInfo.Key]);
                        containerInfo = containerStack.Pop();
                        candidateNodes[containerInfo.Key] = new List<RegexNode> { alternation };
                    }

                    List<RegexNode> children;
                    if (!candidateNodes.TryGetValue(containerInfo.Key, out children))
                    {
                        // Nothing was collected between '(' and ')'. The indexer lookup used to throw
                        // KeyNotFoundException here.
                        // NOTE(review): an empty group is rejected rather than built with no children,
                        // because it is not visible here whether the node types support that - confirm.
                        return new Failure<RegexNode>(input, () => "Parentheses have no contents", () => new[] { "GetContainer" });
                    }

                    candidateNodes.Remove(containerInfo.Key);

                    var index = containerInfo.Index;

                    // The pattern text runs from the opening paren through this closing paren, inclusive.
                    var pattern = input.Source.Substring(containerInfo.Index, (input.Position - containerInfo.Index) + 1);

                    ContainerNode paren;
                    if (containerInfo.ParenType == ParenType.Capturing)
                    {
                        paren = new CapturingParens(children, index, pattern);
                    }
                    else if (containerInfo.ParenType == ParenType.NonCapturing)
                    {
                        paren = new NonCapturingParens(children, index, pattern);
                    }
                    else if (containerInfo.ParenType == ParenType.PositiveLookAhead)
                    {
                        paren = new LookAhead(false, children, index, pattern);
                    }
                    else if (containerInfo.ParenType == ParenType.NegativeLookAhead)
                    {
                        paren = new LookAhead(true, children, index, pattern);
                    }
                    else if (containerInfo.ParenType == ParenType.PositiveLookBehind)
                    {
                        paren = new LookBehind(false, children, index, pattern);
                    }
                    else if (containerInfo.ParenType == ParenType.NegativeLookBehind)
                    {
                        paren = new LookBehind(true, children, index, pattern);
                    }
                    else if (containerInfo.ParenType == ParenType.Atomic)
                    {
                        paren = new AtomicGrouping(children, index, pattern);
                    }
                    else
                    {
                        paren = new NamedCapture(containerInfo.ParenType.Name, children, index, pattern);
                    }

                    if (containerStack.Count == 0)
                    {
                        // The outermost group has been closed: finished. The remainder starts after the ')'.
                        return new Success<RegexNode>(paren, input.Advance());
                    }

                    // Otherwise the finished group becomes a child of the container that encloses it.
                    var parenKey = containerStack.Peek().Key;
                    List<RegexNode> parentSiblings;
                    if (!candidateNodes.TryGetValue(parenKey, out parentSiblings))
                    {
                        parentSiblings = new List<RegexNode>();
                        candidateNodes.Add(parenKey, parentSiblings);
                    }

                    parentSiblings.Add(paren);
                    break;
                }

                case '(':
                {
                    // Recognises what follows the '(': "?:", "?=", "?!", "?>", "?<=", "?<!", "?<name>" or
                    // "?'name'". When no '?' follows, the group is a plain capturing group.
                    var parentTypeParser =
                        from leftParen in Parse.Char('(')
                        from p in (from question in Parse.Char('?')
                                   from p in Parse.Char(':').Select(x => ParenType.NonCapturing)
                                       .Or(Parse.Char('=').Select(x => ParenType.PositiveLookAhead))
                                       .Or(Parse.Char('!').Select(x => ParenType.NegativeLookAhead))
                                       .Or(Parse.Char('>').Select(x => ParenType.Atomic))
                                       .Or(Parse.String("<=").Select(x => ParenType.PositiveLookBehind))
                                       .Or(Parse.String("<!").Select(x => ParenType.NegativeLookBehind))
                                       .Or(
                                           from open in Parse.Char('<')
                                           from name in Parse.LetterOrDigit.Or(Parse.Char('_')).AtLeastOnce().Text()
                                           from close in Parse.Char('>')
                                           select ParenType.NamedCapture(name))
                                       .Or(
                                           from open in Parse.Char('\'')
                                           from name in Parse.LetterOrDigit.Or(Parse.Char('_')).AtLeastOnce().Text()
                                           from close in Parse.Char('\'')
                                           select ParenType.NamedCapture(name))
                                   select p)
                            .XOr(Parse.Return(ParenType.Capturing))
                        select p;

                    // The result used to be hard-cast to ISuccess, which throws InvalidCastException
                    // whenever the parser does not succeed (presumably for a "(?" sequence that matches
                    // none of the alternatives above - verify). Report a parse failure instead.
                    var parenTypeResult = parentTypeParser(input) as ISuccess<ParenType>;
                    if (parenTypeResult == null)
                    {
                        return new Failure<RegexNode>(input, () => "Unrecognized parenthesis type", () => new[] { "GetContainer" });
                    }

                    var parenType = parenTypeResult.Result;
                    containerStack.Push(new ContainerInfo { Index = input.Position, ContainerType = ContainerType.Parens, ParenType = parenType });

                    // Skips the paren type's identifier; the Advance() after the switch supplies the
                    // one remaining step.
                    input = parenType.Advance(input);
                    break;
                }

                case '|':
                {
                    var container = containerStack.Peek();
                    if (container.ContainerType != ContainerType.Alternation)
                    {
                        // First pipe inside a paren group: everything collected for the group so far
                        // becomes the first choice of a new alternation.
                        List<RegexNode> alternationNodes;
                        if (!candidateNodes.TryGetValue(container.Key, out alternationNodes))
                        {
                            // The pipe directly follows the '(' so there is nothing to move. The indexer
                            // lookup used to throw KeyNotFoundException here.
                            return new Failure<RegexNode>(input, () => "Pipe was first character inside parentheses", () => new[] { "GetContainer" });
                        }

                        // Placeholder left in the group's list; it is replaced by the real alternation
                        // node when the closing ')' is reached.
                        var partialAlternation = new AlternationMarker(alternationNodes.First().Index);
                        candidateNodes[container.Key] = new List<RegexNode> { partialAlternation };

                        var alternationInfo = new ContainerInfo { Index = partialAlternation.Index, ContainerType = ContainerType.Alternation };
                        containerStack.Push(alternationInfo);
                        container = containerStack.Peek();
                        candidateNodes[container.Key] = new List<RegexNode>(alternationNodes);
                    }

                    // Record where one choice ends and the next begins.
                    candidateNodes[container.Key].Add(new AlternationMarker(input.Position));
                    break;
                }

                default:
                    return new Failure<RegexNode>(input, () => "Unexpected character found inside parenthesis", () => new[] { "GetContainer" });
            }

            input = input.Advance();
        }
    }

    // The input is exhausted. The only acceptable leftover is the assumed top-level alternation, and only
    // if at least one pipe was actually seen.
    if (containerStack.Count == 1 && containerStack.Peek().ContainerType == ContainerType.Alternation)
    {
        var topLevelInfo = containerStack.Pop();
        var candidates = candidateNodes[topLevelInfo.Key];
        if (candidates.Any(c => c is AlternationMarker))
        {
            var alternation = CreateAlternation(input, candidates);
            return new Success<RegexNode>(alternation, input.AtEnd ? input : input.Advance());
        }
    }

    return new Failure<RegexNode>(input, () => "Unmatched parentheses", () => new[] { "GetContainer" });
}
/// <summary>
/// Moves <paramref name="input"/> forward over the characters that identify this paren type.
/// </summary>
/// <remarks>
/// GetContainer calls this with the input positioned on the opening paren and advances one more
/// character itself afterwards, so the step counts below are one less than the number of characters
/// being skipped.
/// </remarks>
/// <param name="input">The input to advance.</param>
/// <returns>
/// The same input for a plain capturing group; otherwise the input advanced by the number of steps
/// this paren type requires.
/// </returns>
public Input Advance(Input input)
{
    // A plain capturing group has no identifier to skip.
    if (this == Capturing)
    {
        return input;
    }

    int steps;
    if (!Name.Contains("<"))
    {
        // Named capture: the '?', the two delimiters ('<' and '>' or both '\'' characters), and the
        // name itself, no matter how long it is.
        steps = 3 + Name.Length;
    }
    else if (this == PositiveLookBehind || this == NegativeLookBehind)
    {
        // All types except Named have a '<' (and a '>') in their names and a fixed-length identifier.
        // Look behind has an extra character in its identifier.
        steps = 3;
    }
    else
    {
        // The '?' and the single character of the identifier.
        steps = 2;
    }

    var advanced = input;
    for (var step = 0; step < steps; step++)
    {
        advanced = advanced.Advance();
    }

    return advanced;
}
/// <summary>
/// Builds an <see cref="Alternation"/> from a flat list of nodes in which each
/// <see cref="AlternationMarker"/> separates one choice from the next.
/// </summary>
/// <remarks>
/// Collection stops at the first empty choice (a leading marker, two adjacent markers, or the end of the
/// list); any nodes after that point are not included in the result.
/// </remarks>
/// <param name="input">Input whose Source text the choice and alternation patterns are cut from.</param>
/// <param name="alternationChildren">The nodes of every choice, separated by alternation markers.</param>
/// <returns>
/// An alternation whose index is that of its first choice and whose pattern spans from the start of
/// the first choice to the end of the last one.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// No non-empty choice precedes the first marker or the end of the list.
/// </exception>
private static Alternation CreateAlternation(Input input, List<RegexNode> alternationChildren)
{
    var choices = new List<AlternationChoice>();
    var pending = new List<RegexNode>();

    // Single pass over the children. Position Count is treated as a virtual trailing marker so the last
    // choice is flushed by the same code path as the others. (The previous Skip/TakeWhile form re-walked
    // the list from the start for every choice.)
    for (var position = 0; position <= alternationChildren.Count; position++)
    {
        var atSeparator = position == alternationChildren.Count || alternationChildren[position] is AlternationMarker;
        if (!atSeparator)
        {
            pending.Add(alternationChildren[position]);
            continue;
        }

        if (pending.Count == 0)
        {
            // An empty choice ends collection.
            break;
        }

        var firstChildIndex = pending[0].Index;
        var lastChild = pending[pending.Count - 1];
        var choiceLength = (lastChild.Index + lastChild.Pattern.Length) - firstChildIndex;
        choices.Add(new AlternationChoice(pending, firstChildIndex, input.Source.Substring(firstChildIndex, choiceLength)));

        // Each choice keeps its own list, so start a fresh one instead of clearing.
        pending = new List<RegexNode>();
    }

    if (choices.Count == 0)
    {
        // Same exception type that First() raised here before, with a message that names the cause.
        throw new System.InvalidOperationException("Cannot create an alternation without at least one non-empty choice");
    }

    var firstChoiceIndex = choices[0].Index;
    var lastChoice = choices[choices.Count - 1];
    var alternationLength = (lastChoice.Index + lastChoice.Pattern.Length) - firstChoiceIndex;

    return new Alternation(choices, firstChoiceIndex, input.Source.Substring(firstChoiceIndex, alternationLength));
}