/// <summary>
/// Assembles the full pattern text (optional look-behind group, the core expression, optional
/// look-ahead group) and constructs the regex from it using the stored options.
/// </summary>
public void Build()
{
    // Trailing assertion: the positive look-ahead wins; the negative one is only consulted
    // when the positive one is empty.
    string suffix;
    if (LookAhead.IsEmpty)
    {
        suffix = this.NegLookAhead.IsEmpty
            ? String.Empty
            : "(?!" + NegLookAhead.ToExpression() + ")";
    }
    else
    {
        suffix = "(?=" + LookAhead.ToExpression() + ")";
    }

    // Leading assertion: same precedence rule, positive look-behind before negative.
    string prefix;
    if (LookBehind.IsEmpty)
    {
        prefix = NegLookBehind.IsEmpty
            ? String.Empty
            : "(?<!" + NegLookBehind.ToExpression() + ")";
    }
    else
    {
        prefix = "(?<=" + LookBehind.ToExpression() + ")";
    }

    _builtExpression = prefix + this.Expression + suffix;
    _regex = new Regex(_builtExpression, _options);

    // TODO
    //InvalidateMatches();
}
/// <summary>
/// Lazily yields every match of <c>Query</c> inside the scope at <paramref name="scopeIdx"/>,
/// subject to the look-behind, look-ahead and negative look-ahead queries.
/// </summary>
/// <param name="word">The word passed through to every <c>Match</c> call.</param>
/// <param name="scopes">All scopes; only the one at <paramref name="scopeIdx"/> is scanned.</param>
/// <param name="scopeIdx">Index of the scope to scan.</param>
/// <returns>The accepted matches, in scan order.</returns>
public IEnumerable<Interval<string[]>> MatchScope(Word word, Interval[] scopes, int scopeIdx)
{
    var current = scopes[scopeIdx];
    int cursor = current.Start;
    int finalIdx = scopes.Length - 1;

    // A query restricted to the last scope produces nothing for any earlier scope.
    if (LastScope && scopeIdx < finalIdx)
    {
        yield break;
    }

    // When a condition on the following scope exists, that scope must exist and satisfy it.
    if (NextScopeQuery != null)
    {
        if (scopeIdx == finalIdx)
        {
            yield break;
        }

        var following = scopes[scopeIdx + 1];
        if (NextScopeQuery.Match(word, following.Start, following) == null)
        {
            yield break;
        }
    }

    while (cursor < current.End)
    {
        var prefix = LookBehind.Match(word, cursor, current);
        if (prefix != null)
        {
            var hit = Query.Match(word, prefix.End, current);
            if (hit != null
                && LookAhead.Match(word, hit.End, current) != null
                && NegativeLookAhead.Match(word, hit.End, current) == null)
            {
                yield return hit;

                // Resume after the match; step one extra position on an empty match
                // so the scan cannot loop forever.
                cursor = hit.Length == 0 ? hit.End + 1 : hit.End;
                continue;
            }
        }

        // Some condition failed at this position: try the next one.
        cursor++;
    }
}
/// <summary>
/// Parses a parenthesised container (capturing or non-capturing group, look-ahead, look-behind,
/// atomic group or named capture), or an alternation that is not wrapped in parentheses,
/// starting at <paramref name="input"/>.
/// </summary>
/// <param name="input">Position at which parsing starts.</param>
/// <param name="quantifierParamsParser">
/// Parser tried whenever the current position is not a non-container node. On success the most
/// recently collected node of the innermost open container is wrapped in a quantifier.
/// </param>
/// <returns>
/// A success holding the container node as soon as the outermost parenthesis is closed, or a
/// success holding an alternation when the input ends with exactly one open alternation that
/// contains at least one alternation marker. A failure in every other case.
/// </returns>
/// <remarks>
/// Malformed or unsupported input (a stray <c>)</c>, a quantifier or <c>|</c> with nothing before
/// it, a group with no content, an unrecognised <c>(?...</c> construct) is reported as a failure
/// instead of escaping as <c>InvalidOperationException</c>, <c>KeyNotFoundException</c> or
/// <c>InvalidCastException</c>.
/// </remarks>
private IResult<RegexNode> GetContainer(Input input, Parser<QuantifierParams> quantifierParamsParser)
{
    if (input.AtEnd)
    {
        return ContainerFailure(input, "Unexpected end of input reached");
    }

    if (input.Current == '|')
    {
        // It's possible that we're at a pipe that is creating a top-level alternation. Fail so a higher level
        // parser can deal with it.
        return ContainerFailure(input, "Pipe was first character parsed");
    }

    // Open containers, innermost on top.
    var containerStack = new Stack<ContainerInfo>();
    // Nodes collected so far for each open container, keyed by the container's Key.
    var candidateNodes = new Dictionary<Guid, List<RegexNode>>();

    while (!input.AtEnd)
    {
        var nonContainerSuccess = _nonContainer(input) as ISuccess<RegexNode>;
        if (nonContainerSuccess != null)
        {
            if (containerStack.Count == 0)
            {
                if (input.Position == 0)
                {
                    // Assume that we might have a top-level alternation - if it turns out we don't, we'll fail anyway
                    containerStack.Push(new ContainerInfo { Index = input.Position, ContainerType = ContainerType.Alternation });
                }
                else
                {
                    return ContainerFailure(input, "Last character of input reached - captures are not possible");
                }
            }

            GetOrAddCandidates(candidateNodes, containerStack.Peek().Key).Add(nonContainerSuccess.Result);
            input = nonContainerSuccess.Remainder;
            continue;
        }

        var quantifierParamsSuccess = quantifierParamsParser(input) as ISuccess<QuantifierParams>;
        if (quantifierParamsSuccess != null)
        {
            // A quantifier needs an open container that already holds at least one node.
            List<RegexNode> quantifiable;
            if (containerStack.Count == 0
                || !candidateNodes.TryGetValue(containerStack.Peek().Key, out quantifiable)
                || quantifiable.Count == 0)
            {
                return ContainerFailure(input, "Quantifier found with no preceding node to quantify");
            }

            var quantifierPattern = input.Source.Substring(
                input.Position,
                quantifierParamsSuccess.Remainder.Position - input.Position);

            // Replace the last collected node with a quantifier wrapping it.
            var lastIdx = quantifiable.Count - 1;
            quantifiable[lastIdx] = CreateQuantifier(
                quantifierParamsSuccess.Result,
                quantifiable[lastIdx],
                quantifiable[lastIdx].Index,
                quantifiable[lastIdx].Pattern + quantifierPattern);

            input = quantifierParamsSuccess.Remainder;
            continue;
        }

        switch (input.Current)
        {
            case ')':
                if (containerStack.Count == 0)
                {
                    return ContainerFailure(input, "Unmatched closing parenthesis");
                }

                var containerInfo = containerStack.Pop();
                if (containerInfo.ContainerType == ContainerType.Alternation)
                {
                    // The alternation must sit inside a parenthesis; the implicit top-level
                    // alternation has none, so this ')' has no matching '('.
                    if (containerStack.Count == 0)
                    {
                        return ContainerFailure(input, "Unmatched closing parenthesis");
                    }

                    var alternationChildren = candidateNodes[containerInfo.Key];
                    var alternation = CreateAlternation(input, alternationChildren);

                    // The finished alternation becomes the only child of the enclosing container.
                    containerInfo = containerStack.Pop();
                    candidateNodes[containerInfo.Key] = new List<RegexNode> { alternation };
                }

                List<RegexNode> children;
                if (!candidateNodes.TryGetValue(containerInfo.Key, out children))
                {
                    // Nothing was collected for this group (e.g. "()").
                    return ContainerFailure(input, "No nodes found inside parentheses");
                }

                candidateNodes.Remove(containerInfo.Key);

                var index = containerInfo.Index;
                // Pattern text runs from the opening parenthesis through this closing one.
                var pattern = input.Source.Substring(containerInfo.Index, (input.Position - containerInfo.Index) + 1);

                ContainerNode paren;
                if (containerInfo.ParenType == ParenType.Capturing)
                {
                    paren = new CapturingParens(children, index, pattern);
                }
                else if (containerInfo.ParenType == ParenType.NonCapturing)
                {
                    paren = new NonCapturingParens(children, index, pattern);
                }
                else if (containerInfo.ParenType == ParenType.PositiveLookAhead)
                {
                    paren = new LookAhead(false, children, index, pattern);
                }
                else if (containerInfo.ParenType == ParenType.NegativeLookAhead)
                {
                    paren = new LookAhead(true, children, index, pattern);
                }
                else if (containerInfo.ParenType == ParenType.PositiveLookBehind)
                {
                    paren = new LookBehind(false, children, index, pattern);
                }
                else if (containerInfo.ParenType == ParenType.NegativeLookBehind)
                {
                    paren = new LookBehind(true, children, index, pattern);
                }
                else if (containerInfo.ParenType == ParenType.Atomic)
                {
                    paren = new AtomicGrouping(children, index, pattern);
                }
                else
                {
                    paren = new NamedCapture(containerInfo.ParenType.Name, children, index, pattern);
                }

                if (containerStack.Count > 0)
                {
                    // Still nested: the finished group is a child of the enclosing container.
                    GetOrAddCandidates(candidateNodes, containerStack.Peek().Key).Add(paren);
                }
                else
                {
                    return new Success<RegexNode>(paren, input.Advance());
                }

                break;

            case '(':
                var parentTypeParser =
                    from leftParen in Parse.Char('(')
                    from p in (from question in Parse.Char('?')
                               from p in Parse.Char(':').Select(x => ParenType.NonCapturing)
                                   .Or(Parse.Char('=').Select(x => ParenType.PositiveLookAhead))
                                   .Or(Parse.Char('!').Select(x => ParenType.NegativeLookAhead))
                                   .Or(Parse.Char('>').Select(x => ParenType.Atomic))
                                   .Or(Parse.String("<=").Select(x => ParenType.PositiveLookBehind))
                                   .Or(Parse.String("<!").Select(x => ParenType.NegativeLookBehind))
                                   .Or(
                                       from open in Parse.Char('<')
                                       from name in Parse.LetterOrDigit.Or(Parse.Char('_')).AtLeastOnce().Text()
                                       from close in Parse.Char('>')
                                       select ParenType.NamedCapture(name))
                                   .Or(
                                       from open in Parse.Char('\'')
                                       from name in Parse.LetterOrDigit.Or(Parse.Char('_')).AtLeastOnce().Text()
                                       from close in Parse.Char('\'')
                                       select ParenType.NamedCapture(name))
                               select p)
                        .XOr(Parse.Return(ParenType.Capturing))
                    select p;

                // The parser can fail (e.g. "(?" followed by an unlisted character), so the
                // result is checked instead of hard-cast.
                var parenTypeSuccess = parentTypeParser(input) as ISuccess<ParenType>;
                if (parenTypeSuccess == null)
                {
                    return ContainerFailure(input, "Unrecognized group construct after opening parenthesis");
                }

                var parenType = parenTypeSuccess.Result;
                containerStack.Push(new ContainerInfo { Index = input.Position, ContainerType = ContainerType.Parens, ParenType = parenType });
                input = parenType.Advance(input);
                break;

            case '|':
                if (containerStack.Count == 0)
                {
                    return ContainerFailure(input, "Alternation found outside of any container");
                }

                var container = containerStack.Peek();
                if (container.ContainerType != ContainerType.Alternation)
                {
                    // First pipe in this container: move what was collected so far into a new
                    // alternation container and leave a marker in the enclosing one.
                    List<RegexNode> alternationNodes;
                    if (!candidateNodes.TryGetValue(container.Key, out alternationNodes) || alternationNodes.Count == 0)
                    {
                        return ContainerFailure(input, "Alternation has no left-hand side");
                    }

                    var partialAlternation = new AlternationMarker(alternationNodes[0].Index);
                    candidateNodes[container.Key] = new List<RegexNode> { partialAlternation };

                    var alternationInfo = new ContainerInfo { Index = partialAlternation.Index, ContainerType = ContainerType.Alternation };
                    containerStack.Push(alternationInfo);
                    container = containerStack.Peek();
                    candidateNodes[container.Key] = new List<RegexNode>(alternationNodes);
                }

                candidateNodes[container.Key].Add(new AlternationMarker(input.Position));
                break;

            default:
                return ContainerFailure(input, "Unexpected character found inside parenthesis");
        }

        input = input.Advance();
    }

    // End of input: the only acceptable leftover is a single open alternation that really
    // contains a pipe.
    if (containerStack.Count == 1 && containerStack.Peek().ContainerType == ContainerType.Alternation)
    {
        var remaining = containerStack.Pop();
        var candidates = candidateNodes[remaining.Key];
        if (candidates.Any(c => c is AlternationMarker))
        {
            var topLevelAlternation = CreateAlternation(input, candidates);
            return new Success<RegexNode>(topLevelAlternation, input.AtEnd ? input : input.Advance());
        }
    }

    return ContainerFailure(input, "Unmatched parentheses");
}

/// <summary>Builds the failure result used by <c>GetContainer</c> for the given message.</summary>
private static IResult<RegexNode> ContainerFailure(Input input, string message)
{
    return new Failure<RegexNode>(input, () => message, () => new[] { "GetContainer" });
}

/// <summary>Returns the node list stored under <paramref name="key"/>, creating and storing an empty one if absent.</summary>
private static List<RegexNode> GetOrAddCandidates(Dictionary<Guid, List<RegexNode>> candidateNodes, Guid key)
{
    List<RegexNode> nodes;
    if (!candidateNodes.TryGetValue(key, out nodes))
    {
        nodes = new List<RegexNode>();
        candidateNodes.Add(key, nodes);
    }

    return nodes;
}