/// <summary>
/// Parse the required quantities - we must match at least the minimum.
/// </summary>
/// <remarks>
/// Runs <c>Child</c> up to <c>Min</c> times. Every step the child produces is forwarded, except the
/// child's own Break, which only ends the current repetition. The text of each child Pass is appended
/// to <paramref name="matchedText"/>. If the child reports Fail, this node's Fail and Break steps are
/// emitted and the sequence ends.
/// </remarks>
/// <param name="engine">Engine handed to the child parse; its current state is reported in the failure step.</param>
/// <param name="matchedText">Accumulator that receives the text of every successful repetition.</param>
/// <param name="initialState">State at which this quantifier started; reported in the failure step.</param>
/// <returns>The forwarded child steps, followed by Fail and Break when a required repetition fails.</returns>
private IEnumerable<ParseStep> ParseRequired(IRegexEngine engine, StringBuilder matchedText, State initialState)
{
    for (int i = 0; i < Min; i++)
    {
        foreach (var result in Child.Parse(engine))
        {
            var fromChild = ReferenceEquals(Child, result.Node);

            if (fromChild && result.Type == ParseStepType.Break)
            {
                // The child finished this repetition; move on to the next one.
                break;
            }

            yield return result;

            if (!fromChild)
            {
                continue;
            }

            if (result.Type == ParseStepType.Pass)
            {
                matchedText.Append(result.MatchedText);
            }
            else if (result.Type == ParseStepType.Fail)
            {
                yield return ParseStep.Fail(
                    this,
                    initialState,
                    engine.State,
                    string.Format("Greedy quantifier was required to match at least {0} times, but matched {1} times", Min, i));
                yield return ParseStep.Break(this);

                // Failure has been reported. Without this, a consumer that keeps enumerating
                // would fall back into the loops and attempt further required repetitions.
                yield break;
            }
        }
    }
}
/// <summary>
/// Tries to match the single input character at the engine's current index.
/// </summary>
/// <remarks>
/// Emits BeginParse first. If the index is at or past the end of the input, or the character there is
/// not accepted by <c>Matches</c>, emits Fail and Break and leaves the engine state untouched.
/// Otherwise the engine state is advanced and Pass, AdvanceIndex and Break are emitted.
/// </remarks>
/// <param name="engine">Engine supplying the input and the current state; its state is advanced on a match.</param>
/// <returns>The step sequence describing this parse attempt.</returns>
internal override IEnumerable<ParseStep> Parse(IRegexEngine engine)
{
    yield return ParseStep.BeginParse(this, engine.State);

    // The short-circuit keeps Matches from being called when there is no character left to test.
    var nothingToMatch = engine.State.Index >= engine.Input.Length
                         || !Matches(engine.Input[engine.State.Index]);
    if (nothingToMatch)
    {
        yield return ParseStep.Fail(this, engine.State, engine.State);
        yield return ParseStep.Break(this);
        yield break;
    }

    var stateBefore = engine.State;
    var consumed = engine.Input.Substring(stateBefore.Index, 1);

    // The state moves forward before the Pass step is handed out.
    engine.State = stateBefore.Advance();

    yield return ParseStep.Pass(this, consumed, stateBefore, engine.State);
    yield return ParseStep.AdvanceIndex(this, engine.State);
    yield return ParseStep.Break(this);
}
/// <summary>
/// Parses a quantified child: first the required repetitions, then whatever the concrete quantifier
/// does beyond the minimum.
/// </summary>
/// <remarks>
/// Emits BeginParse, then forwards every step from <c>ParseRequired</c>. If that phase reports failure
/// the sequence ends right after this node's Break. When <c>Min</c> equals <c>Max</c> the node passes
/// with the text gathered so far. Otherwise the steps from <c>ParseSpecific</c> are forwarded, with
/// this node's Break steps re-issued as fresh Break steps.
/// </remarks>
/// <param name="engine">Engine supplying the input and current state.</param>
/// <returns>The step sequence describing this parse attempt.</returns>
internal override sealed IEnumerable<ParseStep> Parse(IRegexEngine engine)
{
    yield return ParseStep.BeginParse(this, engine.State);

    var initialState = engine.State;
    var matchedText = new StringBuilder();

    foreach (var parseStep in ParseRequired(engine, matchedText, initialState))
    {
        yield return parseStep;

        // ParseRequired emits this node's own Break only after reporting Fail. Stop here so a
        // consumer that keeps enumerating cannot reach the Pass below or enter ParseSpecific.
        if (ReferenceEquals(parseStep.Node, this) && parseStep.Type == ParseStepType.Break)
        {
            yield break;
        }
    }

    // If min equals max, we're done.
    if (Min == Max)
    {
        yield return ParseStep.Pass(this, matchedText.ToString(), initialState, engine.State);
        yield return ParseStep.Break(this);
        yield break;
    }

    foreach (var specific in ParseSpecific(engine, initialState, matchedText))
    {
        if (ReferenceEquals(specific.Node, this) && specific.Type == ParseStepType.Break)
        {
            yield return ParseStep.Break(this);
        }
        else
        {
            yield return specific;
        }
    }
}
/// <summary>
/// Produces the steps that report a successful match: a Pass covering the input consumed between
/// <paramref name="initialState"/> and the engine's current state, followed by Break.
/// </summary>
/// <param name="engine">Engine supplying the input and the current (end) state.</param>
/// <param name="initialState">State at which the match started.</param>
/// <returns>The Pass step followed by the Break step.</returns>
protected override IEnumerable<ParseStep> GetSuccessParseStep(IRegexEngine engine, State initialState)
{
    var start = initialState.Index;
    var length = engine.State.Index - start;
    var consumed = engine.Input.Substring(start, length);

    yield return ParseStep.Pass(this, consumed, initialState, engine.State);
    yield return ParseStep.Break(this);
}
/// <summary>
/// Rewinds the engine to the most recently saved state and reports the match as it stood at that point.
/// </summary>
/// <remarks>
/// Pops the top entry of <paramref name="savedStates"/>, assigns its state to the engine, then emits
/// Backtrack, a Pass carrying the text recorded with that entry, and Break. The stack is popped
/// unconditionally, so callers are expected to ensure it is not empty.
/// </remarks>
/// <param name="engine">Engine whose state is replaced by the popped state.</param>
/// <param name="initialState">State at which the quantifier started; reported as the start of each step.</param>
/// <param name="savedStates">Stack of saved states; its top entry is removed.</param>
/// <returns>The Backtrack, Pass and Break steps.</returns>
private IEnumerable<ParseStep> Backtrack(IRegexEngine engine, State initialState, Stack<SavedState> savedStates)
{
    var breadcrumb = savedStates.Pop();

    // The engine is rewound before any step is handed out.
    engine.State = breadcrumb.State;

    yield return ParseStep.Backtrack(this, initialState, engine.State);
    yield return ParseStep.Pass(this, breadcrumb.MatchedText, initialState, engine.State);
    yield return ParseStep.Break(this);
}
/// <summary>
/// Matches the text of a previously captured group at the engine's current position.
/// </summary>
/// <remarks>
/// Emits BeginParse, then looks up the first capture for <c>Number</c>. A missing or empty capture
/// yields Fail (with a message) and Break. Otherwise each captured character is matched in turn
/// through a temporary <c>CharacterLiteral</c>; the literals' own steps are consumed here and not
/// forwarded. The first character that does not pass yields Fail and Break; if all pass, Pass and
/// Break are emitted.
/// </remarks>
/// <param name="engine">Engine supplying the captures, the input and the current state.</param>
/// <returns>The step sequence describing this parse attempt.</returns>
internal override IEnumerable<ParseStep> Parse(IRegexEngine engine)
{
    yield return ParseStep.BeginParse(this, engine.State);

    var initialState = engine.State;
    var capture = engine.GetCaptures(Number).FirstOrDefault();

    if (capture == null || string.IsNullOrEmpty(capture.Value))
    {
        yield return ParseStep.Fail(this, initialState, engine.State, "No backreference value found");
        yield return ParseStep.Break(this);
        yield break;
    }

    var captured = capture.Value;
    for (var offset = 0; offset < captured.Length; offset++)
    {
        var character = captured[offset];
        var literal = new CharacterLiteral(character, _ignoreCase, capture.Index + offset, character.ToString());

        // Drain the literal's steps up to (not including) its Break, noting whether any was a Pass.
        var passed = false;
        foreach (var step in literal.Parse(engine).TakeWhile(s => s.Type != ParseStepType.Break))
        {
            if (step.Type == ParseStepType.Pass)
            {
                passed = true;
            }
        }

        if (passed)
        {
            continue;
        }

        yield return ParseStep.Fail(this, initialState, engine.State);
        yield return ParseStep.Break(this);
        yield break;
    }

    yield return ParseStep.Pass(this, capture.Value, initialState, engine.State);
    yield return ParseStep.Break(this);
}
/// <summary>
/// Evaluates a zero-width condition against the engine's current state.
/// </summary>
/// <remarks>
/// Emits BeginParse, then either a Pass with empty matched text (when <c>Matches</c> accepts the
/// current state) or a Fail, and finally Break. The engine state is not modified here.
/// </remarks>
/// <param name="engine">Engine supplying the current state.</param>
/// <returns>BeginParse, then Pass or Fail, then Break.</returns>
internal override sealed IEnumerable<ParseStep> Parse(IRegexEngine engine)
{
    yield return ParseStep.BeginParse(this, engine.State);

    ParseStep verdict;
    if (!Matches(engine.State))
    {
        verdict = ParseStep.Fail(this, engine.State, engine.State);
    }
    else
    {
        verdict = ParseStep.Pass(this, "", engine.State, engine.State);
    }

    yield return verdict;
    yield return ParseStep.Break(this);
}
/// <summary>
/// Lazy continuation of a quantifier: offers the shortest match first and only consumes another
/// repetition of <c>Child</c> each time the caller resumes enumeration after a Break.
/// </summary>
/// <remarks>
/// On each round it emits StateSaved, a Pass with the text matched so far, and Break. If enumeration
/// continues, a Backtrack step is emitted and the child is parsed once more; a child Pass appends to
/// <paramref name="matchedText"/> and starts the next round. When the child does not pass, or when
/// <c>Max</c> repetitions have already been offered, Fail and Break are emitted.
/// </remarks>
/// <param name="engine">Engine handed to the child parse and read for the current state.</param>
/// <param name="initialState">State at which the quantifier started.</param>
/// <param name="matchedText">Text matched so far; extended by every additional repetition.</param>
/// <returns>The step sequence for the lazy part of the quantifier.</returns>
protected override IEnumerable<ParseStep> ParseSpecific(IRegexEngine engine, State initialState, StringBuilder matchedText)
{
    for (int i = Min; Max == null || i <= Max; i++)
    {
        // We're lazy - we've already matched what was required of us, so declare that we're done.
        yield return ParseStep.StateSaved(this, initialState, string.Format("Saving state - index {0}", engine.State.Index));
        yield return ParseStep.Pass(this, matchedText.ToString(), initialState, engine.State);
        yield return ParseStep.Break(this);

        // However, if we make it to here, it indicates that we need to match more, in order to
        // (attempt to) get the overall regex to match.
        if (Max != null && i >= Max)
        {
            // Max repetitions have already been offered. Trying the child again would consume
            // input and emit steps for a repetition that can never be accepted.
            break;
        }

        // According to the parlance of regex people, this is a backtrack, even though it's forward.
        yield return ParseStep.Backtrack(this, initialState, engine.State);

        var childSuccess = false;
        foreach (var result in Child.Parse(engine))
        {
            var fromChild = ReferenceEquals(Child, result.Node);
            if (fromChild && result.Type == ParseStepType.Break)
            {
                break;
            }

            yield return result;

            if (fromChild && result.Type == ParseStepType.Pass)
            {
                matchedText.Append(result.MatchedText);
                childSuccess = true;
            }
        }

        if (!childSuccess)
        {
            break;
        }
    }

    // If we ever make it outside the loop, it means either we were asked to backtrack and our child
    // didn't pass, or we were asked to backtrack beyond the Max allowed matches.
    // NOTE(review): the message below is misspelled ("allowled"); it is left unchanged because it is
    // runtime text that callers or tests may compare against.
    yield return ParseStep.Fail(this, initialState, engine.State, "Exceeded max allowled quantities");
    yield return ParseStep.Break(this);
}
/// <summary>
/// Produces the steps reported when the end of the input is reached: an EndOfString step for the
/// engine's current state, followed by this node's Break.
/// </summary>
/// <param name="engine">Engine supplying the current state.</param>
/// <returns>The EndOfString step followed by the Break step.</returns>
protected override IEnumerable<ParseStep> GetEndOfStringSteps(IRegexEngine engine)
{
    var endOfInput = ParseStep.EndOfString(this, engine.State);
    yield return endOfInput;

    yield return ParseStep.Break(this);
}
/// <summary>
/// Produces the steps reported on failure: a Fail spanning <paramref name="initialState"/> to
/// <paramref name="currentState"/>, followed by this node's Break.
/// </summary>
/// <param name="engine">Not used by this override.</param>
/// <param name="initialState">State at which the failed attempt started.</param>
/// <param name="currentState">State at which the attempt failed.</param>
/// <param name="skipAdvance">Not used by this override.</param>
/// <returns>The Fail step followed by the Break step.</returns>
protected override IEnumerable<ParseStep> GetFailParseSteps(IRegexEngine engine, State initialState, State currentState, bool skipAdvance)
{
    var failure = ParseStep.Fail(this, initialState, currentState);
    yield return failure;

    yield return ParseStep.Break(this);
}
/// <summary>
/// Greedy continuation of a quantifier: keeps matching <c>Child</c> until <c>Max</c> is reached, the
/// child fails, or the input runs out, saving a state before every attempt so it can back off later.
/// </summary>
/// <remarks>
/// Each saved entry records the engine state and the text matched so far. A child Fail immediately
/// backtracks to the latest saved entry. Once a result has been reported, further enumeration walks
/// back through the remaining saved entries one at a time. When none are left, Fail is emitted, the
/// engine state is reset to <paramref name="initialState"/> if it has moved, and Break is emitted.
/// </remarks>
/// <param name="engine">Engine handed to the child parse; its state is rewound when backtracking.</param>
/// <param name="initialState">State at which the quantifier started.</param>
/// <param name="matchedText">Text matched so far; extended by every additional repetition.</param>
/// <returns>The step sequence for the greedy part of the quantifier.</returns>
protected override IEnumerable<ParseStep> ParseSpecific(IRegexEngine engine, State initialState, StringBuilder matchedText)
{
    // At this point, we know we'll match. Attempt to match everything else, until we hit max,
    // a non-match, or the end of the string. Breadcrumbs are kept in case we're asked to backtrack.
    var breadcrumbs = new Stack<SavedState>();

    int repetitions = Min;
    for (; Max == null || repetitions < Max; repetitions++)
    {
        // If the last breadcrumb was dropped at the end of the string, there's no point in going further.
        if (breadcrumbs.Count > 0 && breadcrumbs.Peek().State.Index >= engine.Input.Length)
        {
            // Should we ever get here???
            breadcrumbs.Pop();
            break;
        }

        if (engine.State.Index >= engine.Input.Length)
        {
            // We're at the end of the string - which means we're done. Time to report as such.
            yield return ParseStep.EndOfString(this, engine.State);
            yield return ParseStep.Pass(this, matchedText.ToString(), initialState, engine.State);
            yield return ParseStep.Break(this);
            break;
        }

        // Not at the end of the string: since we're greedy, leave a breadcrumb before doing anything.
        breadcrumbs.Push(new SavedState(engine.State, matchedText.ToString()));
        yield return ParseStep.StateSaved(this, engine.State, string.Format("Saving state - index {0}", engine.State.Index));

        var childFailed = false;

        // TODO: I think that we're going to need to use an enumerator here so we can initiate
        // backtracking in our child and its descendants.
        foreach (var step in Child.Parse(engine))
        {
            var fromChild = ReferenceEquals(Child, step.Node);
            if (fromChild && step.Type == ParseStepType.Break)
            {
                break;
            }

            yield return step;

            if (!fromChild)
            {
                continue;
            }

            if (step.Type == ParseStepType.Pass)
            {
                matchedText.Append(step.MatchedText);
            }
            else if (step.Type == ParseStepType.Fail)
            {
                childFailed = true;
                foreach (var rewindStep in Backtrack(engine, initialState, breadcrumbs))
                {
                    yield return rewindStep;
                }

                break;
            }
        }

        if (childFailed)
        {
            break;
        }
    }

    if (repetitions >= Max)
    {
        // We've reached the maximum allowed quantity of repetitions, time to break.
        yield return ParseStep.Pass(this, matchedText.ToString(), initialState, engine.State);
        yield return ParseStep.Break(this);
    }

    // If we get here, it means that we're backtracking.
    while (breadcrumbs.Count > 0)
    {
        foreach (var rewindStep in Backtrack(engine, initialState, breadcrumbs))
        {
            yield return rewindStep;
        }
    }

    // We ran out of saved states to backtrack to - report failure.
    yield return ParseStep.Fail(this, initialState, engine.State, "No backtrack is available");

    if (engine.State.Index != initialState.Index)
    {
        yield return ParseStep.ResetIndex(this, initialState, engine.State);
        engine.State = initialState;
    }

    yield return ParseStep.Break(this);
}
/// <summary>
/// Evaluates a look-around by running the inner regex on a separate engine and translating its steps
/// into the outer engine's context.
/// </summary>
/// <remarks>
/// A new internal engine is created over <c>GetEngineInput(outerEngine)</c> and seeded with every
/// capture defined on the outer engine. Steps from the non-reporting node are dropped. A Match from
/// the inner regex ends the run as matched, its Break ends the run, and its Fail steps are skipped.
/// Every other non-Break step is converted and forwarded while the inner index is within the input.
/// The look-around passes when the inner result differs from <c>Negative</c>, and fails otherwise.
/// EndLookaround and Break close the sequence.
/// </remarks>
/// <param name="outerEngine">The engine driving the overall parse; supplies input, captures and state.</param>
/// <returns>The step sequence describing the look-around.</returns>
internal override IEnumerable<ParseStep> Parse(IRegexEngine outerEngine)
{
    var engine = new RegexEngine.RegexEngineInternal(GetEngineInput(outerEngine));

    var outerCaptures = outerEngine.GetAllDefinedCaptures().SelectMany(kvp => kvp.Value);
    foreach (var capture in outerCaptures)
    {
        engine.AddCapture(capture.Number, capture.Index, capture.Value);
    }

    var modifier = GetModifier(outerEngine);
    var match = false;

    yield return ParseStep.StartLookaround(this, engine.State.Plus(modifier)).WithSkipAdvanceOnFail(ShouldSkipAdvance);

    foreach (var result in _regex.Parse(engine))
    {
        // Don't report the results of the non-reporting start of string element.
        if (ReferenceEquals(result.Node, _nonReportingNode))
        {
            continue;
        }

        var stepType = result.Type;

        if (ReferenceEquals(result.Node, _regex))
        {
            if (stepType == ParseStepType.Match)
            {
                match = true;
                break;
            }

            if (stepType == ParseStepType.Break)
            {
                break;
            }

            if (stepType == ParseStepType.Fail)
            {
                continue;
            }
        }

        if (stepType == ParseStepType.Break || engine.State.Index > engine.Input.Length)
        {
            continue;
        }

        yield return result
            .ConvertToOuterContext(outerEngine.Input, modifier, this, n => ReferenceEquals(n, _regex), message => message.Replace(_regex.NodeType, NodeType))
            .AsLookaround()
            .WithSkipAdvanceOnFail(ShouldSkipAdvance);
    }

    // A positive look-around holds when the inner regex matched; a negative one when it did not.
    if (match != Negative)
    {
        // TODO: we need to forward any captures from the look-around to the outer engine.
        yield return ParseStep.Pass(this, "", outerEngine.State, engine.State.Plus(modifier)).WithSkipAdvanceOnFail(ShouldSkipAdvance);
    }
    else
    {
        yield return ParseStep.Fail(this, outerEngine.State, engine.State.Plus(modifier)).WithSkipAdvanceOnFail(ShouldSkipAdvance);
    }

    yield return ParseStep.EndLookaround(this).WithSkipAdvanceOnFail(ShouldSkipAdvance);
    yield return ParseStep.Break(this).WithSkipAdvanceOnFail(ShouldSkipAdvance);
}
/// <summary>
/// Tries each alternative in <c>Choices</c> in order, reporting a Pass for every alternative that
/// matches and moving on to the next one when enumeration continues past the Break.
/// </summary>
/// <remarks>
/// Steps from the current alternative are forwarded, except its own Break. When the alternative
/// passes, a StateSaved step is added unless it is the last alternative; then Pass, Break and
/// Backtrack are emitted. When it does not pass and the engine index has moved, a ResetIndex step is
/// emitted and the engine state is restored to where this node started. After the last alternative,
/// Fail and Break are emitted.
/// </remarks>
/// <param name="engine">Engine handed to each alternative; its state is restored after a failed alternative.</param>
/// <returns>The step sequence describing this parse attempt.</returns>
internal override IEnumerable<ParseStep> Parse(IRegexEngine engine)
{
    yield return ParseStep.BeginParse(this, engine.State);

    var startState = engine.State;
    var position = 0;

    foreach (var choice in Choices)
    {
        var isLastChoice = position >= Choices.Count - 1;
        position++;

        var passedText = "";
        var passed = false;

        foreach (var step in choice.Parse(engine))
        {
            var fromChoice = ReferenceEquals(choice, step.Node);
            if (fromChoice && step.Type == ParseStepType.Break)
            {
                break;
            }

            yield return step;

            if (!fromChoice || step.Type != ParseStepType.Pass)
            {
                continue;
            }

            passedText = step.MatchedText;
            passed = true;

            if (!isLastChoice)
            {
                // Only save state if we're not the last choice...
                yield return ParseStep.StateSaved(this, startState, string.Format("Saving state - index {0}", engine.State.Index));
            }
        }

        if (!passed)
        {
            if (engine.State.Index != startState.Index)
            {
                yield return ParseStep.ResetIndex(this, startState, engine.State);
                engine.State = startState;
            }

            continue;
        }

        yield return ParseStep.Pass(this, passedText, startState, engine.State);
        yield return ParseStep.Break(this);

        // TODO: lazy quantifiers might act in a similar manner as alternation here...
        yield return ParseStep.Backtrack(this, startState, engine.State);
    }

    yield return ParseStep.Fail(this, startState, engine.State);
    yield return ParseStep.Break(this);
}