/// <summary>
/// Scans forward through the input until the pattern matches or the input runs out
/// </summary>
/// <param name="context">The parse context to search</param>
/// <returns>A <see cref="CharFAMatch"/> carrying the line, column, position and captured text of the successful attempt, or null if the end of input is reached without a match.</returns>
public CharFAMatch Match(ParseContext context)
{
    context.EnsureStarted();
    while (true)
    {
        // remember where this attempt begins so a match can be reported from its start
        var startLine = context.Line;
        var startColumn = context.Column;
        var startPosition = context.Position;
        var captureStart = context.CaptureBuffer.Length;

        // out of input: nothing matched
        if (-1 == context.Current)
        {
            return null;
        }

        if (_DoMatch(context))
        {
            return new CharFAMatch(
                startLine,
                startColumn,
                startPosition,
                context.GetCapture(captureStart));
        }
        // the attempt failed; retry from wherever _DoMatch left the cursor
    }
}
/// <summary>
/// Reads rules from the parse context until the input is exhausted
/// </summary>
/// <param name="pc">The parse context to read from</param>
/// <returns>A new <see cref="Cfg"/> holding every rule returned by _ParseRule, in the order read</returns>
static Cfg _Parse(ParseContext pc)
{
    var cfg = new Cfg();
    pc.EnsureStarted();
    while (true)
    {
        // end of input: the grammar is complete
        if (-1 == pc.Current)
        {
            return cfg;
        }
        cfg.Rules.Add(_ParseRule(pc));
    }
}
/// <summary>
/// Advances past a run of whitespace, stopping at a newline, a non-whitespace character or the end of input
/// </summary>
/// <param name="pc">The parse context to advance</param>
/// <returns>True if at least one whitespace character was skipped; false if the cursor was not on a skippable character</returns>
static bool _SkipWhiteSpace(ParseContext pc)
{
    pc.EnsureStarted();

    // the cursor must sit on whitespace other than a newline for anything to be skipped
    int first = pc.Current;
    bool skippable = -1 != first
        && '\n' != first
        && char.IsWhiteSpace((char)first);
    if (!skippable)
    {
        return false;
    }

    // consume until the input ends, a newline shows up, or a non-whitespace character is hit
    while (true)
    {
        if (-1 == pc.Advance())
        {
            break;
        }
        if ('\n' == pc.Current)
        {
            break;
        }
        if (!char.IsWhiteSpace((char)pc.Current))
        {
            break;
        }
    }
    return true;
}
/// <summary>
/// Runs the DFA table over the input starting from state 0, capturing each character it consumes
/// </summary>
/// <param name="dfaTable">The DFA state table to walk</param>
/// <param name="context">The parse context supplying the input and receiving the capture</param>
/// <returns>True if the walk stops in a state whose AcceptSymbolId is not -1; otherwise false</returns>
static bool _DoMatchDfa(CharDfaEntry[] dfaTable, ParseContext context)
{
    // prepare the parse context
    context.EnsureStarted();

    // the walk always begins in state 0
    int current = 0;
    for (;;)
    {
        bool exhausted = -1 == context.Current;

        // with no input left there is nothing to move on
        int target = exhausted ? -1 : MoveDfa(dfaTable, current, (char)context.Current);
        if (-1 != target)
        {
            // keep the character, step over it and follow the transition
            context.CaptureCurrent();
            context.Advance();
            current = target;
            continue;
        }

        // the walk ends here: the outcome is whether the state we stopped in accepts
        bool accepted = -1 != dfaTable[current].AcceptSymbolId;
        if (!accepted && !exhausted)
        {
            // failed on a real character: step over it before reporting the error
            context.Advance();
        }
        return accepted;
    }
}
/// <summary>
/// Builds a state machine from the regular expression text supplied by the parse context
/// </summary>
/// <param name="pc">The parse context positioned over the expression text</param>
/// <param name="accept">The accept value passed to every accepting state and operator created here</param>
/// <returns>The start state of the machine. Parsing stops at the end of input or at an unconsumed ')'.</returns>
static CharFA<TAccept> _Parse(ParseContext pc, TAccept accept)
{
    CharFA<TAccept> result = new CharFA<TAccept>(true, accept);
    CharFA<TAccept> tail;
    int quantifier;
    pc.EnsureStarted();
    CharFA<TAccept> current = result;
    while (true)
    {
        // the switch either handles a construct completely (and continues/returns)
        // or leaves a single literal character here for the shared code below it
        int literal;
        switch (pc.Current)
        {
            case -1:
            case ')':
                // end of input, or the end of a group that the caller will consume
                return result;

            case '.':
                pc.Advance();
                tail = current.FirstAcceptingState as CharFA<TAccept>;
                // a set spanning char.MinValue through char.MaxValue
                current = Set(new CharRange[] { new CharRange(char.MinValue, char.MaxValue) }, accept);
                quantifier = pc.Current;
                if ('*' == quantifier)
                {
                    current = Kleene(current, accept);
                    pc.Advance();
                }
                else if ('+' == quantifier)
                {
                    current = Repeat(current, accept);
                    pc.Advance();
                }
                else if ('?' == quantifier)
                {
                    current = Optional(current, accept);
                    pc.Advance();
                }
                // splice the new fragment onto the previous accepting state
                tail.IsAccepting = false;
                tail.EpsilonTransitions.Add(current);
                continue;

            case '(':
                pc.Advance();
                pc.Expecting();
                tail = current.FirstAcceptingState as CharFA<TAccept>;
                // the nested call returns when it reaches the matching ')'
                current = _Parse(pc, accept);
                pc.Expecting(')');
                pc.Advance();
                quantifier = pc.Current;
                if ('*' == quantifier)
                {
                    current = Kleene(current, accept);
                    pc.Advance();
                }
                else if ('+' == quantifier)
                {
                    current = Repeat(current, accept);
                    pc.Advance();
                }
                else if ('?' == quantifier)
                {
                    current = Optional(current, accept);
                    pc.Advance();
                }
                var groupTail = tail.FirstAcceptingState;
                groupTail.EpsilonTransitions.Add(current);
                groupTail.IsAccepting = false;
                continue;

            case '|':
                if (-1 == pc.Advance())
                {
                    // nothing follows the bar: the left side becomes optional
                    current = current.FirstAcceptingState as CharFA<TAccept>;
                    result = Optional(result, accept);
                }
                else
                {
                    // everything parsed so far becomes one alternative, the remainder the other
                    current = _Parse(pc, accept);
                    result = Or(new CharFA<TAccept>[] { result, current }, accept);
                }
                continue;

            case '[':
                {
                    pc.ClearCapture();
                    pc.Advance();
                    pc.Expecting();
                    bool negated = false;
                    if ('^' == pc.Current)
                    {
                        negated = true;
                        pc.Advance();
                        pc.Expecting();
                    }
                    // capture the body of the set up to the closing bracket
                    pc.TryReadUntil(']', '\\', false);
                    pc.Expecting(']');
                    pc.Advance();
                    // a non-negated set whose body is exactly "." covers the full char range
                    var ranges = (!negated && "." == pc.Capture)
                        ? new CharRange[] { new CharRange(char.MinValue, char.MaxValue) }
                        : _ParseRanges(pc.Capture, true);
                    if (negated)
                    {
                        ranges = CharRange.NotRanges(ranges);
                    }
                    tail = current.FirstAcceptingState as CharFA<TAccept>;
                    current = Set(ranges, accept);
                    quantifier = pc.Current;
                    if ('*' == quantifier)
                    {
                        current = Kleene(current, accept);
                        pc.Advance();
                    }
                    else if ('+' == quantifier)
                    {
                        current = Repeat(current, accept);
                        pc.Advance();
                    }
                    else if ('?' == quantifier)
                    {
                        current = Optional(current, accept);
                        pc.Advance();
                    }
                    tail.IsAccepting = false;
                    tail.EpsilonTransitions.Add(current);
                    continue;
                }

            case '\\':
                literal = _ParseEscape(pc);
                if (-1 == literal)
                {
                    pc.Expecting(); // throw an error
                    return null; // doesn't execute
                }
                break;

            default:
                literal = pc.Current;
                pc.Advance();
                break;
        }

        // a single literal character (escaped or plain), optionally followed by a quantifier
        quantifier = pc.Current;
        if ('*' == quantifier || '+' == quantifier || '?' == quantifier)
        {
            // wrap the literal in its own two-state fragment so the quantifier applies to it alone
            CharFA<TAccept> unit = new CharFA<TAccept>();
            unit.Transitions.Add((char)literal, new CharFA<TAccept>(true, accept));
            if ('*' == quantifier)
            {
                unit = Kleene(unit, accept);
            }
            else if ('+' == quantifier)
            {
                unit = Repeat(unit, accept);
            }
            else
            {
                unit = Optional(unit, accept);
            }
            pc.Advance();
            tail = current.FirstAcceptingState as CharFA<TAccept>;
            tail.IsAccepting = false;
            tail.EpsilonTransitions.Add(unit);
            current = unit;
        }
        else
        {
            // no quantifier: extend the previous accepting state with a direct transition
            tail = current.FirstAcceptingState as CharFA<TAccept>;
            tail.IsAccepting = false;
            tail.Transitions.Add((char)literal, new CharFA<TAccept>(true, accept));
            current = tail;
        }
    }
}
/// <summary>
/// Parses regular expression text from the parse context into an expression tree
/// </summary>
/// <param name="pc">The parse context positioned over the expression text</param>
/// <returns>The parsed expression, or null if nothing was parsed. Parsing stops at the end of input or at an unconsumed ')'.</returns>
internal static RegexExpression Parse(ParseContext pc)
{
    RegexExpression result = null, next = null;
    int ich;
    pc.EnsureStarted();
    // location of the term about to be parsed; refreshed after every term
    var line = pc.Line;
    var column = pc.Column;
    var position = pc.Position;
    while (true)
    {
        switch (pc.Current)
        {
            case -1:
                return result;

            case '.':
                // a set spanning char.MinValue through char.MaxValue
                next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetRangeEntry(char.MinValue, char.MaxValue) }, false);
                next.SetLocation(line, column, position);
                pc.Advance();
                // the modifier binds to the dot alone, not to everything parsed so far,
                // matching how every other term in this method is handled
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            case '\\':
                if (-1 != (ich = _ParseEscape(pc)))
                {
                    next = new RegexLiteralExpression((char)ich);
                    next.SetLocation(line, column, position);
                    next = _ParseModifier(next, pc);
                    if (null != result)
                    {
                        result = new RegexConcatExpression(result, next);
                        result.SetLocation(line, column, position);
                    }
                    else
                    {
                        result = next;
                    }
                }
                else
                {
                    pc.Expecting(); // throw an error
                    return null; // doesn't execute
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            case ')':
                // end of a group; the caller that saw the '(' consumes it
                return result;

            case '(':
                pc.Advance();
                pc.Expecting();
                next = Parse(pc);
                pc.Expecting(')');
                pc.Advance();
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            case '|':
                if (-1 != pc.Advance())
                {
                    // everything parsed so far is the left alternative, the remainder the right
                    next = Parse(pc);
                    result = new RegexOrExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                else
                {
                    // nothing follows the bar: the right alternative is left null
                    result = new RegexOrExpression(result, null);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            case '[':
                pc.ClearCapture();
                pc.Advance();
                pc.Expecting();
                bool not = false;
                if ('^' == pc.Current)
                {
                    not = true;
                    pc.Advance();
                    pc.Expecting();
                }
                // an empty entry list is passed through as-is; no debugger break is raised for it
                var ranges = _ParseRanges(pc);
                pc.Expecting(']');
                pc.Advance();
                next = new RegexCharsetExpression(ranges, not);
                next.SetLocation(line, column, position);
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    // use the saved term location, as every other case does
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            default:
                ich = pc.Current;
                next = new RegexLiteralExpression((char)ich);
                next.SetLocation(line, column, position);
                pc.Advance();
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;
        }
    }
}
/// <summary>
/// Parses the entries of a bracketed character set, stopping at the closing ']' or the end of input
/// </summary>
/// <param name="pc">The parse context positioned on the first character inside the set</param>
/// <returns>The list of single character, range and named class entries in the order they were completed. The closing ']' is not consumed.</returns>
static IList<RegexCharsetEntry> _ParseRanges(ParseContext pc)
{
    pc.EnsureStarted();
    var result = new List<RegexCharsetEntry>();
    // a single character that has been read but may still turn out to be the start of a range
    RegexCharsetEntry next = null;
    // true once a '-' has been seen directly after the pending character
    bool readDash = false;
    while (-1 != pc.Current && ']' != pc.Current)
    {
        switch (pc.Current)
        {
            case '[': // char class
                if (null != next)
                {
                    // a class cannot end a range, so flush the pending character,
                    // and keep the dash as a literal only if one was actually read
                    result.Add(next);
                    if (readDash)
                    {
                        result.Add(new RegexCharsetCharEntry('-'));
                    }
                }
                pc.Advance();
                pc.Expecting(':');
                pc.Advance();
                // capture the class name between the two colons
                var l = pc.CaptureBuffer.Length;
                pc.TryReadUntil(':', false);
                var n = pc.GetCapture(l);
                pc.Advance();
                pc.Expecting(']');
                pc.Advance();
                result.Add(new RegexCharsetClassEntry(n));
                readDash = false;
                next = null;
                break;

            case '\\':
                // _ParseEscape is invoked while the cursor is still on the backslash
                var ch = (char)_ParseEscape(pc);
                if (null == next)
                {
                    next = new RegexCharsetCharEntry(ch);
                }
                else
                {
                    if (readDash)
                    {
                        // pending character + dash + this character form a range
                        result.Add(new RegexCharsetRangeEntry(((RegexCharsetCharEntry)next).Value, ch));
                        next = null;
                        readDash = false;
                    }
                    else
                    {
                        result.Add(next);
                        next = new RegexCharsetCharEntry(ch);
                    }
                }
                break;

            case '-':
                pc.Advance();
                if (null == next)
                {
                    // a dash with nothing before it is an ordinary character
                    next = new RegexCharsetCharEntry('-');
                    readDash = false;
                }
                else
                {
                    if (readDash)
                    {
                        result.Add(next);
                    }
                    readDash = true;
                }
                break;

            default:
                if (null == next)
                {
                    next = new RegexCharsetCharEntry((char)pc.Current);
                }
                else
                {
                    if (readDash)
                    {
                        // pending character + dash + this character form a range
                        result.Add(new RegexCharsetRangeEntry(((RegexCharsetCharEntry)next).Value, (char)pc.Current));
                        next = null;
                        readDash = false;
                    }
                    else
                    {
                        result.Add(next);
                        next = new RegexCharsetCharEntry((char)pc.Current);
                    }
                }
                pc.Advance();
                break;
        }
    }
    // flush whatever is still pending; a trailing dash is kept as a literal
    if (null != next)
    {
        result.Add(next);
        if (readDash)
        {
            next = new RegexCharsetCharEntry('-');
            result.Add(next);
        }
    }
    return result;
}