/// <summary>
/// Parses a single production: an identifier, an optional attribute list in
/// angle brackets, an optional "= expression" part, and the closing ';'.
/// </summary>
/// <param name="pc">The parse context to read from.</param>
/// <returns>The production that was read.</returns>
internal static XbnfProduction Parse(ParseContext pc)
{
    var production = new XbnfProduction();

    pc.TrySkipCCommentsAndWhiteSpace();
    production.Name = ParseIdentifier(pc);

    // optional attribute list: '<' attr { ',' attr } '>'
    if (pc.Current == '<')
    {
        pc.Advance();
        for (;;)
        {
            if (pc.Current == -1 || pc.Current == '>')
            {
                break;
            }
            var attribute = XbnfAttribute.Parse(pc);
            production.Attributes.Add(attribute);
            pc.TrySkipCCommentsAndWhiteSpace();
            pc.Expecting('>', ',');
            if (pc.Current == ',')
            {
                pc.Advance();
            }
        }
        pc.Expecting('>');
        pc.Advance();
    }

    pc.TrySkipCCommentsAndWhiteSpace();
    pc.Expecting(';', '=');

    // the expression is optional; a bare "name;" is accepted
    if (pc.Current == '=')
    {
        pc.Advance();
        production.Expression = XbnfExpression.Parse(pc);
    }

    pc.Expecting(';');
    pc.Advance();
    return production;
}
// Verifies that Advance() steps through the token stream in order and
// reports success while tokens remain.
public void Advances()
{
    var tokens = Stock.Tokens.Stream(Numbers.Three, Numbers.Two, Numbers.One);
    var context = new ParseContext<Numbers>(tokens, Numbers.EOF);

    // the first token is current before any Advance()
    Assert.AreEqual(Numbers.Three, context.Current.Id);

    var movedToSecond = context.Advance();
    Assert.IsTrue(movedToSecond);
    Assert.AreEqual(Numbers.Two, context.Current.Id);

    var movedToThird = context.Advance();
    Assert.IsTrue(movedToThird);
    Assert.AreEqual(Numbers.One, context.Current.Id);
}
// Verifies that the EOF token sits at the end of the source text and has zero length.
public void EofIsAtEndOfSource()
{
    const string source = "OneTwoThree";
    var tokens = Stock.Lexers.GetOneTwoThree().Tokenize(source);
    var context = new ParseContext<Numbers>(tokens, Numbers.EOF);

    // three steps: onto Two, onto Three, then onto EOF
    for (var step = 0; step < 3; step++)
    {
        context.Advance();
    }

    Assert.AreEqual(Numbers.EOF, context.Current.Id);
    Assert.AreEqual(source.Length, context.Current.Start);
    Assert.AreEqual(0, context.Current.Length);
}
// Verifies that advancing past the only token reports failure and leaves
// the EOF token as the current token.
public void AdancedPastEndBecomesEOF()
{
    var tokens = Stock.Tokens.Stream(Numbers.One).ToList();
    var context = new ParseContext<Numbers>(tokens, Numbers.EOF);

    var advanced = context.Advance();

    Assert.IsFalse(advanced);
    Assert.AreEqual(Numbers.EOF, context.Current.Id);
}
/// <summary>
/// Parses an attribute list enclosed in angle brackets, delegating each
/// entry to <c>_ParseAttribute</c>, and skips trailing comments/whitespace.
/// </summary>
/// <param name="doc">The document being built (passed through to <c>_ParseAttribute</c>).</param>
/// <param name="id">The production identifier (passed through to <c>_ParseAttribute</c>).</param>
/// <param name="prod">The production receiving the attributes.</param>
/// <param name="pc">The parse context to read from.</param>
static void _ParseAttributes(EbnfDocument doc, string id, EbnfProduction prod, ParseContext pc)
{
    pc.TrySkipCCommentsAndWhiteSpace();
    pc.Expecting('<');
    pc.Advance();

    for (;;)
    {
        // stop at end of input or at the closing bracket
        if (pc.Current == -1 || pc.Current == '>')
        {
            break;
        }

        _ParseAttribute(doc, id, prod, pc);
        pc.TrySkipCCommentsAndWhiteSpace();
        pc.Expecting(',', '>');
        if (pc.Current == ',')
        {
            pc.Advance();
        }
    }

    pc.Expecting('>');
    pc.Advance();
    pc.TrySkipCCommentsAndWhiteSpace();
}
/// <summary>
/// Parses one "id [&lt;attributes&gt;] = expression ;" production and merges it
/// into <paramref name="doc"/>. When a production with the same id already has
/// an expression, the new expression is OR-ed with the existing one.
/// </summary>
/// <param name="doc">The document whose Productions table is updated.</param>
/// <param name="pc">The parse context to read from.</param>
static void _ParseProduction(EbnfDocument doc, ParseContext pc)
{
    pc.TrySkipCCommentsAndWhiteSpace();

    // remember where the production starts for location info
    var startLine = pc.Line;
    var startColumn = pc.Column;
    var startPosition = pc.Position;

    var id = _ParseIdentifier(pc);
    pc.TrySkipCCommentsAndWhiteSpace();

    // reuse an existing production of the same name, or register a new one
    EbnfProduction prod;
    if (!doc.Productions.TryGetValue(id, out prod))
    {
        prod = new EbnfProduction();
        doc.Productions.Add(id, prod);
    }

    if (pc.Current == '<')
    {
        _ParseAttributes(doc, id, prod, pc);
        pc.TrySkipCCommentsAndWhiteSpace();
    }

    pc.Expecting('=');
    pc.Advance();
    pc.Expecting();

    var parsed = _ParseExpression(doc, pc);
    pc.TrySkipCCommentsAndWhiteSpace();
    pc.Expecting(';');
    pc.Advance();
    pc.TrySkipCCommentsAndWhiteSpace();

    // a repeated production id becomes an alternation with the previous body
    prod.Expression = (null == prod.Expression)
        ? parsed
        : new EbnfOrExpression(prod.Expression, parsed);

    prod.SetLocationInfo(startLine, startColumn, startPosition);
}
/// <summary>
/// Wraps <paramref name="expr"/> in a repeat/optional expression when the
/// cursor is on one of the modifiers '*', '+', '?' or a "{min,max}" range.
/// Returns <paramref name="expr"/> unchanged when no modifier is present.
/// </summary>
/// <param name="expr">The expression the modifier applies to.</param>
/// <param name="pc">The parse context, positioned just after the expression.</param>
/// <returns>The wrapped expression, or the original one.</returns>
static RegexExpression _ParseModifier(RegexExpression expr, ParseContext pc)
{
    var line = pc.Line;
    var column = pc.Column;
    var position = pc.Position;
    var symbol = pc.Current;

    // single-character modifiers share the same tail: set location, consume
    if ('*' == symbol || '+' == symbol || '?' == symbol)
    {
        if ('*' == symbol)
        {
            expr = new RegexRepeatExpression(expr);
        }
        else if ('+' == symbol)
        {
            expr = new RegexRepeatExpression(expr, 1);
        }
        else
        {
            expr = new RegexOptionalExpression(expr);
        }
        expr.SetLocation(line, column, position);
        pc.Advance();
        return expr;
    }

    if ('{' != symbol)
    {
        return expr;
    }

    // "{min}", "{min,}", "{,max}" or "{min,max}"; -1 marks a bound that was not given
    pc.Advance();
    pc.TrySkipWhiteSpace();
    pc.Expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '}');
    var lower = -1;
    var upper = -1;

    var hasLower = ',' != pc.Current && '}' != pc.Current;
    if (hasLower)
    {
        var mark = pc.CaptureBuffer.Length;
        pc.TryReadDigits();
        lower = int.Parse(pc.GetCapture(mark));
        pc.TrySkipWhiteSpace();
    }

    if (',' == pc.Current)
    {
        pc.Advance();
        pc.TrySkipWhiteSpace();
        pc.Expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '}');
        if ('}' != pc.Current)
        {
            var mark = pc.CaptureBuffer.Length;
            pc.TryReadDigits();
            upper = int.Parse(pc.GetCapture(mark));
            pc.TrySkipWhiteSpace();
        }
    }

    pc.Expecting('}');
    pc.Advance();
    expr = new RegexRepeatExpression(expr, lower, upper);
    expr.SetLocation(line, column, position);
    return expr;
}
/// <summary>
/// Runs the table-driven DFA over the input, capturing every consumed
/// character, until the input ends or no transition is available.
/// </summary>
/// <param name="dfaTable">The DFA state table; state 0 is the start state.</param>
/// <param name="context">The parse context supplying the input.</param>
/// <returns>
/// true if the state reached when matching stops has an accept symbol
/// (AcceptSymbolId other than -1); otherwise false. On failure one extra
/// input character is consumed.
/// </returns>
static bool _DoMatchDfa(CharDfaEntry[] dfaTable, ParseContext context)
{
    context.EnsureStarted();

    var current = 0;
    for (;;)
    {
        if (-1 != context.Current)
        {
            var target = MoveDfa(dfaTable, current, (char)context.Current);
            if (-1 != target)
            {
                // transition found: keep the character and move on
                context.CaptureCurrent();
                context.Advance();
                current = target;
                continue;
            }

            // stuck: succeed only if we are sitting on an accepting state
            if (-1 != dfaTable[current].AcceptSymbolId)
            {
                return true;
            }

            // error: step over the offending character
            context.Advance();
            return false;
        }

        // end of input: the answer is whether the current state accepts
        return -1 != dfaTable[current].AcceptSymbolId;
    }
}
/// <summary>
/// Skips a run of whitespace, stopping at a line feed or at the first
/// non-whitespace character.
/// </summary>
/// <param name="pc">The parse context to advance.</param>
/// <returns>
/// false if the cursor was at end of input, on a line feed, or on a
/// non-whitespace character when called; otherwise true.
/// </returns>
static bool _SkipWhiteSpace(ParseContext pc)
{
    pc.EnsureStarted();

    var first = pc.Current;
    if (-1 == first || '\n' == first)
    {
        return false;
    }
    if (!char.IsWhiteSpace((char)first))
    {
        return false;
    }

    for (;;)
    {
        if (-1 == pc.Advance())
        {
            break;
        }
        if ('\n' == pc.Current)
        {
            break;
        }
        if (!char.IsWhiteSpace((char)pc.Current))
        {
            break;
        }
    }
    return true;
}
/// <summary>
/// Parses a rule of the form "Left -> sym1 sym2 ..." where symbols are
/// delimited by whitespace and the rule ends at a line feed or end of input.
/// </summary>
/// <param name="pc">The parse context to read from.</param>
/// <returns>The rule with its Left symbol and Right symbol list filled in.</returns>
static CfgRule _Parse(ParseContext pc)
{
    var rule = new CfgRule();

    // left-hand side: everything up to whitespace or the '-' of the arrow
    pc.TrySkipWhiteSpace();
    pc.ClearCapture();
    pc.TryReadUntil(false, ' ', '\t', '\r', '\n', '\f', '\v', '-');
    rule.Left = pc.Capture;

    // the "->" arrow
    pc.TrySkipWhiteSpace();
    pc.Expecting('-');
    pc.Advance();
    pc.Expecting('>');
    pc.Advance();

    // right-hand side symbols, one per iteration
    for (;;)
    {
        if (-1 == pc.Current || '\n' == pc.Current)
        {
            break;
        }
        pc.TrySkipWhiteSpace();
        pc.ClearCapture();
        pc.TryReadUntil(false, ' ', '\t', '\r', '\n', '\f', '\v');
        rule.Right.Add(pc.Capture);
    }

    return rule;
}
/// <summary>
/// Runs this state machine as a DFA over the input, starting at this state
/// and capturing each consumed character.
/// </summary>
/// <param name="context">The parse context supplying the input.</param>
/// <returns>
/// true if the state reached when matching stops is accepting; otherwise
/// false. On failure one extra input character is consumed.
/// </returns>
bool _DoMatchDfa(ParseContext context)
{
    var current = this;
    for (;;)
    {
        if (-1 != context.Current)
        {
            var target = current.MoveDfa((char)context.Current);
            if (null != target)
            {
                // transition found: keep the character and follow it
                context.CaptureCurrent();
                context.Advance();
                current = target;
                continue;
            }

            // no transition: succeed only when already accepting
            if (current.IsAccepting)
            {
                return true;
            }

            // error: step over the offending character
            context.Advance();
            return false;
        }

        // end of input
        return current.IsAccepting;
    }
}
// almost the same as our lex methods
/// <summary>
/// Runs this state machine over the input by tracking a set of active
/// states, capturing each consumed character.
/// </summary>
/// <param name="context">The parse context supplying the input.</param>
/// <returns>
/// true if any active state is accepting when matching stops; otherwise
/// false. On failure one extra input character is consumed.
/// </returns>
bool _DoMatch(ParseContext context)
{
    // start from the epsilon closure of this state
    var active = FillEpsilonClosure();
    for (;;)
    {
        if (-1 != context.Current)
        {
            var reached = FillMove(active, (char)context.Current);
            if (0 != reached.Count)
            {
                // at least one state moved: keep the character and continue
                context.CaptureCurrent();
                context.Advance();
                active = reached;
                continue;
            }

            // nothing moved: succeed only when already accepting
            if (IsAnyAccepting(active))
            {
                return true;
            }

            // error: step over the offending character
            context.Advance();
            return false;
        }

        // end of input
        return IsAnyAccepting(active);
    }
}
/// <summary>
/// Parses one "name" or "name = jsonValue" attribute and stores it on
/// <paramref name="prod"/>. An attribute without a value is stored as true.
/// </summary>
/// <param name="doc">Not used by this method.</param>
/// <param name="id">Not used by this method.</param>
/// <param name="prod">The production whose Attributes map is updated.</param>
/// <param name="pc">The parse context to read from.</param>
static void _ParseAttribute(EbnfDocument doc, string id, EbnfProduction prod, ParseContext pc)
{
    pc.TrySkipCCommentsAndWhiteSpace();
    var name = _ParseIdentifier(pc);
    pc.TrySkipCCommentsAndWhiteSpace();
    pc.Expecting('=', '>', ',');

    object value;
    if ('=' != pc.Current)
    {
        // bare attribute name acts as a boolean flag
        value = true;
    }
    else
    {
        pc.Advance();
        value = pc.ParseJsonValue();
    }

    // NOTE(review): no whitespace is skipped between the value and this check;
    // confirm ParseJsonValue() leaves the cursor on ',' or '>' for "a = 1 ,".
    pc.Expecting(',', '>');
    prod.Attributes[name] = value;
    pc.TrySkipCCommentsAndWhiteSpace();
}
// Verifies that repeatedly advancing past the end keeps reporting failure
// and leaves the EOF token as the current token.
public void AdanceWellBeyondEnd()
{
    const int attempts = 6;
    var context = new ParseContext<Numbers>(Stock.Tokens.Stream(Numbers.One), Numbers.EOF);

    for (var attempt = 0; attempt < attempts; attempt++)
    {
        Assert.IsFalse(context.Advance());
    }

    Assert.AreEqual(Numbers.EOF, context.Current.Id);
}
/// <summary>
/// Parses one attribute of the form "name" or "name = jsonValue".
/// An attribute without a value gets the value true.
/// </summary>
/// <param name="pc">The parse context to read from.</param>
/// <returns>The attribute, with its location set to where it started.</returns>
internal static XbnfAttribute Parse(ParseContext pc)
{
    pc.TrySkipCCommentsAndWhiteSpace();
    var attr = new XbnfAttribute();
    attr.SetLocation(pc.Line, pc.Column, pc.Position);
    attr.Name = ParseIdentifier(pc);
    pc.TrySkipCCommentsAndWhiteSpace();
    // each acceptable character is listed once (',' used to be listed twice)
    pc.Expecting(',', '=', '>');
    if ('=' == pc.Current)
    {
        pc.Advance();
        attr.Value = pc.ParseJsonValue();
    }
    else
    {
        // bare attribute name acts as a boolean flag
        attr.Value = true;
    }
    return attr;
}
/// <summary>
/// Parses one attribute of the form "name" or "name = jsonValue".
/// An attribute without a value gets the value true.
/// </summary>
/// <param name="pc">The parse context to read from.</param>
/// <returns>The attribute, with its location set to where it started.</returns>
internal static LexAttribute Parse(ParseContext pc)
{
    SkipCommentsAndWhitespace(pc);
    var attr = new LexAttribute();
    attr.SetLocation(pc.Line, pc.Column, pc.Position);
    attr.Name = _ParseAttrName(pc);
    SkipCommentsAndWhitespace(pc);
    // each acceptable character is listed once (',' used to be listed twice)
    pc.Expecting(',', '=', '>', '\n');
    if ('=' == pc.Current)
    {
        pc.Advance();
        attr.Value = pc.ParseJsonValue();
    }
    else
    {
        // bare attribute name acts as a boolean flag
        attr.Value = true;
    }
    return attr;
}
/// <summary>
/// Reads an identifier: a letter or '_' followed by any run of letters,
/// digits, '_' or '-'. Surrounding comments and whitespace are skipped.
/// </summary>
/// <param name="pc">The parse context to read from.</param>
/// <returns>The identifier text captured from the input.</returns>
static string _ParseIdentifier(ParseContext pc)
{
    pc.TrySkipCCommentsAndWhiteSpace();
    if (pc.Current == -1)
    {
        pc.Expecting();
        return null;
    }

    // remember where our part of the capture buffer begins
    var captureStart = pc.CaptureBuffer.Length;

    var validStart = '_' == pc.Current || char.IsLetter((char)pc.Current);
    if (!validStart)
    {
        pc.Expecting("ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".ToCharArray().Convert<int>().ToArray());
    }

    pc.CaptureCurrent();
    for (;;)
    {
        if (-1 == pc.Advance())
        {
            break;
        }
        var isIdentifierChar = '_' == pc.Current
            || '-' == pc.Current
            || char.IsLetterOrDigit((char)pc.Current);
        if (!isIdentifierChar)
        {
            break;
        }
        pc.CaptureCurrent();
    }

    pc.TrySkipCCommentsAndWhiteSpace();
    return pc.GetCapture(captureStart);
}
/// <summary>
/// Parses an ISO-8601 style date/time string ("[-]YYYY-MM-DDThh:mm:ss[.fff][Z|+hh:mm]")
/// restricted to the components selected by <paramref name="part"/>, and stores
/// the result in <c>myValue</c>, <c>eTZ</c> and <c>offsetTZ</c>.
/// </summary>
/// <param name="s">The text to parse.</param>
/// <param name="part">Flags selecting which date/time components must be present.</param>
/// <returns>true if the whole string was parsed into a valid value; otherwise false.</returns>
/// <remarks>
/// <c>eTZ</c> and <c>offsetTZ</c> are reset before parsing starts, so they are
/// modified even when the method returns false.
/// NOTE(review): ReadDigitAndAdvance(ref v, scale, maxDigit) is assumed to add
/// digit*scale to v when the current character is a digit not above maxDigit.
/// </remarks>
protected bool ParseDateTime(string s, DateTimePart part)
{
    ParseContext context = new ParseContext(s);

    bool bDatePart = (part & DateTimePart.Date) != 0;
    bool bTimePart = (part & DateTimePart.Time) != 0;

    int year = 0, month = 0, day = 0;
    int hour = 0, minute = 0;
    double second = 0;

    eTZ = ETZ.Missing;
    offsetTZ = 0;

    if (bDatePart)
    {
        // parse date
        bool bNegative = context.CheckAndAdvance('-');

        if ((part & DateTimePart.Year) != 0)
        {
            int digits = 0;
            int temp = 0;
            while (context.ReadDigitAndAdvance(ref temp, 1, 9))
            {
                year = year * 10 + temp;
                digits += 1;
                temp = 0;
                if (digits >= 8) // overflow
                {
                    return false;
                }
            }
            if (digits < 4) // invalid.
            {
                return false;
            }
            // more than four digits are only allowed without leading zeros
            if (digits > 4 && year < 10000)
            {
                return false;
            }
            if (bNegative)
            {
                year = -year;
            }
        }

        if ((part & (DateTimePart.Month | DateTimePart.Day)) != 0)
        {
            if (!context.CheckAndAdvance('-'))
            {
                return false;
            }

            if ((part & DateTimePart.Month) != 0)
            {
                if (!context.ReadDigitAndAdvance(ref month, 10, 1))
                {
                    return false;
                }
                if (!context.ReadDigitAndAdvance(ref month, 1, month < 10 ? 9 : 2))
                {
                    return false;
                }
                if (month == 0)
                {
                    return false;
                }
            }

            if ((part & DateTimePart.Day) != 0)
            {
                if (!context.CheckAndAdvance('-'))
                {
                    return false;
                }

                int maxFirstDigit = month != 2 ? 3 : 2;

                // complicate things by making them complicated.
                if (!context.ReadDigitAndAdvance(ref day, 10, maxFirstDigit))
                {
                    return false;
                }
                if (!context.ReadDigitAndAdvance(ref day, 1, 9))
                {
                    return false;
                }
                if (day == 0 || day > 31)
                {
                    return false;
                }

                if ((part & DateTimePart.Month) != 0)
                {
                    bool b1 = month <= 7;
                    bool b2 = (month & 1) == 0;

                    // b1 == b2 selects the 30-day months (4, 6, 9, 11) and February
                    if (b1 == b2 && day > 30)
                    {
                        return false;
                    }

                    // february.
                    if (month == 2 && day > 29)
                    {
                        return false;
                    }

                    // leap years.
                    if (month == 2 && (part & DateTimePart.Year) != 0 &&
                        (year % 4 != 0 || year % 100 == 0) && year % 400 != 0 && day > 28)
                    {
                        return false;
                    }
                }
            }
        }

        if (bTimePart)
        {
            // a 'T' must follow
            if (!context.CheckAndAdvance('T'))
            {
                return false;
            }
        }
    }

    if (bTimePart)
    {
        // check format here

        // hour from 0 to 2
        if (!context.ReadDigitAndAdvance(ref hour, 10, 2))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref hour, 1, hour < 20 ? 9 : 4))
        {
            return false;
        }
        if (!context.CheckAndAdvance(':'))
        {
            return false;
        }

        // 24:00:00 is the only value allowed with hour 24
        int maxFirstDigit = hour == 24 ? 0 : 5;
        int maxSecondDigit = hour == 24 ? 0 : 9;
        if (!context.ReadDigitAndAdvance(ref minute, 10, maxFirstDigit))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref minute, 1, maxSecondDigit))
        {
            return false;
        }
        if (!context.CheckAndAdvance(':'))
        {
            return false;
        }

        int secondInt = 0;
        if (!context.ReadDigitAndAdvance(ref secondInt, 10, maxFirstDigit))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref secondInt, 1, maxSecondDigit))
        {
            return false;
        }
        second = secondInt;

        if (context.CheckAndAdvance('.'))
        {
            // fraction. do whatever seems fit.
            int val = 0;
            int digits = 0;
            while (context.ReadDigitAndAdvance(ref val, 1, 9))
            {
                val *= 10;
                digits += 1;
                if (digits >= 8) // precision loss - ignore
                {
                    break;
                }
            }
            if (digits == 0)
            {
                return false;
            }

            // val carries one extra factor of 10 from the loop, hence -digits - 1
            second += val * Math.Pow(10.0, -digits - 1);

            // skip any further digits.
            while (context.ReadDigitAndAdvance(ref val, 0, 9))
            {
            }
        }
    }

    // timezone
    if (context.CheckAndAdvance('Z'))
    {
        // timezone specified, it is UTC.
        eTZ = ETZ.UTC;
        offsetTZ = 0;
    }
    else if (context.Check('+') || context.Check('-'))
    {
        // timezone offset, in hour:minute format
        bool bNegative = context.Check('-');
        context.Advance();

        // do not check the hour part, for those who are obscure.
        int temp = 0;
        if (!context.ReadDigitAndAdvance(ref temp, 600, 9))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref temp, 60, 9))
        {
            return false;
        }
        if (!context.CheckAndAdvance(':'))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref temp, 10, 5))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref temp, 1, 9))
        {
            return false;
        }

        eTZ = ETZ.Offset;
        offsetTZ = bNegative ? -temp : temp;
    }

    // NOTE(review): IsValid() is taken to mean "input remains"; trailing text is an error
    if (context.IsValid())
    {
        return false;
    }

    // C# specific: System.DateTime needs year, month and day of at least 1
    if (year <= 0)
    {
        year = 1;
    }
    if (month == 0)
    {
        month = 1;
    }
    bool badjust = false;
    if (hour == 24)
    {
        hour = 0;
        badjust = true;
    }
    if (day == 0)
    {
        day = 1;
    }
    // a year-less Feb 29 needs a leap year as placeholder, otherwise the
    // DateTime constructor below rejects it
    if ((part & DateTimePart.Year) == 0 && month == 2 && day == 29)
    {
        year = 4;
    }

    try
    {
        myValue = new System.DateTime(year, month, day, hour, minute, (int)second, (int)(second * 1000) % 1000);
        if (badjust)
        {
            // DateTime is immutable: AddDays returns the shifted value
            myValue = myValue.AddDays(1);
        }
    }
    catch
    {
        // any out-of-range component makes the input invalid
        return false;
    }
    return true;
}
/// <summary>
/// Parses an ISO-8601 style date/time string ("[-]YYYY-MM-DDThh:mm:ss[.fff][Z|+hh:mm]")
/// restricted to the components selected by <paramref name="part"/>, and stores
/// the result in <c>myValue</c> and <c>timezone</c>.
/// </summary>
/// <param name="s">The text to parse.</param>
/// <param name="part">Flags selecting which date/time components must be present.</param>
/// <returns>true if the whole string was parsed into a valid value; otherwise false.</returns>
/// <remarks>
/// <c>timezone</c> is reset to <c>NoTimezone</c> before parsing starts, so it
/// is modified even when the method returns false.
/// NOTE(review): ReadDigitAndAdvance(ref v, scale, maxDigit) is assumed to add
/// digit*scale to v when the current character is a digit not above maxDigit.
/// </remarks>
bool ParseDateTime(string s, DateTimePart part)
{
    ParseContext context = new ParseContext(s);

    bool bDatePart = (part & DateTimePart.Date) != 0;
    bool bTimePart = (part & DateTimePart.Time) != 0;

    int year = 0, month = 0, day = 0;
    int hour = 0, minute = 0;
    double second = 0;

    timezone = NoTimezone;

    if (bDatePart)
    {
        // parse date
        bool bNegative = context.CheckAndAdvance('-');

        if ((part & DateTimePart.Year) != 0)
        {
            int digits = 0;
            int temp = 0;
            while (context.ReadDigitAndAdvance(ref temp, 1, 9))
            {
                year = year * 10 + temp;
                digits += 1;
                temp = 0;
                if (digits >= 8) // overflow
                {
                    return false;
                }
            }
            if (digits < 4) // invalid.
            {
                return false;
            }
            // more than four digits are only allowed without leading zeros
            if (digits > 4 && year < 10000)
            {
                return false;
            }
            if (bNegative)
            {
                year = -year;
            }
        }

        if ((part & (DateTimePart.Month | DateTimePart.Day)) != 0)
        {
            if (!context.CheckAndAdvance('-'))
            {
                return false;
            }

            if ((part & DateTimePart.Month) != 0)
            {
                if (!context.ReadDigitAndAdvance(ref month, 10, 1))
                {
                    return false;
                }
                if (!context.ReadDigitAndAdvance(ref month, 1, month < 10 ? 9 : 2))
                {
                    return false;
                }
                if (month == 0)
                {
                    return false;
                }
            }

            if ((part & DateTimePart.Day) != 0)
            {
                if (!context.CheckAndAdvance('-'))
                {
                    return false;
                }

                int maxFirstDigit = month != 2 ? 3 : 2;

                // complicate things by making them complicated.
                if (!context.ReadDigitAndAdvance(ref day, 10, maxFirstDigit))
                {
                    return false;
                }
                if (!context.ReadDigitAndAdvance(ref day, 1, 9))
                {
                    return false;
                }
                if (day == 0 || day > 31)
                {
                    return false;
                }

                if ((part & DateTimePart.Month) != 0)
                {
                    bool b1 = month <= 7;
                    bool b2 = (month & 1) == 0;

                    // b1 == b2 selects the 30-day months (4, 6, 9, 11) and February
                    if (b1 == b2 && day > 30)
                    {
                        return false;
                    }

                    // february.
                    if (month == 2 && day > 29)
                    {
                        return false;
                    }

                    // leap years.
                    if (month == 2 && (part & DateTimePart.Year) != 0 &&
                        (year % 4 != 0 || year % 100 == 0) && year % 400 != 0 && day > 28)
                    {
                        return false;
                    }
                }
            }
        }

        if (bTimePart)
        {
            // a 'T' must follow
            if (!context.CheckAndAdvance('T'))
            {
                return false;
            }
        }
    }

    if (bTimePart)
    {
        // check format here

        // hour from 0 to 2
        if (!context.ReadDigitAndAdvance(ref hour, 10, 2))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref hour, 1, hour < 20 ? 9 : 4))
        {
            return false;
        }
        if (!context.CheckAndAdvance(':'))
        {
            return false;
        }

        // 24:00:00 is the only value allowed with hour 24
        int maxFirstDigit = hour == 24 ? 0 : 5;
        int maxSecondDigit = hour == 24 ? 0 : 9;
        if (!context.ReadDigitAndAdvance(ref minute, 10, maxFirstDigit))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref minute, 1, maxSecondDigit))
        {
            return false;
        }
        if (!context.CheckAndAdvance(':'))
        {
            return false;
        }

        int secondInt = 0;
        if (!context.ReadDigitAndAdvance(ref secondInt, 10, maxFirstDigit))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref secondInt, 1, maxSecondDigit))
        {
            return false;
        }
        second = secondInt;

        if (context.CheckAndAdvance('.'))
        {
            // fraction. do whatever seems fit.
            int val = 0;
            int digits = 0;
            while (context.ReadDigitAndAdvance(ref val, 1, 9))
            {
                val *= 10;
                digits += 1;
                if (digits >= 8) // precision loss - ignore
                {
                    break;
                }
            }
            if (digits == 0)
            {
                return false;
            }

            // val carries one extra factor of 10 from the loop, hence -digits - 1
            second += val * System.Math.Pow(10.0, -digits - 1);

            // skip any further digits.
            while (context.ReadDigitAndAdvance(ref val, 0, 9))
            {
            }
        }
    }

    // timezone
    if (context.CheckAndAdvance('Z'))
    {
        // timezone specified, it is UTC.
        timezone = 0;
    }
    else if (context.Check('+') || context.Check('-'))
    {
        // timezone offset, in hour:minute format
        bool bNegative = context.Check('-');
        context.Advance();

        // do not check the hour part, for those who are obscure.
        int temp = 0;
        if (!context.ReadDigitAndAdvance(ref temp, 600, 9))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref temp, 60, 9))
        {
            return false;
        }
        if (!context.CheckAndAdvance(':'))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref temp, 10, 5))
        {
            return false;
        }
        if (!context.ReadDigitAndAdvance(ref temp, 1, 9))
        {
            return false;
        }

        timezone = (short)(bNegative ? -temp : temp);
    }

    // NOTE(review): IsValid() is taken to mean "input remains"; trailing text is an error
    if (context.IsValid())
    {
        return false;
    }

    // C# specific: System.DateTime needs year, month and day of at least 1
    if (year <= 0)
    {
        year = 1;
    }
    if (month == 0)
    {
        month = 1;
    }
    bool badjust = false;
    if (hour == 24)
    {
        hour = 0;
        badjust = true;
    }
    if (day == 0)
    {
        day = 1;
    }
    // a year-less Feb 29 needs a leap year as placeholder
    if ((part & DateTimePart.Year) == 0 && month == 2 && day == 29)
    {
        year = 4;
    }

    try
    {
        myValue = new System.DateTime(year, month, day, hour, minute, (int)second, (int)(second * 1000) % 1000);
        if (badjust)
        {
            // DateTime is immutable: AddDays returns the shifted value
            myValue = myValue.AddDays(1);
        }
    }
    catch
    {
        // any out-of-range component makes the input invalid
        return false;
    }
    return true;
}
/// <summary>
/// Parses a regular expression into an expression tree. Parsing stops at end
/// of input or at an unmatched ')' (which is left for the caller to consume).
/// </summary>
/// <param name="pc">The parse context to read from.</param>
/// <returns>
/// The parsed expression, or null when nothing was read before the end of
/// input or a ')'.
/// </returns>
internal static RegexExpression Parse(ParseContext pc)
{
    RegexExpression result = null, next = null;
    int ich;
    pc.EnsureStarted();

    // location of the operand about to be parsed; refreshed after each operand
    var line = pc.Line;
    var column = pc.Column;
    var position = pc.Position;

    while (true)
    {
        switch (pc.Current)
        {
            case -1:
                return result;

            case '.':
                next = new RegexCharsetExpression(
                    new RegexCharsetEntry[] { new RegexCharsetRangeEntry(char.MinValue, char.MaxValue) },
                    false);
                next.SetLocation(line, column, position);
                pc.Advance();
                // the modifier binds to the '.' alone, not to everything parsed so
                // far: "a.*" is a followed by (.)*, as with every other operand kind
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            case '\\':
                if (-1 != (ich = _ParseEscape(pc)))
                {
                    next = new RegexLiteralExpression((char)ich);
                    next.SetLocation(line, column, position);
                    next = _ParseModifier(next, pc);
                    if (null != result)
                    {
                        result = new RegexConcatExpression(result, next);
                        result.SetLocation(line, column, position);
                    }
                    else
                    {
                        result = next;
                    }
                }
                else
                {
                    pc.Expecting(); // throw an error
                    return null;    // doesn't execute
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            case ')':
                // end of a group: the caller that saw '(' consumes the ')'
                return result;

            case '(':
                pc.Advance();
                pc.Expecting();
                next = Parse(pc);
                pc.Expecting(')');
                pc.Advance();
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            case '|':
                if (-1 != pc.Advance())
                {
                    // everything to the right becomes the second alternative
                    next = Parse(pc);
                    result = new RegexOrExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                else
                {
                    // trailing '|': the second alternative is empty
                    result = new RegexOrExpression(result, null);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            case '[':
                pc.ClearCapture();
                pc.Advance();
                pc.Expecting();
                bool not = false;
                if ('^' == pc.Current)
                {
                    not = true;
                    pc.Advance();
                    pc.Expecting();
                }
                var ranges = _ParseRanges(pc);
                pc.Expecting(']');
                pc.Advance();
                next = new RegexCharsetExpression(ranges, not);
                next.SetLocation(line, column, position);
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    // same operand-start location as the other concatenations
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;

            default:
                ich = pc.Current;
                next = new RegexLiteralExpression((char)ich);
                next.SetLocation(line, column, position);
                pc.Advance();
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;
        }
    }
}
/// <summary>
/// Converts a string containing set of values separated by commas into
/// a <see cref="Data"/> object for interaction with individual values
/// </summary>
/// <param name="csvContent">The CSV text to parse.</param>
/// <param name="options">
/// Parse options. When null, or when <c>HasHeaders</c> is not set, the first
/// line is treated as the header row.
/// </param>
/// <returns>
/// The parsed rows. When no non-empty header row was read, headers are
/// generated as "Column 0", "Column 1", ... up to the widest row.
/// </returns>
public static Data Parse(string csvContent, ParseOptions options = null)
{
    var context = new ParseContext<Tokens>(Lexer.Default.Tokenize(csvContent), Tokens.EOF);
    var r = new Data();
    var addHeader = options?.HasHeaders ?? true;
    List<string> headers = null;
    var items = new List<string>();

    // widest data row seen so far; used to size generated headers
    var widestRow = 0;

    while (context.Current.Id != Tokens.EOF)
    {
        switch (context.Current.Id)
        {
            case Tokens.Value:
                items.Add(context.Current.Value);
                break;

            case Tokens.EOL:
                if (addHeader)
                {
                    headers = items;
                }
                else
                {
                    r.AddRow(items);
                    if (items.Count > widestRow)
                    {
                        widestRow = items.Count;
                    }
                }
                addHeader = false;
                items = new List<string>();
                break;

            // TODO: Add error handling and report Tokens.Unknown and
            // Tokens.Unterminated as errors. The remaining token types
            // (EOF, Delimiter, NullValue, Quoted, EmbeddedQuoted) are ignored here.
        }
        context.Advance();
    }

    // last line without a terminating EOL
    if (items.Any())
    {
        r.AddRow(items);
        if (items.Count > widestRow)
        {
            widestRow = items.Count;
        }
    }

    // Size generated headers from the widest row rather than from the pending
    // row, which is empty whenever the input ends with an EOL.
    r.SetHeaders(headers?.Any() == true
        ? headers
        : Enumerable.Range(0, widestRow).Select(i => $"Column {i}").ToList());
    return r;
}
/// <summary>
/// Parses an EBNF expression: a concatenation of terms (references, "literals",
/// 'regexes', ( groups ), [ optionals ], { repeats }) with '|' alternation.
/// Parsing stops, without consuming it, at ';', ']', ')', '}' or end of input.
/// </summary>
/// <param name="doc">The document being built (passed to recursive calls).</param>
/// <param name="pc">The parse context to read from.</param>
/// <returns>The parsed expression, or null when no term was read.</returns>
static EbnfExpression _ParseExpression(EbnfDocument doc, ParseContext pc)
{
    EbnfExpression current = null;
    EbnfExpression e;
    long position;
    int line;
    int column;
    pc.TrySkipCCommentsAndWhiteSpace();
    position = pc.Position;
    line = pc.Line;
    column = pc.Column;
    while (-1 != pc.Current && ']' != pc.Current && ')' != pc.Current && '}' != pc.Current && ';' != pc.Current)
    {
        pc.TrySkipCCommentsAndWhiteSpace();
        // location of the term about to be parsed
        position = pc.Position;
        line = pc.Line;
        column = pc.Column;
        switch (pc.Current)
        {
            case '|':
                // everything to the right becomes the second alternative
                pc.Advance();
                current = new EbnfOrExpression(current, _ParseExpression(doc, pc));
                current.SetLocationInfo(line, column, position);
                break;

            case '(':
                pc.Advance();
                e = _ParseExpression(doc, pc);
                pc.Expecting(')');
                pc.Advance();
                if (null == e)
                {
                    // empty group "()" contributes nothing to the expression
                    break;
                }
                // Only the group gets the location. "current" is null when the
                // expression starts with a group and must not be dereferenced,
                // and its own location must not be overwritten.
                e.SetLocationInfo(line, column, position);
                if (null == current)
                {
                    current = e;
                }
                else
                {
                    current = new EbnfConcatExpression(current, e);
                }
                break;

            case '[':
                pc.Advance();
                e = new EbnfOptionalExpression(_ParseExpression(doc, pc));
                e.SetLocationInfo(line, column, position);
                pc.TrySkipCCommentsAndWhiteSpace();
                pc.Expecting(']');
                pc.Advance();
                if (null == current)
                {
                    current = e;
                }
                else
                {
                    current = new EbnfConcatExpression(current, e);
                }
                break;

            case '{':
                pc.Advance();
                e = new EbnfRepeatExpression(_ParseExpression(doc, pc));
                e.SetLocationInfo(line, column, position);
                pc.TrySkipCCommentsAndWhiteSpace();
                pc.Expecting('}');
                pc.Advance();
                // "{ ... }+" marks the repeat as non-optional
                if ('+' == pc.Current)
                {
                    pc.Advance();
                    ((EbnfRepeatExpression)e).IsOptional = false;
                }
                if (null == current)
                {
                    current = e;
                }
                else
                {
                    current = new EbnfConcatExpression(current, e);
                }
                break;

            case '\"':
                e = new EbnfLiteralExpression(pc.ParseJsonString());
                if (null == current)
                {
                    current = e;
                }
                else
                {
                    current = new EbnfConcatExpression(current, e);
                }
                e.SetLocationInfo(line, column, position);
                break;

            case '\'':
                // single-quoted text is a regular expression
                pc.Advance();
                pc.ClearCapture();
                pc.TryReadUntil('\'', '\\', false);
                pc.Expecting('\'');
                pc.Advance();
                e = new EbnfRegexExpression(pc.Capture);
                if (null == current)
                {
                    current = e;
                }
                else
                {
                    current = new EbnfConcatExpression(current, e);
                }
                e.SetLocationInfo(line, column, position);
                break;

            case ';':
            case ']':
            case ')':
            case '}':
                // terminator reached after skipping whitespace inside the loop
                return current;

            default:
                e = new EbnfRefExpression(_ParseIdentifier(pc));
                if (null == current)
                {
                    current = e;
                }
                else
                {
                    current = new EbnfConcatExpression(current, e);
                }
                e.SetLocationInfo(line, column, position);
                break;
        }
    }
    pc.TrySkipCCommentsAndWhiteSpace();
    return current;
}
/// <summary>
/// Parses a backslash escape sequence and leaves the cursor on the first
/// character after it. Supports \t, \n, \r, \xHH, \uHHHH; any other
/// escaped character stands for itself.
/// </summary>
/// <param name="pc">The parse context, positioned on the backslash.</param>
/// <returns>
/// The character the escape denotes, or -1 when the cursor is not on a
/// backslash or the input ends right after it. When the input ends inside a
/// \x or \u sequence, the value accumulated so far is returned.
/// </returns>
static int _ParseEscape(ParseContext pc)
{
    if ('\\' != pc.Current)
    {
        return -1;
    }
    if (-1 == pc.Advance())
    {
        return -1;
    }
    switch (pc.Current)
    {
        case 't':
            pc.Advance();
            return '\t';
        case 'n':
            pc.Advance();
            return '\n';
        case 'r':
            pc.Advance();
            return '\r';
        case 'x':
            if (-1 == pc.Advance())
            {
                return 'x';
            }
            byte b = _FromHexChar((char)pc.Current);
            b <<= 4;
            if (-1 == pc.Advance())
            {
                return unchecked((char)b);
            }
            b |= _FromHexChar((char)pc.Current);
            // consume the last hex digit, as every other escape consumes its
            // final character; otherwise it is re-read as a literal
            pc.Advance();
            return unchecked((char)b);
        case 'u':
            if (-1 == pc.Advance())
            {
                return 'u';
            }
            ushort u = _FromHexChar((char)pc.Current);
            u <<= 4;
            if (-1 == pc.Advance())
            {
                return unchecked((char)u);
            }
            u |= _FromHexChar((char)pc.Current);
            u <<= 4;
            if (-1 == pc.Advance())
            {
                return unchecked((char)u);
            }
            u |= _FromHexChar((char)pc.Current);
            u <<= 4;
            if (-1 == pc.Advance())
            {
                return unchecked((char)u);
            }
            u |= _FromHexChar((char)pc.Current);
            // consume the last hex digit (see the 'x' case)
            pc.Advance();
            return unchecked((char)u);
        default:
            int i = pc.Current;
            pc.Advance();
            return (char)i;
    }
}
/// <summary>
/// Parses the inside of a character set (the part between '[' and ']') into
/// single characters, "a-z" ranges and "[:name:]" classes. Stops at ']' or at
/// end of input without consuming the ']'.
/// </summary>
/// <param name="pc">The parse context, positioned on the first character of the set.</param>
/// <returns>The entries in the order they were read; empty for an empty set.</returns>
static IList<RegexCharsetEntry> _ParseRanges(ParseContext pc)
{
    pc.EnsureStarted();
    var result = new List<RegexCharsetEntry>();
    // pending single character that may still turn into the start of a range
    RegexCharsetEntry next = null;
    // true when a '-' was read after the pending character
    bool readDash = false;
    while (-1 != pc.Current && ']' != pc.Current)
    {
        switch (pc.Current)
        {
            case '[': // char class
                if (null != next)
                {
                    // Flush the pending character; a dash is literal only when
                    // one was actually read. The same rule is applied after the
                    // loop. Adding a '-' unconditionally would put a stray '-'
                    // into sets like "[a[:digit:]]".
                    result.Add(next);
                    if (readDash)
                    {
                        result.Add(new RegexCharsetCharEntry('-'));
                    }
                }
                pc.Advance();
                pc.Expecting(':');
                pc.Advance();
                var l = pc.CaptureBuffer.Length;
                pc.TryReadUntil(':', false);
                var n = pc.GetCapture(l);
                pc.Advance();
                pc.Expecting(']');
                pc.Advance();
                result.Add(new RegexCharsetClassEntry(n));
                readDash = false;
                next = null;
                break;

            case '\\':
                // _ParseEscape consumes the backslash itself
                var ch = (char)_ParseEscape(pc);
                if (null == next)
                {
                    next = new RegexCharsetCharEntry(ch);
                }
                else
                {
                    if (readDash)
                    {
                        result.Add(new RegexCharsetRangeEntry(((RegexCharsetCharEntry)next).Value, ch));
                        next = null;
                        readDash = false;
                    }
                    else
                    {
                        result.Add(next);
                        next = new RegexCharsetCharEntry(ch);
                    }
                }
                break;

            case '-':
                pc.Advance();
                if (null == next)
                {
                    // leading dash is a literal
                    next = new RegexCharsetCharEntry('-');
                    readDash = false;
                }
                else
                {
                    if (readDash)
                    {
                        result.Add(next);
                    }
                    readDash = true;
                }
                break;

            default:
                if (null == next)
                {
                    next = new RegexCharsetCharEntry((char)pc.Current);
                }
                else
                {
                    if (readDash)
                    {
                        result.Add(new RegexCharsetRangeEntry(((RegexCharsetCharEntry)next).Value, (char)pc.Current));
                        next = null;
                        readDash = false;
                    }
                    else
                    {
                        result.Add(next);
                        next = new RegexCharsetCharEntry((char)pc.Current);
                    }
                }
                pc.Advance();
                break;
        }
    }

    // flush what is still pending; a trailing dash is a literal
    if (null != next)
    {
        result.Add(next);
        if (readDash)
        {
            next = new RegexCharsetCharEntry('-');
            result.Add(next);
        }
    }
    return result;
}
/// <summary>
/// Builds a state machine from a regular expression read from
/// <paramref name="pc"/>. Parsing stops at end of input or at an unmatched
/// ')' (which is left for the caller to consume).
/// </summary>
/// <param name="pc">The parse context to read from.</param>
/// <param name="accept">The accept value given to the accepting states that are created.</param>
/// <returns>The start state of the machine that was built.</returns>
static CharFA<TAccept> _Parse(ParseContext pc, TAccept accept)
{
    CharFA<TAccept> result = new CharFA<TAccept>(true, accept);
    CharFA<TAccept> tail, fragment;
    int code;
    pc.EnsureStarted();
    var current = result;
    for (;;)
    {
        switch (pc.Current)
        {
            case -1:
                return result;

            case ')':
                // end of a group: the caller that saw '(' consumes the ')'
                return result;

            case '.':
            {
                pc.Advance();
                tail = current.FirstAcceptingState as CharFA<TAccept>;
                current = Set(new CharRange[] { new CharRange(char.MinValue, char.MaxValue) }, accept);
                var quantifier = pc.Current;
                if ('*' == quantifier)
                {
                    current = Kleene(current, accept);
                    pc.Advance();
                }
                else if ('+' == quantifier)
                {
                    current = Repeat(current, accept);
                    pc.Advance();
                }
                else if ('?' == quantifier)
                {
                    current = Optional(current, accept);
                    pc.Advance();
                }
                // splice the new fragment behind what was accepting so far
                tail.IsAccepting = false;
                tail.EpsilonTransitions.Add(current);
                break;
            }

            case '\\':
            {
                code = _ParseEscape(pc);
                if (-1 == code)
                {
                    pc.Expecting(); // throw an error
                    return null;    // doesn't execute
                }
                var quantifier = pc.Current;
                if ('*' == quantifier || '+' == quantifier || '?' == quantifier)
                {
                    // single-character machine wrapped by the quantifier
                    fragment = new CharFA<TAccept>();
                    fragment.Transitions.Add((char)code, new CharFA<TAccept>(true, accept));
                    if ('*' == quantifier)
                    {
                        fragment = Kleene(fragment, accept);
                    }
                    else if ('+' == quantifier)
                    {
                        fragment = Repeat(fragment, accept);
                    }
                    else
                    {
                        fragment = Optional(fragment, accept);
                    }
                    pc.Advance();
                    if (null != fragment)
                    {
                        current = current.FirstAcceptingState as CharFA<TAccept>;
                        current.IsAccepting = false;
                        current.EpsilonTransitions.Add(fragment);
                        current = fragment;
                    }
                }
                else
                {
                    // plain character: extend the accepting state directly
                    current = current.FirstAcceptingState as CharFA<TAccept>;
                    current.IsAccepting = false;
                    current.Transitions.Add((char)code, new CharFA<TAccept>(true, accept));
                }
                break;
            }

            case '(':
            {
                pc.Advance();
                pc.Expecting();
                tail = current.FirstAcceptingState as CharFA<TAccept>;
                current = _Parse(pc, accept);
                pc.Expecting(')');
                pc.Advance();
                var quantifier = pc.Current;
                if ('*' == quantifier)
                {
                    current = Kleene(current, accept);
                    pc.Advance();
                }
                else if ('+' == quantifier)
                {
                    current = Repeat(current, accept);
                    pc.Advance();
                }
                else if ('?' == quantifier)
                {
                    current = Optional(current, accept);
                    pc.Advance();
                }
                var groupTail = tail.FirstAcceptingState;
                groupTail.EpsilonTransitions.Add(current);
                groupTail.IsAccepting = false;
                break;
            }

            case '|':
                if (-1 != pc.Advance())
                {
                    // everything to the right becomes the second alternative
                    current = _Parse(pc, accept);
                    result = Or(new CharFA<TAccept>[] { result as CharFA<TAccept>, current as CharFA<TAccept> }, accept);
                }
                else
                {
                    // trailing '|': what was parsed so far becomes optional
                    current = current.FirstAcceptingState as CharFA<TAccept>;
                    result = Optional(result, accept);
                }
                break;

            case '[':
            {
                pc.ClearCapture();
                pc.Advance();
                pc.Expecting();
                bool negated = false;
                if ('^' == pc.Current)
                {
                    negated = true;
                    pc.Advance();
                    pc.Expecting();
                }
                pc.TryReadUntil(']', '\\', false);
                pc.Expecting(']');
                pc.Advance();
                // "[.]" is treated as "any character"
                var ranges = (!negated && "." == pc.Capture)
                    ? new CharRange[] { new CharRange(char.MinValue, char.MaxValue) }
                    : _ParseRanges(pc.Capture, true);
                if (negated)
                {
                    ranges = CharRange.NotRanges(ranges);
                }
                tail = current.FirstAcceptingState as CharFA<TAccept>;
                current = Set(ranges, accept);
                var quantifier = pc.Current;
                if ('*' == quantifier)
                {
                    current = Kleene(current, accept);
                    pc.Advance();
                }
                else if ('+' == quantifier)
                {
                    current = Repeat(current, accept);
                    pc.Advance();
                }
                else if ('?' == quantifier)
                {
                    current = Optional(current, accept);
                    pc.Advance();
                }
                tail.IsAccepting = false;
                tail.EpsilonTransitions.Add(current);
                break;
            }

            default:
            {
                code = pc.Current;
                pc.Advance();
                var quantifier = pc.Current;
                if ('*' == quantifier || '+' == quantifier || '?' == quantifier)
                {
                    // single-character machine wrapped by the quantifier
                    fragment = new CharFA<TAccept>();
                    fragment.Transitions.Add((char)code, new CharFA<TAccept>(true, accept));
                    if ('*' == quantifier)
                    {
                        fragment = Kleene(fragment, accept);
                    }
                    else if ('+' == quantifier)
                    {
                        fragment = Repeat(fragment, accept);
                    }
                    else
                    {
                        fragment = Optional(fragment, accept);
                    }
                    pc.Advance();
                    if (null != fragment)
                    {
                        current = current.FirstAcceptingState as CharFA<TAccept>;
                        current.IsAccepting = false;
                        current.EpsilonTransitions.Add(fragment);
                        current = fragment;
                    }
                }
                else
                {
                    // plain character: extend the accepting state directly
                    current = current.FirstAcceptingState as CharFA<TAccept>;
                    current.IsAccepting = false;
                    current.Transitions.Add((char)code, new CharFA<TAccept>(true, accept));
                }
                break;
            }
        }
    }
}
/// <summary>
/// Parses a lexer specification. Each entry starts with an identifier that is
/// followed by ':' and an attribute list, by '=' and a single-quoted regular
/// expression, or by '-', in which case the rest of the line is skipped.
/// </summary>
/// <param name="pc">The parse context to read from.</param>
/// <returns>The document holding the attribute sets and rules that were read.</returns>
internal static LexDocument Parse(ParseContext pc)
{
    var document = new LexDocument();
    while (-1 != pc.Current)
    {
        // location recorded before leading whitespace/comments are skipped
        var entryLine = pc.Line;
        var entryColumn = pc.Column;
        var entryPosition = pc.Position;

        // skip blank lines
        LexNode.SkipCommentsAndWhitespace(pc);
        while ('\n' == pc.Current)
        {
            pc.Advance();
            LexNode.SkipCommentsAndWhitespace(pc);
        }

        var id = LexNode.ParseIdentifier(pc);
        if (string.IsNullOrEmpty(id))
        {
            // nothing usable here: step over one character and retry
            pc.Advance();
            LexNode.SkipCommentsAndWhitespace(pc);
            continue;
        }

        LexNode.SkipCommentsAndWhitespace(pc);
        pc.Expecting(':', '-', '=');
        switch (pc.Current)
        {
            case ':': // attribute set
            {
                pc.Advance();
                var attributes = new LexAttributeList();
                for (;;)
                {
                    if (-1 == pc.Current || '\n' == pc.Current)
                    {
                        break;
                    }
                    var attribute = LexAttribute.Parse(pc);
                    attributes.Add(attribute);
                    LexNode.SkipCommentsAndWhitespace(pc);
                    pc.Expecting('\n', ',', -1);
                    if (',' == pc.Current)
                    {
                        pc.Advance();
                    }
                }
                document.AttributeSets.Add(id, attributes);
                LexNode.SkipCommentsAndWhitespace(pc);
                break;
            }

            case '=': // rule: id = 'regex'
            {
                pc.Advance();
                LexNode.SkipCommentsAndWhitespace(pc);
                pc.Expecting('\'');
                pc.Advance();
                var mark = pc.CaptureBuffer.Length;
                pc.TryReadUntil('\'', '\\', false);
                pc.Expecting('\'');
                pc.Advance();
                var pattern = pc.GetCapture(mark);

                // make sure to capture the line numbers properly:
                var regexContext = ParseContext.Create(pattern);
                regexContext.EnsureStarted();
                regexContext.SetLocation(pc.Line, pc.Column, pc.Position);

                var rule = new LexRule(id, RegexExpression.Parse(regexContext));
                rule.SetLocation(entryLine, entryColumn, entryPosition);
                document.Rules.Add(rule);
                break;
            }

            case '-':
                // NOTE(review): entries introduced by '-' are not interpreted here;
                // the rest of the line is skipped
                pc.TrySkipUntil('\n', true);
                break;
        }

        LexNode.SkipCommentsAndWhitespace(pc);
        if ('\n' == pc.Current)
        {
            pc.Advance();
        }
    }
    return document;
}