/// <summary>
/// Handles one escape sequence: extends <paramref name="input"/>, translates the character
/// found at its end and appends the translation to <paramref name="result"/>.
/// </summary>
/// <param name="input">Range that is extended once before and once after the escape character.</param>
/// <param name="result">Builder that receives the translated character.</param>
/// <returns>
/// <c>false</c> when the character after the first extend is <c>'\0'</c> (nothing is appended);
/// otherwise <c>true</c>.
/// </returns>
static bool HandleEscape(TextInputRange input, StringBuilder result)
{
    input.Extend();
    var escaped = input.EndChar;
    if (escaped == '\0')
    {
        return false;
    }

    // TODO: add Unicode escape handling
    char translated;
    switch (escaped)
    {
        case 't':
            translated = '\t';
            break;
        case 'r':
            translated = '\r';
            break;
        case 'n':
            translated = '\n';
            break;
        default:
            // any other character stands for itself
            translated = escaped;
            break;
    }

    result.Append(translated);
    input.Extend();
    return true;
}
/// <summary>
/// Scans a number literal starting at the end of <paramref name="input"/>.
/// </summary>
/// <remarks>
/// A leading <c>0</c> followed by <c>x</c>/<c>X</c>, <c>o</c>/<c>O</c> or <c>b</c>/<c>B</c>
/// selects radix 16, 8 or 2; the two prefix characters are consumed before the digits are scanned.
/// Everything else is scanned as a decimal number.
/// </remarks>
/// <param name="input">Range positioned at the first character of the candidate number.</param>
/// <returns>The scanned literal, or <c>null</c> when the end character is not a decimal digit.</returns>
public static INumberLiteral Scan(TextInputRange input)
{
    var first = input.EndChar;
    if (!IsDecimalDigit(first))
    {
        return null;
    }

    if (first != '0')
    {
        return ScanDecimalNumber(input);
    }

    switch (input.PeekChar())
    {
        case 'x':
        case 'X':
            input.Extend(nChars: 2);
            return ScanNumber(input, radix: 16, isDigit: IsHexDigit);

        case 'o':
        case 'O':
            input.Extend(nChars: 2);
            return ScanNumber(input, radix: 8, isDigit: IsOctalDigit);

        case 'b':
        case 'B':
            input.Extend(nChars: 2);
            return ScanNumber(input, radix: 2, isDigit: IsBinaryDigit);

        default:
            // a plain leading zero is still a decimal number
            return ScanDecimalNumber(input);
    }
}
/// <summary>
/// Scans a basic double quoted string.
/// </summary>
/// <remarks>
/// Newlines inside the string are passed to <c>input.NewLine()</c> and control characters
/// (other than tab) to <c>HandleControl</c>; neither is appended to the content.
/// Backslashes are delegated to <c>HandleEscape</c>.
/// </remarks>
/// <param name="input">Range positioned at the candidate opening quote.</param>
/// <returns>
/// The string literal, or <c>null</c> when the end character is not a double quote,
/// when <c>'\0'</c> is reached before the closing quote, or when an escape cannot be handled.
/// </returns>
public static IStringLiteral Scan(TextInputRange input)
{
    if (!IsDoubleQuote(input.EndChar))
    {
        return null;
    }

    input.Extend();
    var content = new StringBuilder();
    for (;;)
    {
        var current = input.EndChar;
        if (current == '\0')
        {
            return null; // file end or invalid input
        }

        if (IsDoubleQuote(current))
        {
            break;
        }

        if (input.IsEndNewline)
        {
            input.NewLine();
            continue;
        }

        if (!IsTab(current) && char.IsControl(current))
        {
            HandleControl(input);
            continue;
        }

        if (!IsBackslash(current))
        {
            content.Append(current);
            input.Extend();
            continue;
        }

        if (!HandleEscape(input, content))
        {
            return null;
        }
    }

    // include the closing quote in the range
    input.Extend();
    return new StringLiteral { Content = content.ToString(), Range = input.Clone() };
}
/// <summary>
/// Scans a comment starting at the end of <paramref name="range"/>.
/// </summary>
/// <remarks>
/// Supported forms:
/// <code>
/// # single line
///
/// #&lt;optional&gt;#
/// block comments
/// #&lt;optional&gt;#
/// </code>
/// The text from the first <c>#</c> up to and including the second <c>#</c> is the block marker;
/// a block comment runs until that same marker text appears again.
/// </remarks>
/// <param name="range">Range positioned at the candidate <c>#</c>; it is extended over the comment.</param>
/// <returns><c>false</c> when the end character is not <c>#</c>; otherwise <c>true</c>.</returns>
/// <exception cref="FormatException">
/// A block comment was opened but its closing marker was not found before the input became invalid.
/// </exception>
public static bool Scan(TextInputRange range)
{
    if (range.EndChar != '#')
    {
        return false;
    }

    // consume the '#' and everything up to a newline, whitespace or a second '#'
    do
    {
        range.Extend();
    } while (range.IsEndValid && !range.IsEndNewline && !range.IsEndWhitespace && range.EndChar != '#');

    // single line comment
    if (range.IsEndNewline)
    {
        return true;
    }

    if (range.EndChar != '#')
    {
        // line comment with content: consume the remainder of the line
        do
        {
            range.Extend();
        } while (range.IsEndValid && !range.IsEndNewline);

        return true;
    }

    // is block comment
    range.Extend();
    var commentMarker = range.Text;
    var markerLength = commentMarker.Length;
    while (range.IsEndValid && range.EndString(markerLength) != commentMarker)
    {
        if (range.IsEndNewline)
        {
            range.NewLine();
        }
        else
        {
            range.Extend();
        }
    }

    if (range.EndString(markerLength) != commentMarker)
    {
        // A specific exception type instead of the reserved System.Exception base type;
        // it still derives from Exception, so existing catch blocks keep working.
        // NOTE(review): the message says "Line Comment" although this is the block comment path.
        throw new FormatException(message: "Line Comment not Escaped.");
    }

    range.Extend(markerLength);
    return true;
}
/// <summary>
/// Extends <paramref name="input"/> once and wraps the resulting range in a token of the given type.
/// </summary>
/// <param name="input">Range to extend; a clone of it is stored in the token.</param>
/// <param name="token">Token type to assign.</param>
/// <returns>The new token data.</returns>
static TokenData ScanSingleChar(TextInputRange input, Token token)
{
    input.Extend();

    var data = new TokenData
    {
        Range = input.Clone(),
        Type = token
    };
    return data;
}
/// <summary>
/// Extends <paramref name="input"/> once, then over any following whitespace
/// (via <c>ExtendWhitespaces</c>), and returns the range as a whitespace separator token.
/// </summary>
/// <param name="input">Range to extend; a clone of it is stored in the token.</param>
/// <returns>Token data of type <c>Token.WhiteSpaceSeperator</c>.</returns>
static TokenData ScanWhitespaces(TextInputRange input)
{
    input.Extend();
    input.ExtendWhitespaces();

    var data = new TokenData
    {
        Range = input.Clone(),
        Type = Token.WhiteSpaceSeperator
    };
    return data;
}
/// <summary>
/// Extending by two characters over "ABC " must yield the text "AB" and an end column of 3.
/// </summary>
public void ExtendTest()
{
    // arrange
    var file = new TextFile { Content = "ABC ", Filename = "" };
    var range = new TextInputRange { File = file };

    // act
    range.Extend(nChars: 2);

    // assert
    Assert.AreEqual(expected: "AB", actual: range.Text);
    Assert.AreEqual(expected: 3, actual: range.End.Column);
}
/// <summary>
/// After extending over "AB" in "AB C", collapsing whitespaces must leave the range
/// empty with 'C' as its end character.
/// </summary>
public void CollapseWhitespacesTest()
{
    // arrange
    var file = new TextFile { Content = "AB C", Filename = "" };
    var range = new TextInputRange { File = file };
    range.Extend(nChars: 2);

    // act
    range.CollapseWhitespaces();

    // assert
    Assert.AreEqual(expected: 'C', actual: range.EndChar);
    Assert.AreEqual(expected: "", actual: range.Text);
}
/// <summary>
/// Checks whether the end of <paramref name="input"/> can start an identifier.
/// </summary>
/// <remarks>
/// A single leading dot is allowed and is consumed by extending the range, so the
/// range may have been extended by one even when this returns <c>false</c>.
/// </remarks>
/// <param name="input">Range to inspect (and possibly extend over one dot).</param>
/// <returns>
/// <c>false</c> when the (post-dot) character is an ASCII digit; otherwise the result of
/// <c>IsContinue</c>.
/// </returns>
static bool IsStart(TextInputRange input)
{
    var first = input.EndChar;
    if (first == '.')
    {
        // dot is allowed at start but not later
        input.Extend();
        first = input.EndChar;
    }

    // digits are not allowed at start; no more special rules beyond that
    var isDigit = first >= '0' && first <= '9';
    return !isDigit && IsContinue(input);
}
/// <summary>
/// Scans the longest valid regular identifier entry.
/// </summary>
/// <param name="input">Range positioned at the candidate identifier start.</param>
/// <returns>
/// The identifier literal, or <c>null</c> (after backtracking the range) when the input
/// does not start an identifier.
/// </returns>
public static IIdentifierLiteral Scan(TextInputRange input)
{
    if (IsStart(input))
    {
        // consume the start character, then everything that may continue the identifier
        input.Extend();
        while (IsContinue(input))
        {
            input.Extend();
        }

        return new IdentifierLiteral { Content = input.Text, Range = input.Clone() };
    }

    // dot might have been skipped
    input.Backtrack();
    return null;
}
/// <summary>
/// Skips a control character by extending <paramref name="input"/> once;
/// arbitrary control characters are not added to internal strings.
/// </summary>
/// <param name="input">Range to extend.</param>
static void HandleControl(TextInputRange input) => input.Extend();
/// <summary>
/// Scans a decimal number: integer digits, an optional fractional part after a dot and an
/// optional exponent with optional sign.
/// </summary>
/// <remarks>
/// Leading zeros of the integer and exponent parts are not stored. After each digit, characters
/// for which <c>IsIgnored</c> is true are skipped.
/// </remarks>
/// <param name="input">Range that is extended over the number.</param>
/// <returns>
/// The literal with radix 10, or <c>null</c> when an exponent marker is not followed by digits
/// or when the literal reports itself as not valid.
/// </returns>
static NumberLiteral ScanDecimalNumber(TextInputRange input)
{
    var number = new NumberLiteral { Radix = 10 };
    var current = input.EndChar;

    // integer part
    while (IsDecimalDigit(current))
    {
        if (number.IntegerPart == null)
        {
            number.IntegerPart = "";
        }

        var isLeadingZero = IsZero(current) && number.IntegerPart.IsEmpty();
        if (!isLeadingZero)
        {
            number.IntegerPart += current;
        }

        input.Extend();
        for (current = input.EndChar; IsIgnored(current); current = input.EndChar)
        {
            input.Extend();
        }
    }

    // fractional part
    if (IsDot(current))
    {
        number.FractionalPart = "";
        input.Extend();
        current = input.EndChar;
        while (IsDecimalDigit(current))
        {
            number.FractionalPart += current;

            input.Extend();
            for (current = input.EndChar; IsIgnored(current); current = input.EndChar)
            {
                input.Extend();
            }
        }
    }

    if (!IsE(current))
    {
        return number.IsValid ? number : null;
    }

    // exponent part
    input.Extend();
    current = input.EndChar;
    if (IsSign(current))
    {
        number.IsExponentPositive = IsPlus(current);
        input.Extend();
        current = input.EndChar;
    }

    while (IsDecimalDigit(current))
    {
        if (number.ExponentPart == null)
        {
            number.ExponentPart = "";
        }

        var isLeadingZero = IsZero(current) && number.ExponentPart.IsEmpty();
        if (!isLeadingZero)
        {
            number.ExponentPart += current;
        }

        input.Extend();
        for (current = input.EndChar; IsIgnored(current); current = input.EndChar)
        {
            input.Extend();
        }
    }

    if (number.ExponentPart == null)
    {
        return null; // if exponent started it has to contain value
    }

    return number.IsValid ? number : null;
}
/// <summary>
/// Lazily splits <paramref name="file"/> into tokens.
/// </summary>
/// <remarks>
/// The range is collapsed before each token. Scanning stops when the end character is
/// <c>'\0'</c>. Characters that no scanner accepts are emitted one at a time as
/// <c>Token.InvalidCharacter</c>.
/// </remarks>
/// <param name="file">File to scan.</param>
/// <returns>The sequence of tokens in input order.</returns>
public static IEnumerable<TokenData> ScanFile(TextFile file)
{
    var range = new TextInputRange { File = file };
    for (;;)
    {
        range.Collapse();
        var current = range.EndChar;
        if (current == '\0')
        {
            yield break;
        }

        if (current >= '0' && current <= '9')
        {
            yield return ScanNumberLiteral(range);
            continue;
        }

        // ReSharper disable once SwitchStatementMissingSomeCases
        switch (current)
        {
            case ' ':
            case '\t':
                yield return ScanWhitespaces(range);
                continue;

            case '\n':
            case '\r':
                yield return ScanNewLine(range);
                continue;

            case '#':
                yield return ScanComment(range);
                continue;

            case ',':
                yield return ScanSingleChar(range, Token.CommaSeparator);
                continue;

            case ';':
                yield return ScanSingleChar(range, Token.SemicolonSeparator);
                continue;

            case '[':
                yield return ScanSingleChar(range, Token.SquareBracketOpen);
                continue;

            case ']':
                yield return ScanSingleChar(range, Token.SquareBracketClose);
                continue;

            case '(':
                yield return ScanSingleChar(range, Token.BracketOpen);
                continue;

            case ')':
                yield return ScanSingleChar(range, Token.BracketClose);
                continue;

            case '"':
                yield return ScanStringLiteral(range);
                continue;
        }

        // no fixed token matched: try identifiers first, then operators
        var identifier = IdentifierScanner.Scan(range);
        if (identifier != null)
        {
            yield return new TokenData
            {
                Range = identifier.Range,
                Type = Token.IdentifierLiteral,
                Data = identifier
            };
            continue;
        }

        var operatorToken = OperatorScanner.Scan(range);
        if (operatorToken != null)
        {
            yield return new TokenData
            {
                Range = operatorToken.Range,
                Type = Token.OperatorLiteral,
                Data = operatorToken
            };
        }
        else
        {
            // nothing accepted this character: consume it as invalid
            range.Extend();
            yield return new TokenData { Range = range.Clone(), Type = Token.InvalidCharacter };
        }
    }
}