/// <summary>
/// Tries to read a token at the reader's current position by consuming whatever
/// <c>Regex</c> matches there.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <returns>
/// The token produced by <c>Init</c> for the consumed text, or null when
/// <c>ConsumeEntireMatch</c> returns null.
/// </returns>
public override Token ParseToken(LexReader reader)
{
    var tokenStart = reader.GetPosition();
    string matched = reader.ConsumeEntireMatch(Regex);
    if (matched == null)
    {
        return null;
    }

    // The end position is sampled only after the match has been consumed.
    return Init(tokenStart, reader.GetPosition(), matched);
}
/// <summary>
/// Creates a tokenizer over the token sequence that <c>makeEnumerable</c> builds
/// from the supplied reader and parsers.
/// </summary>
/// <param name="reader">The lex reader to take tokens from.</param>
/// <param name="tokenParsers">Parsers that are tried to recognize each token.</param>
/// <param name="ignoreTokens">Token types that are not included in the stream.</param>
/// <param name="stopToken">Token type that ends the stream.</param>
/// <param name="swallowWhitespace">Whether whitespace is consumed before each token.</param>
/// <exception cref="Exception">Thrown when index 0 is not in range, i.e. the stream has no tokens.</exception>
public LexTokenizer(LexReader reader, IEnumerable<Token.Parser> tokenParsers, ICollection<Type> ignoreTokens, Type stopToken, bool swallowWhitespace)
    : base(makeEnumerable(reader, tokenParsers, ignoreTokens, stopToken, swallowWhitespace))
{
    bool hasFirstToken = IndexInRange(0);
    if (hasFirstToken)
    {
        return;
    }

    throw new Exception("There are no tokens in the token stream.");
}
/// <summary>
/// Lexes a string literal delimited by the given opening and closing sequences and
/// returns its contents with escapes resolved.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <param name="basicEscapes">
/// Maps the character following a backslash to its replacement character. May be null.
/// </param>
/// <param name="advancedEscapes">
/// Maps the character following a backslash to a function that reads the rest of the
/// escape from the reader and returns the replacement text. May be null.
/// </param>
/// <param name="openingSequence">Text that starts the literal.</param>
/// <param name="closingSequence">Text that ends the literal.</param>
/// <param name="escapeClosingByDoubling">
/// When true, two consecutive closing sequences inside the literal stand for one
/// literal closing sequence instead of ending the string.
/// </param>
/// <returns>
/// The literal's contents, or null (without consuming anything) if the reader does not
/// continue with <paramref name="openingSequence"/>.
/// </returns>
/// <exception cref="LexException">
/// The input ends before the closing sequence, or a backslash is followed by a character
/// found in neither escape dictionary.
/// </exception>
public static string LexStringLiteral(LexReader reader, IDictionary<char, char> basicEscapes, IDictionary<char, Func<LexReader, string>> advancedEscapes, string openingSequence, string closingSequence, bool escapeClosingByDoubling)
{
    if (!reader.ContinuesWith(openingSequence))
    {
        return null;
    }

    var builder = new StringBuilder();
    var startingPos = reader.GetPosition();
    string doubleClosingSequence = closingSequence + closingSequence;
    // Backslash escapes are only recognized when at least one escape dictionary was supplied.
    bool escapesEnabled = basicEscapes != null || advancedEscapes != null;
    reader.Consume(openingSequence.Length);

    while (true)
    {
        if (reader.EndOfFile())
        {
            throw new LexException(reader.GetPosition(), "Unexpected end of string literal.").AddPosition(startingPos, "Start of string literal").Freeze();
        }

        if (escapeClosingByDoubling && reader.ContinuesWith(doubleClosingSequence))
        {
            // A doubled closing sequence is an escape for ONE closing sequence, so only one is emitted.
            reader.Consume(doubleClosingSequence.Length);
            builder.Append(closingSequence);
        }
        else if (reader.ContinuesWith(closingSequence))
        {
            reader.Consume(closingSequence.Length);
            return builder.ToString();
        }
        else if (escapesEnabled && reader.ContinuesWith("\\"))
        {
            reader.Consume(1);
            char escape = reader.ConsumeChar();
            char replacement;
            Func<LexReader, string> replacementFunc;
            // Either dictionary may be null on its own (e.g. the overloads that pass null),
            // so each one is null-checked before the lookup.
            if (basicEscapes != null && basicEscapes.TryGetValue(escape, out replacement))
            {
                builder.Append(replacement);
            }
            else if (advancedEscapes != null && advancedEscapes.TryGetValue(escape, out replacementFunc))
            {
                builder.Append(replacementFunc(reader));
            }
            else
            {
                // could introduce support for other behaviours on unrecognized \x, such as no-escape (\x => \x) or as-is escape (\x => x)
                throw new LexException(reader.GetPosition(-1), @"Unrecognized escape sequence: ""\{0}"".".Fmt(escape));
            }
        }
        else
        {
            builder.Append(reader.ConsumeChar());
        }
    }
}
/// <summary>
/// Tries to read a string literal at the reader's current position, using this parser's
/// escape tables and delimiter settings.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <returns>
/// A <c>StringLiteralToken</c> whose <c>Value</c> is the lexed text, or null when
/// <c>LexUtil.LexStringLiteral</c> returns null.
/// </returns>
public override Token ParseToken(LexReader reader)
{
    var tokenStart = reader.GetPosition();
    string text = LexUtil.LexStringLiteral(reader, BasicEscapes, AdvancedEscapes, OpeningSequence, ClosingSequence, EscapeClosingByDoubling);
    if (text == null)
    {
        return null;
    }

    var token = new StringLiteralToken(tokenStart, reader.GetPosition());
    token.Value = text;
    return token;
}
/// <summary>
/// Lexes a hexadecimal integer written with a "0x" or "0X" prefix.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <returns>
/// The hex digits following the prefix (prefix not included), or null - without consuming
/// anything - if the reader does not continue with "0x" or "0X".
/// </returns>
/// <exception cref="LexException">The prefix is not followed by at least one hex digit.</exception>
public static string Lex0xHexInteger(LexReader reader)
{
    if (!reader.ContinuesWith("0x") && !reader.ContinuesWith("0X"))
    {
        return null;
    }

    reader.Consume(2);
    string digits = reader.ConsumeStringWhile(IsHexadecimalDigit);
    if (digits.Length == 0)
    {
        // Offset -2 is relative to the current position, i.e. back over the two prefix characters just consumed.
        throw new LexException(reader.GetPosition(-2), "Hexadecimal integers starting with the \"0x\" prefix must have at least one hex digit.");
    }

    // The empty case has already thrown above, so the digits are returned directly.
    return digits;
}
/// <summary>
/// Lexes the hex digits of a variable-length character escape and returns the character
/// with that code as a one-character string.
/// </summary>
/// <param name="reader">The lex reader, positioned at the first hex digit.</param>
/// <returns>A string containing the single decoded character.</returns>
/// <exception cref="LexException">No hex digit was found at the current position.</exception>
public static string LexCsharpUnicodeVariableLenCharEscape(LexReader reader)
{
    // NOTE(review): the second argument is presumably the maximum number of digits to consume - confirm against LexReader.
    string hexDigits = reader.ConsumeStringWhile(IsHexadecimalDigit, 4);
    if (hexDigits.Length == 0)
    {
        throw new LexException(reader.GetPosition(), "Unicode escape sequence is too short (requires at least one hex digit).");
    }

    int codeUnit = int.Parse(hexDigits, NumberStyles.HexNumber);
    char decoded = (char)codeUnit;
    return decoded.ToString();
}
/// <summary>
/// Produces an end-of-file token when the reader has reached the end of its input.
/// </summary>
/// <param name="reader">The lex reader to inspect.</param>
/// <returns>
/// An <c>EndOfFileToken</c> at the current position if the reader is at end of file;
/// otherwise null.
/// </returns>
public override Token ParseToken(LexReader reader)
{
    if (!reader.EndOfFile())
    {
        return null;
    }

    return new EndOfFileToken(reader.GetPosition());
}
/// <summary>
/// Tries each entry of <c>_operators</c> in enumeration order and consumes the first one
/// that the reader continues with.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <returns>
/// A <c>BuiltinToken</c> covering the matched operator text, or null if none of the
/// operators match at the current position.
/// </returns>
public override Token ParseToken(LexReader reader)
{
    foreach (var candidate in _operators)
    {
        if (!reader.ContinuesWith(candidate))
        {
            continue;
        }

        var tokenStart = reader.GetPosition();
        reader.Consume(candidate.Length);
        var tokenEnd = reader.GetPosition();
        return new BuiltinToken(tokenStart, tokenEnd, candidate);
    }

    return null;
}
/// <summary>
/// Lexes a hexadecimal integer that starts with a "0x" or "0X" prefix.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <returns>
/// The hex digits after the prefix (the prefix itself is not part of the result), or null -
/// leaving the reader untouched - if the reader does not continue with "0x" or "0X".
/// </returns>
/// <exception cref="LexException">No hex digit follows the prefix.</exception>
public static string Lex0xHexInteger(LexReader reader)
{
    bool hasPrefix = reader.ContinuesWith("0x") || reader.ContinuesWith("0X");
    if (!hasPrefix)
    {
        return null;
    }

    reader.Consume(2);
    string result = reader.ConsumeStringWhile(IsHexadecimalDigit);
    if (result.Length == 0)
    {
        // Offset -2 is relative to the current position, i.e. back over the two prefix characters just consumed.
        throw new LexException(reader.GetPosition(-2), "Hexadecimal integers starting with the \"0x\" prefix must have at least one hex digit.");
    }

    // An empty result has already thrown, so no further emptiness check is needed here.
    return result;
}
/// <summary>
/// Lexes a string literal enclosed by the given opening and closing sequences, resolving
/// backslash escapes and (optionally) doubled closing sequences.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <param name="basicEscapes">
/// Single-character escapes: the character after the backslash maps to its replacement. May be null.
/// </param>
/// <param name="advancedEscapes">
/// Escapes that need to read further input: the character after the backslash maps to a
/// function that is given the reader and returns the replacement text. May be null.
/// </param>
/// <param name="openingSequence">Text that starts the literal.</param>
/// <param name="closingSequence">Text that ends the literal.</param>
/// <param name="escapeClosingByDoubling">
/// When true, a doubled closing sequence inside the literal represents one literal closing
/// sequence rather than the end of the string.
/// </param>
/// <returns>
/// The contents of the literal, or null (nothing consumed) if the reader does not continue
/// with <paramref name="openingSequence"/>.
/// </returns>
/// <exception cref="LexException">
/// End of input is reached before the closing sequence, or a backslash is followed by a
/// character that neither escape dictionary contains.
/// </exception>
public static string LexStringLiteral(LexReader reader, IDictionary<char, char> basicEscapes, IDictionary<char, Func<LexReader, string>> advancedEscapes, string openingSequence, string closingSequence, bool escapeClosingByDoubling)
{
    if (!reader.ContinuesWith(openingSequence))
        return null;

    var builder = new StringBuilder();
    var startingPos = reader.GetPosition();
    string doubleClosingSequence = closingSequence + closingSequence;
    // Backslash handling is active only if the caller supplied at least one escape table.
    bool escapesEnabled = basicEscapes != null || advancedEscapes != null;
    reader.Consume(openingSequence.Length);

    while (true)
    {
        if (reader.EndOfFile())
            throw new LexException(reader.GetPosition(), "Unexpected end of string literal.").AddPosition(startingPos, "Start of string literal").Freeze();

        if (escapeClosingByDoubling && reader.ContinuesWith(doubleClosingSequence))
        {
            // The doubled form is an escape for a single closing sequence, so append it once.
            reader.Consume(doubleClosingSequence.Length);
            builder.Append(closingSequence);
        }
        else if (reader.ContinuesWith(closingSequence))
        {
            reader.Consume(closingSequence.Length);
            return builder.ToString();
        }
        else if (escapesEnabled && reader.ContinuesWith("\\"))
        {
            reader.Consume(1);
            char escape = reader.ConsumeChar();
            char replacement;
            Func<LexReader, string> replacementFunc;
            // Only one of the two tables may have been supplied; guard each lookup against null.
            if (basicEscapes != null && basicEscapes.TryGetValue(escape, out replacement))
                builder.Append(replacement);
            else if (advancedEscapes != null && advancedEscapes.TryGetValue(escape, out replacementFunc))
                builder.Append(replacementFunc(reader));
            else
                throw new LexException(reader.GetPosition(-1), @"Unrecognized escape sequence: ""\{0}"".".Fmt(escape));
            // could introduce support for other behaviours on unrecognized \x, such as no-escape (\x => \x) or as-is escape (\x => x)
        }
        else
        {
            builder.Append(reader.ConsumeChar());
        }
    }
}
/// <summary>
/// Lexes the hex digits of a fixed-length Unicode escape (four digits, or eight when
/// <paramref name="islong"/> is true) and returns the text they denote.
/// </summary>
/// <param name="reader">The lex reader, positioned at the first hex digit.</param>
/// <param name="islong">True for the eight-digit form, false for the four-digit form.</param>
/// <returns>
/// For the four-digit form, a one-character string with that UTF-16 code unit. For the
/// eight-digit form, the UTF-16 encoding of the code point (one or two characters).
/// </returns>
/// <exception cref="LexException">
/// Fewer hex digits than required were found, or the eight-digit value is not a valid
/// Unicode code point.
/// </exception>
public static string LexCsharpUnicodeFixedLenCharEscape(LexReader reader, bool islong)
{
    string numberOfChars = islong ? "eight" : "four";
    int numChars = islong ? 8 : 4;
    string sequence = reader.ConsumeStringWhile(IsHexadecimalDigit, numChars);
    if (sequence.Length < numChars)
    {
        throw new LexException(reader.GetPosition(), "Unicode escape sequence is too short (requires {0} hex digits).".Fmt(numberOfChars));
    }

    int codePoint = int.Parse(sequence, NumberStyles.HexNumber);
    if (!islong)
    {
        return new string((char)codePoint, 1);
    }

    // char.ConvertFromUtf32 throws ArgumentOutOfRangeException for anything that is not a valid
    // code point (outside 0..0x10FFFF, or in the surrogate range). Eight hex digits can also parse
    // to a negative int. Report these as lexing errors rather than leaking an argument exception.
    bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    if (codePoint < 0 || codePoint > 0x10FFFF || isSurrogate)
    {
        throw new LexException(reader.GetPosition(-numChars), "Unicode escape sequence \"{0}\" does not denote a valid Unicode code point.".Fmt(sequence));
    }

    return char.ConvertFromUtf32(codePoint);
}
/// <summary>
/// Lazily produces tokens from the reader by asking each parser in turn until one of them
/// returns a token, repeating until a token of the stop type has been read.
/// </summary>
/// <param name="reader">The lex reader to take tokens from.</param>
/// <param name="tokenParsers">Parsers to try, in order, at each position.</param>
/// <param name="ignoreTokens">Token types that are read but not yielded.</param>
/// <param name="stopToken">Token type after which the sequence ends.</param>
/// <param name="swallowWhitespace">Whether to consume whitespace before each token.</param>
/// <returns>The sequence of tokens that are not of an ignored type.</returns>
/// <exception cref="LexException">No parser produced a token at the current position.</exception>
private static IEnumerable<Token> makeEnumerable(LexReader reader, IEnumerable<Token.Parser> tokenParsers, ICollection<Type> ignoreTokens, Type stopToken, bool swallowWhitespace)
{
    for (;;)
    {
        if (swallowWhitespace)
        {
            reader.ConsumeAnyWhitespace();
        }

        // Evaluated lazily: parsers after the first one that succeeds are not invoked.
        var attempts = tokenParsers.Select(parser => parser.ParseToken(reader));
        var next = attempts.FirstOrDefault(candidate => candidate != null);
        if (next == null)
        {
            throw new LexException(reader.GetPosition(), "Unrecognized sequence of characters.");
        }

        var tokenType = next.GetType();
        bool isIgnored = ignoreTokens.Contains(tokenType);
        if (!isIgnored)
        {
            yield return next;
        }

        // The stop check runs even for ignored token types.
        if (tokenType == stopToken)
        {
            yield break;
        }
    }
}
/// <summary>
/// Lexes a run of decimal digits at the reader's current position.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <returns>The consumed digits, or null if there were none.</returns>
public static string LexDecimalInteger(LexReader reader)
{
    string digits = reader.ConsumeStringWhile(IsDecimalDigit);
    if (digits.Length == 0)
    {
        return null;
    }

    return digits;
}
/// <summary>
/// Creates a position that stores the given reader together with an offset.
/// </summary>
/// <param name="reader">The lex reader this position belongs to.</param>
/// <param name="offset">The offset to store for this position.</param>
public LexPosition(LexReader reader, int offset)
{
    this._offset = offset;
    this._reader = reader;
}
/// <summary>
/// Parses the next token of a specific kind from the lex reader. Implementations must do one of the following:
/// <list type="bullet">
/// <item>return null if the reader does not appear to contain this kind of token at the current location - while leaving the reader where it is</item>
/// <item>return a parsed token instance, advancing the reader to just after the parsed token</item>
/// <item>throw a <see cref="LexException"/> with a detailed description of the problem</item>
/// </list>
/// </summary>
/// <param name="reader">The lex reader to parse the token from.</param>
public abstract Token ParseToken(LexReader reader);
/// <summary>
/// Creates a position for the given reader with an offset of zero.
/// </summary>
/// <param name="reader">The lex reader this position belongs to.</param>
public LexPosition(LexReader reader)
    : this(reader, offset: 0)
{
}
/// <summary>
/// Placeholder that is not implemented yet; every call throws.
/// </summary>
/// <param name="reader">The lex reader (currently unused).</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static bool ParseFancyBaseInteger(LexReader reader) { throw new NotImplementedException(); }
/// <summary>
/// Lexes a string literal using only single-character escapes. Forwards to the full
/// overload with no advanced escapes.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <param name="basicEscapes">Maps the character after a backslash to its replacement character.</param>
/// <param name="openingSequence">Text that starts the literal.</param>
/// <param name="closingSequence">Text that ends the literal.</param>
/// <param name="escapeClosingByDoubling">Whether a doubled closing sequence is treated as an escape instead of ending the literal.</param>
/// <returns>Whatever the full overload returns for these arguments.</returns>
public static string LexStringLiteral(LexReader reader, IDictionary<char, char> basicEscapes, string openingSequence, string closingSequence, bool escapeClosingByDoubling)
{
    return LexStringLiteral(
        reader,
        basicEscapes,
        advancedEscapes: null,
        openingSequence: openingSequence,
        closingSequence: closingSequence,
        escapeClosingByDoubling: escapeClosingByDoubling);
}
/// <summary>
/// Consumes consecutive decimal digits from the reader.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <returns>The digits that were consumed, or null when no digit was present.</returns>
public static string LexDecimalInteger(LexReader reader)
{
    string consumed = reader.ConsumeStringWhile(IsDecimalDigit);
    bool foundDigits = consumed.Length != 0;
    if (foundDigits)
        return consumed;
    return null;
}
/// <summary>
/// Reads the hex digits of a variable-length character escape and converts them to the
/// corresponding character.
/// </summary>
/// <param name="reader">The lex reader, positioned at the first hex digit.</param>
/// <returns>A one-character string holding the decoded character.</returns>
/// <exception cref="LexException">There is no hex digit at the current position.</exception>
public static string LexCsharpUnicodeVariableLenCharEscape(LexReader reader)
{
    // NOTE(review): 4 is presumably the upper bound on consumed digits - confirm against LexReader.ConsumeStringWhile.
    string digits = reader.ConsumeStringWhile(IsHexadecimalDigit, 4);
    bool noDigits = digits.Length == 0;
    if (noDigits)
        throw new LexException(reader.GetPosition(), "Unicode escape sequence is too short (requires at least one hex digit).");

    int value = int.Parse(digits, NumberStyles.HexNumber);
    char character = (char)value;
    return character.ToString();
}
/// <summary>
/// Reads the hex digits of a fixed-length Unicode escape - four digits, or eight when
/// <paramref name="islong"/> is true - and returns the text they stand for.
/// </summary>
/// <param name="reader">The lex reader, positioned at the first hex digit.</param>
/// <param name="islong">True for the eight-digit form, false for the four-digit form.</param>
/// <returns>
/// Four-digit form: a one-character string with that UTF-16 code unit. Eight-digit form:
/// the UTF-16 encoding of the code point (one or two characters).
/// </returns>
/// <exception cref="LexException">
/// Too few hex digits were present, or the eight-digit value is not a valid Unicode code point.
/// </exception>
public static string LexCsharpUnicodeFixedLenCharEscape(LexReader reader, bool islong)
{
    string numberOfChars = islong ? "eight" : "four";
    int numChars = islong ? 8 : 4;
    string sequence = reader.ConsumeStringWhile(IsHexadecimalDigit, numChars);
    if (sequence.Length < numChars)
        throw new LexException(reader.GetPosition(), "Unicode escape sequence is too short (requires {0} hex digits).".Fmt(numberOfChars));

    int codePoint = int.Parse(sequence, NumberStyles.HexNumber);
    if (!islong)
        return new string((char) codePoint, 1);

    // char.ConvertFromUtf32 rejects values outside 0..0x10FFFF and surrogate code points with an
    // ArgumentOutOfRangeException, and eight hex digits may parse to a negative int. Surface such
    // input as a LexException so callers get a positioned lexing error instead.
    bool outOfRange = codePoint < 0 || codePoint > 0x10FFFF;
    bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    if (outOfRange || isSurrogate)
        throw new LexException(reader.GetPosition(-numChars), "Unicode escape sequence \"{0}\" does not denote a valid Unicode code point.".Fmt(sequence));

    return char.ConvertFromUtf32(codePoint);
}
/// <summary>
/// Lexes a string literal without any backslash escapes. Forwards to the full overload
/// with both escape dictionaries set to null.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <param name="openingSequence">Text that starts the literal.</param>
/// <param name="closingSequence">Text that ends the literal.</param>
/// <param name="escapeClosingByDoubling">Whether a doubled closing sequence is treated as an escape instead of ending the literal.</param>
/// <returns>Whatever the full overload returns for these arguments.</returns>
public static string LexStringLiteral(LexReader reader, string openingSequence, string closingSequence, bool escapeClosingByDoubling)
{
    return LexStringLiteral(
        reader,
        basicEscapes: null,
        advancedEscapes: null,
        openingSequence: openingSequence,
        closingSequence: closingSequence,
        escapeClosingByDoubling: escapeClosingByDoubling);
}
/// <summary>
/// Lexes a string literal in which backslash escapes are not recognized. Delegates to the
/// full overload, passing null for both escape dictionaries.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <param name="openingSequence">Text that starts the literal.</param>
/// <param name="closingSequence">Text that ends the literal.</param>
/// <param name="escapeClosingByDoubling">Whether a doubled closing sequence is treated as an escape instead of ending the literal.</param>
/// <returns>The result of the full overload for these arguments.</returns>
public static string LexStringLiteral(LexReader reader, string openingSequence, string closingSequence, bool escapeClosingByDoubling)
{
    IDictionary<char, char> noBasicEscapes = null;
    IDictionary<char, Func<LexReader, string>> noAdvancedEscapes = null;
    return LexStringLiteral(reader, noBasicEscapes, noAdvancedEscapes, openingSequence, closingSequence, escapeClosingByDoubling);
}
/// <summary>
/// Lexes a string literal that supports only single-character escapes. Delegates to the
/// full overload, passing null for the advanced escapes.
/// </summary>
/// <param name="reader">The lex reader to consume from.</param>
/// <param name="basicEscapes">Maps the character after a backslash to its replacement character.</param>
/// <param name="openingSequence">Text that starts the literal.</param>
/// <param name="closingSequence">Text that ends the literal.</param>
/// <param name="escapeClosingByDoubling">Whether a doubled closing sequence is treated as an escape instead of ending the literal.</param>
/// <returns>The result of the full overload for these arguments.</returns>
public static string LexStringLiteral(LexReader reader, IDictionary<char, char> basicEscapes, string openingSequence, string closingSequence, bool escapeClosingByDoubling)
{
    IDictionary<char, Func<LexReader, string>> noAdvancedEscapes = null;
    return LexStringLiteral(reader, basicEscapes, noAdvancedEscapes, openingSequence, closingSequence, escapeClosingByDoubling);
}