/// <summary>
/// Lexes a quoted string starting at the buffer's current position.
/// </summary>
/// <param name="buffer">Buffer positioned on the opening quote character.</param>
/// <returns>
/// A string token holding the characters between the quotes. A backslash is dropped
/// and the character that follows it is appended verbatim. <c>Error</c> is set when
/// the string is cut short by a line break or by the end of the input.
/// </returns>
public static ErlangStringToken Lex(TextBuffer buffer)
{
    var sb = new StringBuilder();
    var c = buffer.Peek();
    Debug.Assert(IsStringStart(c));
    buffer.Advance();

    bool isEscape = false;
    bool terminated = false;
    string error = null;

    while (buffer.TextRemains())
    {
        c = buffer.Peek();
        if (isEscape)
        {
            // the character after a backslash is taken literally
            buffer.Advance();
            isEscape = false;
            sb.Append(c);
        }
        else if (IsStringStart(c))
        {
            // string start and end are the same thing; swallow and stop
            buffer.Advance();
            terminated = true;
            break;
        }
        else if (c == '\n' || c == '\r')
        {
            // leave the line break in the buffer for the caller
            error = "Expected string terminator";
            break;
        }
        else
        {
            buffer.Advance();
            if (c == '\\')
            {
                isEscape = true;
            }
            else
            {
                sb.Append(c);
            }
        }
    }

    // Running out of input before the closing quote is just as much an
    // unterminated string as hitting a line break.
    if (!terminated && error == null)
    {
        error = "Expected string terminator";
    }

    return new ErlangStringToken(sb.ToString()) { Error = error };
}
/// <summary>
/// Reads a comment from the current buffer position up to, but not including,
/// the next newline character (or the end of the input).
/// </summary>
/// <param name="buffer">Buffer positioned on the first character of the comment.</param>
/// <returns>Comment trivia carrying the comment text and the offset at which it began.</returns>
private static ErlangCommentTrivia LexComment(TextBuffer buffer)
{
    var start = buffer.Offset;
    var text = new StringBuilder();

    // the first character is taken unconditionally
    text.Append(buffer.Peek());
    buffer.Advance();

    for (; buffer.TextRemains(); buffer.Advance())
    {
        var next = buffer.Peek();
        if (IsNewline(next))
        {
            break;
        }

        text.Append(next);
    }

    return new ErlangCommentTrivia(text.ToString(), start);
}
/// <summary>
/// Lexes an atom-like word (optionally single-quoted) and classifies it as a
/// keyword, a keyword operator, or a plain atom.
/// </summary>
/// <param name="buffer">Buffer positioned on the first character of the word.</param>
/// <returns>
/// An <c>ErlangKeywordToken</c> if the text is a keyword, otherwise the token returned by
/// <c>ErlangOperatorToken.GetKeywordOperator</c> when that is non-null, otherwise an
/// <c>ErlangAtomToken</c>.
/// </returns>
internal static ErlangToken Lex(TextBuffer buffer)
{
    var opening = buffer.Peek();
    var quoted = IsSingleQuote(opening);

    var text = new StringBuilder();
    text.Append(opening);
    buffer.Advance();

    while (buffer.TextRemains())
    {
        var next = buffer.Peek();
        var continues = IsAtomContinue(next, quoted);

        // stop without consuming unless this is a body character or the closing quote
        if (!continues && !(quoted && IsSingleQuote(next)))
        {
            break;
        }

        text.Append(next);
        buffer.Advance();

        if (!continues)
        {
            // that was the closing quote: end of token
            break;
        }
    }

    var name = text.ToString();
    if (ErlangKeywordToken.IsKeyword(name))
    {
        return new ErlangKeywordToken(name);
    }

    var keywordOperator = ErlangOperatorToken.GetKeywordOperator(name) as ErlangToken;
    if (keywordOperator != null)
    {
        return keywordOperator;
    }

    return new ErlangAtomToken(name);
}
/// <summary>
/// Lexes a variable name: the current character plus every following character
/// accepted by <c>IsVariableContinue</c>.
/// </summary>
/// <param name="buffer">Buffer positioned on the first character of the variable.</param>
/// <returns>A variable token holding the consumed text.</returns>
internal static ErlangVariableToken Lex(TextBuffer buffer)
{
    var name = new StringBuilder();

    // the first character is taken unconditionally
    name.Append(buffer.Peek());
    buffer.Advance();

    while (buffer.TextRemains())
    {
        var next = buffer.Peek();
        if (!IsVariableContinue(next))
        {
            break;
        }

        name.Append(next);
        buffer.Advance();
    }

    return new ErlangVariableToken(name.ToString());
}
/// <summary>
/// Lexes a macro reference: the start character plus every following character
/// accepted by <c>IsMacroContinue</c>.
/// </summary>
/// <param name="buffer">Buffer positioned on the macro start character.</param>
/// <returns>A macro token holding the consumed text, start character included.</returns>
internal static ErlangMacroToken Lex(TextBuffer buffer)
{
    var name = new StringBuilder();
    var current = buffer.Peek();
    Debug.Assert(IsMacroStart(current));

    do
    {
        // consume the character already known to belong to the macro
        name.Append(current);
        buffer.Advance();

        if (!buffer.TextRemains())
        {
            break;
        }

        current = buffer.Peek();
    }
    while (IsMacroContinue(current));

    return new ErlangMacroToken(name.ToString());
}
/// <summary>
/// Splits the remaining text of <paramref name="buffer"/> into tokens.
/// </summary>
/// <remarks>
/// Whitespace runs and comments are collected as trivia and attached to the next
/// token as its <c>LeadingTrivia</c>; trivia left over after the last token becomes
/// that token's <c>TrailingTrivia</c>. Each token also receives the offset at which
/// it started and a line number that starts at 1 and grows by one for every
/// <c>'\n'</c> consumed as whitespace. If the input contains no tokens at all, an
/// empty list is returned and any collected trivia is discarded.
/// </remarks>
/// <param name="buffer">The text to tokenize, read from its current position to the end.</param>
/// <returns>The tokens in source order.</returns>
public static IEnumerable<ErlangToken> Tokenize(TextBuffer buffer)
{
    var tokenList = new List<ErlangToken>();
    var triviaList = new List<ErlangTrivia>();
    var whitespace = new StringBuilder();
    var tokenStart = 0;
    var whitespaceStart = 0;
    var lastLine = 1;

    // Turns the pending whitespace run (if any) into a trivia entry.
    void FlushWhitespace()
    {
        if (whitespace.Length > 0)
        {
            triviaList.Add(new ErlangWhitespaceTrivia(whitespace.ToString(), whitespaceStart));
            whitespace.Clear();
        }
    }

    // Attaches the pending trivia and position info to the token and records it.
    void FlushAndAdd(ErlangToken token)
    {
        if (token != null)
        {
            FlushWhitespace();
            token.LeadingTrivia = triviaList;
            token.Offset = tokenStart;
            token.Line = lastLine;
            tokenList.Add(token);
            triviaList = new List<ErlangTrivia>();
        }
    }

    // Consumes the next character only if it equals the expected one.
    bool ConsumeIf(char expected)
    {
        if (buffer.TextRemains() && buffer.Peek() == expected)
        {
            buffer.Advance();
            return true;
        }

        return false;
    }

    while (buffer.TextRemains())
    {
        var c = buffer.Peek();
        if (IsWhitespace(c))
        {
            // the start offset is fixed by the first character of the run
            if (whitespace.Length == 0)
            {
                whitespaceStart = buffer.Offset;
            }

            whitespace.Append(c);
            if (c == '\n')
            {
                lastLine++;
            }

            buffer.Advance();
            continue;
        }

        if (IsCommentStart(c))
        {
            FlushWhitespace();
            triviaList.Add(LexComment(buffer));
            continue;
        }

        // Remember where this token begins; taking it from the buffer keeps the
        // offset right no matter how much trivia (including comments) preceded it.
        tokenStart = buffer.Offset;

        switch (c)
        {
            case '(':
                buffer.Advance();
                FlushAndAdd(new ErlangLeftParenToken());
                break;
            case ')':
                buffer.Advance();
                FlushAndAdd(new ErlangRightParenToken());
                break;
            case ',':
                buffer.Advance();
                FlushAndAdd(new ErlangCommaToken());
                break;
            case '*':
                buffer.Advance();
                FlushAndAdd(new ErlangAsteriskToken());
                break;
            case '.':
                buffer.Advance();
                if (ConsumeIf('.'))
                {
                    // found a second dot
                    if (ConsumeIf('.'))
                    {
                        // found a third dot
                        FlushAndAdd(new ErlangDotDotDotToken());
                    }
                    else
                    {
                        FlushAndAdd(new ErlangDotDotToken());
                    }
                }
                else
                {
                    FlushAndAdd(new ErlangDotToken());
                }

                break;
            case '[':
                buffer.Advance();
                FlushAndAdd(new ErlangLeftBracketToken());
                break;
            case ']':
                buffer.Advance();
                FlushAndAdd(new ErlangRightBracketToken());
                break;
            case '{':
                buffer.Advance();
                FlushAndAdd(new ErlangLeftBraceToken());
                break;
            case '}':
                buffer.Advance();
                FlushAndAdd(new ErlangRightBraceToken());
                break;
            case ';':
                buffer.Advance();
                FlushAndAdd(new ErlangSemicolonToken());
                break;
            case '!':
                buffer.Advance();
                FlushAndAdd(new ErlangBangToken());
                break;
            case '#':
                buffer.Advance();
                FlushAndAdd(new ErlangHashToken());
                break;
            case ':':
                buffer.Advance();
                if (ConsumeIf(':'))
                {
                    FlushAndAdd(new ErlangColonColonToken());
                }
                else
                {
                    FlushAndAdd(new ErlangColonToken());
                }

                break;
            case '+':
                buffer.Advance();
                if (ConsumeIf('+'))
                {
                    FlushAndAdd(new ErlangPlusPlusToken());
                }
                else
                {
                    FlushAndAdd(new ErlangPlusToken());
                }

                break;
            case '-':
                buffer.Advance();
                if (ConsumeIf('-'))
                {
                    FlushAndAdd(new ErlangMinusMinusToken());
                }
                else if (ConsumeIf('>'))
                {
                    FlushAndAdd(new ErlangMinusGreaterToken());
                }
                else
                {
                    FlushAndAdd(new ErlangMinusToken());
                }

                break;
            case '/':
                buffer.Advance();
                if (ConsumeIf('='))
                {
                    FlushAndAdd(new ErlangSlashEqualsToken());
                }
                else
                {
                    FlushAndAdd(new ErlangSlashToken());
                }

                break;
            case '>':
                buffer.Advance();
                if (ConsumeIf('>'))
                {
                    FlushAndAdd(new ErlangGreaterGreaterToken());
                }
                else if (ConsumeIf('='))
                {
                    FlushAndAdd(new ErlangGreaterEqualsToken());
                }
                else
                {
                    FlushAndAdd(new ErlangGreaterToken());
                }

                break;
            case '<':
                buffer.Advance();
                if (ConsumeIf('<'))
                {
                    FlushAndAdd(new ErlangLessLessToken());
                }
                else if (ConsumeIf('-'))
                {
                    FlushAndAdd(new ErlangLessMinusToken());
                }
                else
                {
                    FlushAndAdd(new ErlangLessToken());
                }

                break;
            case '=':
                buffer.Advance();
                if (ConsumeIf('='))
                {
                    FlushAndAdd(new ErlangEqualsEqualsToken());
                }
                else if (ConsumeIf('<'))
                {
                    FlushAndAdd(new ErlangEqualsLessToken());
                }
                else if (ConsumeIf(':'))
                {
                    if (ConsumeIf('='))
                    {
                        FlushAndAdd(new ErlangEqualsColonEqualsToken());
                    }
                    else
                    {
                        // "=:" is not an operator: give the ':' back and emit a plain '='
                        buffer.Retreat();
                        FlushAndAdd(new ErlangEqualsToken());
                    }
                }
                else if (ConsumeIf('/'))
                {
                    if (ConsumeIf('='))
                    {
                        FlushAndAdd(new ErlangEqualsSlashEqualsToken());
                    }
                    else
                    {
                        // "=/" is not an operator: give the '/' back and emit a plain '='
                        buffer.Retreat();
                        FlushAndAdd(new ErlangEqualsToken());
                    }
                }
                else
                {
                    FlushAndAdd(new ErlangEqualsToken());
                }

                break;
            case '|':
                buffer.Advance();
                if (ConsumeIf('|'))
                {
                    FlushAndAdd(new ErlangPipePipeToken());
                }
                else
                {
                    FlushAndAdd(new ErlangPipeToken());
                }

                break;
            case '$':
                buffer.Advance();
                if (buffer.TextRemains())
                {
                    // character literal: the numeric value is the character's code
                    var next = buffer.Peek();
                    buffer.Advance();
                    FlushAndAdd(new ErlangNumberToken(string.Format("${0}", next), (double)next));
                }
                else
                {
                    FlushAndAdd(new ErlangErrorToken(c, "Unexpected end of stream"));
                }

                break;
            default:
                if (ErlangAtomToken.IsAtomStart(c))
                {
                    FlushAndAdd(ErlangAtomToken.Lex(buffer));
                }
                else if (ErlangVariableToken.IsVariableStart(c))
                {
                    FlushAndAdd(ErlangVariableToken.Lex(buffer));
                }
                else if (ErlangNumberToken.IsNumberStart(c))
                {
                    FlushAndAdd(ErlangNumberToken.Lex(buffer));
                }
                else if (ErlangStringToken.IsStringStart(c))
                {
                    FlushAndAdd(ErlangStringToken.Lex(buffer));
                }
                else if (ErlangMacroToken.IsMacroStart(c))
                {
                    FlushAndAdd(ErlangMacroToken.Lex(buffer));
                }
                else
                {
                    buffer.Advance();
                    FlushAndAdd(new ErlangErrorToken(c, "Unexpected operator"));
                }

                break;
        }
    }

    // add final trailing trivia, including whitespace that was still pending
    FlushWhitespace();
    if (tokenList.Count > 0)
    {
        tokenList[tokenList.Count - 1].TrailingTrivia = triviaList;
    }

    return tokenList;
}
/// <summary>
/// Lexes a numeric literal: a plain integer, a float with an optional exponent,
/// or a <c>base#digits</c> integer with a base from 2 to 36.
/// </summary>
/// <param name="buffer">Buffer positioned on the first character of the number.</param>
/// <returns>
/// A number token carrying the consumed text. Floats (a decimal point or an exponent
/// was seen) carry a <c>double</c> value, everything else a <c>BigInteger</c> value.
/// <c>Error</c> is set when the text cannot be parsed, the base is out of range, or a
/// digit is not valid for the base.
/// </returns>
public static ErlangNumberToken Lex(TextBuffer buffer)
{
    var sb = new StringBuilder();
    sb.Append(buffer.Peek());
    buffer.Advance();

    bool seenHash = false;
    bool seenE = false;
    bool seenDecimal = false;
    var last = default(char);

    while (buffer.TextRemains())
    {
        var c = buffer.Peek();
        if (c == '#')
        {
            // a base marker cannot follow a decimal point or an exponent
            if (seenHash || seenDecimal || seenE)
            {
                break;
            }

            seenHash = true;
        }
        else if (c == '.')
        {
            if (seenDecimal || seenHash)
            {
                break;
            }

            seenDecimal = true;
        }
        else if ((c == 'e' || c == 'E') && !seenHash)
        {
            if (seenE)
            {
                break;
            }

            seenE = true;
        }
        else if (c == '+' || c == '-')
        {
            // a sign is only part of the number directly after the exponent marker
            if (!(seenE && (last == 'e' || last == 'E')))
            {
                break;
            }
        }
        else if (!IsNumberContinue(c))
        {
            break;
        }

        buffer.Advance();
        sb.Append(c);
        last = c;
    }

    Debug.Assert(!(seenDecimal && seenHash)); // should not have seen both
    Debug.Assert(!(seenE && seenHash)); // should not have seen both

    if (last == '.')
    {
        // numbers can't end in a decimal point; hand the dot back to the caller
        buffer.Retreat();
        sb.Remove(sb.Length - 1, 1);
        seenDecimal = false;
    }

    var text = sb.ToString();
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    bool isFloat = seenDecimal || seenE;
    double doubleValue = default(double);
    BigInteger bigValue = default(BigInteger);
    string error = null;

    if (!seenHash)
    {
        // simple parsing; culture-invariant so '.' is always the decimal separator
        if (isFloat)
        {
            if (!double.TryParse(text, System.Globalization.NumberStyles.Float, invariant, out doubleValue))
            {
                error = string.Format("Invalid floating point number '{0}'", text);
            }
        }
        else if (!BigInteger.TryParse(text, System.Globalization.NumberStyles.Integer, invariant, out bigValue))
        {
            error = string.Format("Invalid integer '{0}'", text);
        }
    }
    else
    {
        // complex hash parsing: <base>#<digits>
        var parts = text.Split(new[] { '#' }, 2);
        int @base;
        bool baseParsed = int.TryParse(parts[0], System.Globalization.NumberStyles.Integer, invariant, out @base);
        if (baseParsed && @base >= 2 && @base <= 36)
        {
            // accumulate in a BigInteger so long literals cannot overflow
            BigInteger val = BigInteger.Zero;
            foreach (var l in parts[1])
            {
                int digitValue = -1;
                var upper = char.ToUpperInvariant(l);
                if (l >= '0' && l <= '9')
                {
                    digitValue = l - '0';
                }
                else if (upper >= 'A' && upper <= 'Z')
                {
                    digitValue = upper - 'A' + 10;
                }

                if (digitValue < 0 || digitValue >= @base)
                {
                    error = string.Format("Digit '{0}' not valid for base '{1}'", l, @base);
                    return new ErlangNumberToken(text) { Error = error };
                }

                val = (val * @base) + digitValue;
            }

            bigValue = val;
        }
        else
        {
            error = "Base must be between 2 and 36 inclusive.";
        }
    }

    if (isFloat)
    {
        return new ErlangNumberToken(text, doubleValue) { Error = error };
    }

    return new ErlangNumberToken(text, bigValue) { Error = error };
}