Example #1
0
        /// <summary>
        /// Reads <paramref name="buffer"/> until no text remains and produces the list of tokens found.
        /// Whitespace runs and comments are collected as trivia and attached as
        /// <c>LeadingTrivia</c> of the token that follows them; any trivia left after the
        /// last token becomes that token's <c>TrailingTrivia</c>.
        /// </summary>
        /// <param name="buffer">The text buffer to consume. It is advanced as tokens are read.</param>
        /// <returns>
        /// The tokens in source order. Characters that match no rule are returned as
        /// <c>ErlangErrorToken</c> instances rather than raising an exception. If the input
        /// contains no tokens at all (e.g. only whitespace and comments) the result is empty.
        /// </returns>
        public static IEnumerable<ErlangToken> Tokenize(TextBuffer buffer)
        {
            var tokenList       = new List<ErlangToken>();
            var triviaList      = new List<ErlangTrivia>();
            var whitespace      = new StringBuilder();
            var lastStart       = 0;
            var whitespaceStart = 0;
            var lastLine        = 1;

            // Turns the pending whitespace run (if any) into a trivia entry.
            void FlushWhitespace()
            {
                if (whitespace.Length > 0)
                {
                    triviaList.Add(new ErlangWhitespaceTrivia(whitespace.ToString(), whitespaceStart));
                    whitespace.Clear();
                }
            }

            // Attaches the accumulated trivia and position info to the token and records it.
            // A null token (a sub-lexer declined to produce one) is ignored.
            void FlushAndAdd(ErlangToken token)
            {
                if (token != null)
                {
                    FlushWhitespace();
                    token.LeadingTrivia = triviaList;
                    token.Offset        = lastStart;
                    token.Line          = lastLine;
                    lastStart           = buffer.Offset;
                    tokenList.Add(token);
                    triviaList = new List<ErlangTrivia>();
                }
            }

            // Consumes the next character only when it equals 'expected'.
            bool TryConsume(char expected)
            {
                if (buffer.TextRemains() && buffer.Peek() == expected)
                {
                    buffer.Advance();
                    return true;
                }

                return false;
            }

            while (buffer.TextRemains())
            {
                var c = buffer.Peek();
                if (IsWhitespace(c))
                {
                    if (whitespace.Length == 0)
                    {
                        whitespaceStart = buffer.Offset;
                    }
                    else
                    {
                        // NOTE(review): this moves the recorded position forward with every
                        // extra whitespace character, so the trivia ends up carrying the offset
                        // of the run's LAST character. Kept as-is; confirm whether the start of
                        // the run was intended.
                        whitespaceStart++;
                    }

                    whitespace.Append(c);
                    lastStart++;
                    if (c == '\n')
                    {
                        lastLine++;
                    }

                    buffer.Advance();
                }
                else if (IsCommentStart(c))
                {
                    FlushWhitespace();
                    triviaList.Add(LexComment(buffer));

                    // The comment consumed text, so the next token starts at the buffer's
                    // current position; without this its Offset would lag by the comment length.
                    lastStart = buffer.Offset;
                }
                else
                {
                    switch (c)
                    {
                    case '(':
                        buffer.Advance();
                        FlushAndAdd(new ErlangLeftParenToken());
                        break;

                    case ')':
                        buffer.Advance();
                        FlushAndAdd(new ErlangRightParenToken());
                        break;

                    case ',':
                        buffer.Advance();
                        FlushAndAdd(new ErlangCommaToken());
                        break;

                    case '*':
                        buffer.Advance();
                        FlushAndAdd(new ErlangAsteriskToken());
                        break;

                    case '[':
                        buffer.Advance();
                        FlushAndAdd(new ErlangLeftBracketToken());
                        break;

                    case ']':
                        buffer.Advance();
                        FlushAndAdd(new ErlangRightBracketToken());
                        break;

                    case '{':
                        buffer.Advance();
                        FlushAndAdd(new ErlangLeftBraceToken());
                        break;

                    case '}':
                        buffer.Advance();
                        FlushAndAdd(new ErlangRightBraceToken());
                        break;

                    case ';':
                        buffer.Advance();
                        FlushAndAdd(new ErlangSemicolonToken());
                        break;

                    case '!':
                        buffer.Advance();
                        FlushAndAdd(new ErlangBangToken());
                        break;

                    case '#':
                        buffer.Advance();
                        FlushAndAdd(new ErlangHashToken());
                        break;

                    case '.':
                        // '.', '..' or '...'
                        buffer.Advance();
                        if (TryConsume('.'))
                        {
                            if (TryConsume('.'))
                            {
                                FlushAndAdd(new ErlangDotDotDotToken());
                            }
                            else
                            {
                                FlushAndAdd(new ErlangDotDotToken());
                            }
                        }
                        else
                        {
                            FlushAndAdd(new ErlangDotToken());
                        }
                        break;

                    case ':':
                        // ':' or '::'
                        buffer.Advance();
                        if (TryConsume(':'))
                        {
                            FlushAndAdd(new ErlangColonColonToken());
                        }
                        else
                        {
                            FlushAndAdd(new ErlangColonToken());
                        }
                        break;

                    case '+':
                        // '+' or '++'
                        buffer.Advance();
                        if (TryConsume('+'))
                        {
                            FlushAndAdd(new ErlangPlusPlusToken());
                        }
                        else
                        {
                            FlushAndAdd(new ErlangPlusToken());
                        }
                        break;

                    case '-':
                        // '-', '--' or '->'
                        buffer.Advance();
                        if (TryConsume('-'))
                        {
                            FlushAndAdd(new ErlangMinusMinusToken());
                        }
                        else if (TryConsume('>'))
                        {
                            FlushAndAdd(new ErlangMinusGreaterToken());
                        }
                        else
                        {
                            FlushAndAdd(new ErlangMinusToken());
                        }
                        break;

                    case '/':
                        // '/' or '/='
                        buffer.Advance();
                        if (TryConsume('='))
                        {
                            FlushAndAdd(new ErlangSlashEqualsToken());
                        }
                        else
                        {
                            FlushAndAdd(new ErlangSlashToken());
                        }
                        break;

                    case '>':
                        // '>', '>>' or '>='
                        buffer.Advance();
                        if (TryConsume('>'))
                        {
                            FlushAndAdd(new ErlangGreaterGreaterToken());
                        }
                        else if (TryConsume('='))
                        {
                            FlushAndAdd(new ErlangGreaterEqualsToken());
                        }
                        else
                        {
                            FlushAndAdd(new ErlangGreaterToken());
                        }
                        break;

                    case '<':
                        // '<', '<<' or '<-'
                        buffer.Advance();
                        if (TryConsume('<'))
                        {
                            FlushAndAdd(new ErlangLessLessToken());
                        }
                        else if (TryConsume('-'))
                        {
                            FlushAndAdd(new ErlangLessMinusToken());
                        }
                        else
                        {
                            FlushAndAdd(new ErlangLessToken());
                        }
                        break;

                    case '=':
                        // '=', '==', '=<', '=:=' or '=/='
                        buffer.Advance();
                        if (TryConsume('='))
                        {
                            FlushAndAdd(new ErlangEqualsEqualsToken());
                        }
                        else if (TryConsume('<'))
                        {
                            FlushAndAdd(new ErlangEqualsLessToken());
                        }
                        else if (TryConsume(':'))
                        {
                            if (TryConsume('='))
                            {
                                FlushAndAdd(new ErlangEqualsColonEqualsToken());
                            }
                            else
                            {
                                // not '=:=' after all; give the ':' back and emit a plain '='
                                buffer.Retreat();
                                FlushAndAdd(new ErlangEqualsToken());
                            }
                        }
                        else if (TryConsume('/'))
                        {
                            if (TryConsume('='))
                            {
                                FlushAndAdd(new ErlangEqualsSlashEqualsToken());
                            }
                            else
                            {
                                // not '=/=' after all; give the '/' back and emit a plain '='
                                buffer.Retreat();
                                FlushAndAdd(new ErlangEqualsToken());
                            }
                        }
                        else
                        {
                            FlushAndAdd(new ErlangEqualsToken());
                        }
                        break;

                    case '|':
                        // '|' or '||'
                        buffer.Advance();
                        if (TryConsume('|'))
                        {
                            FlushAndAdd(new ErlangPipePipeToken());
                        }
                        else
                        {
                            FlushAndAdd(new ErlangPipeToken());
                        }
                        break;

                    case '$':
                    {
                        // character literal: '$' followed by exactly one character, whose
                        // code point becomes the numeric value of the token
                        buffer.Advance();
                        if (buffer.TextRemains())
                        {
                            var next = buffer.Peek();
                            buffer.Advance();
                            FlushAndAdd(new ErlangNumberToken(string.Format("${0}", next), (double)next));
                        }
                        else
                        {
                            FlushAndAdd(new ErlangErrorToken(c, "Unexpected end of stream"));
                        }
                        break;
                    }

                    default:
                        // multi-character tokens are delegated to their own lexers
                        if (ErlangAtomToken.IsAtomStart(c))
                        {
                            FlushAndAdd(ErlangAtomToken.Lex(buffer));
                        }
                        else if (ErlangVariableToken.IsVariableStart(c))
                        {
                            FlushAndAdd(ErlangVariableToken.Lex(buffer));
                        }
                        else if (ErlangNumberToken.IsNumberStart(c))
                        {
                            FlushAndAdd(ErlangNumberToken.Lex(buffer));
                        }
                        else if (ErlangStringToken.IsStringStart(c))
                        {
                            FlushAndAdd(ErlangStringToken.Lex(buffer));
                        }
                        else if (ErlangMacroToken.IsMacroStart(c))
                        {
                            FlushAndAdd(ErlangMacroToken.Lex(buffer));
                        }
                        else
                        {
                            // always consume the character so the loop makes progress
                            buffer.Advance();
                            FlushAndAdd(new ErlangErrorToken(c, "Unexpected operator"));
                        }
                        break;
                    }
                }
            }

            // Whitespace at the very end of the input is still pending in the builder;
            // flush it so it is part of the trailing trivia.
            FlushWhitespace();

            // Trailing trivia can only be attached when there is a token to hold it.
            // (Input consisting solely of comments/whitespace yields no tokens.)
            if (tokenList.Count > 0)
            {
                tokenList[tokenList.Count - 1].TrailingTrivia = triviaList;
            }

            return tokenList;
        }
Example #2
0
        /// <summary>
        /// Lexes a numeric literal from <paramref name="buffer"/>. The character at the current
        /// position is consumed unconditionally as the first character of the number; following
        /// characters are consumed while they form an integer, a float (decimal point and/or
        /// exponent), or a based integer of the form <c>base#digits</c>.
        /// </summary>
        /// <param name="buffer">The text buffer, positioned on the first character of the number.</param>
        /// <returns>
        /// A number token carrying the consumed text. Floats carry a <see cref="double"/> value,
        /// everything else a <see cref="BigInteger"/> value. When the text cannot be converted
        /// (bad base, bad digit, malformed literal) the token's <c>Error</c> is set instead of
        /// an exception being thrown.
        /// </returns>
        public static ErlangNumberToken Lex(TextBuffer buffer)
        {
            var sb = new StringBuilder();

            sb.Append(buffer.Peek());
            buffer.Advance();
            bool seenHash    = false;
            bool seenE       = false;
            bool seenDecimal = false;
            var  last        = default(char);

            while (buffer.TextRemains())
            {
                var c = buffer.Peek();
                if (c == '#')
                {
                    // a base separator is only valid once and never inside a float
                    if (!seenHash && !seenDecimal && !seenE)
                    {
                        seenHash = true;
                        buffer.Advance();
                        sb.Append(c);
                    }
                    else
                    {
                        // premature end of number
                        break;
                    }
                }
                else if (c == '.')
                {
                    if (!seenDecimal && !seenHash)
                    {
                        seenDecimal = true;
                        buffer.Advance();
                        sb.Append(c);
                    }
                    else
                    {
                        // premature end of number
                        break;
                    }
                }
                else if ((c == 'e' || c == 'E') && !seenHash)
                {
                    // after a '#', 'e' is an ordinary digit and is handled by IsNumberContinue
                    if (!seenE)
                    {
                        seenE = true;
                        buffer.Advance();
                        sb.Append(c);
                    }
                    else
                    {
                        // premature end of number
                        break;
                    }
                }
                else if (c == '+' || c == '-')
                {
                    // a sign belongs to the number only directly after the exponent marker
                    if (seenE && (last == 'e' || last == 'E'))
                    {
                        buffer.Advance();
                        sb.Append(c);
                    }
                    else
                    {
                        // possibly already saw sign
                        break;
                    }
                }
                else if (IsNumberContinue(c))
                {
                    buffer.Advance();
                    sb.Append(c);
                }
                else
                {
                    break;
                }

                last = c;
            }

            Debug.Assert(!(seenDecimal && seenHash)); // should not have seen both
            Debug.Assert(!(seenE && seenHash));       // should not have seen both
            if (last == '.')
            {
                // numbers can't end in a decimal point; hand the '.' back to the caller
                buffer.Retreat();
                sb.Remove(sb.Length - 1, 1);
                seenDecimal = false;
            }

            var        text        = sb.ToString();
            double     doubleValue = default(double);
            BigInteger bigValue    = default(BigInteger);
            string     error       = null;

            // Source text is culture-independent, so never parse with the current culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // An exponent without a decimal point is still parsed (and returned) as a double.
            bool isFloat = !seenHash && (seenDecimal || seenE);

            if (isFloat)
            {
                if (!double.TryParse(text, System.Globalization.NumberStyles.Float, invariant, out doubleValue))
                {
                    error = string.Format("Invalid floating point literal '{0}'", text);
                }
            }
            else if (!seenHash)
            {
                if (!BigInteger.TryParse(text, System.Globalization.NumberStyles.Integer, invariant, out bigValue))
                {
                    error = string.Format("Invalid integer literal '{0}'", text);
                }
            }
            else
            {
                // based integer: <base>#<digits>
                var parts = text.Split(new[] { '#' }, 2);
                int @base;
                if (int.TryParse(parts[0], System.Globalization.NumberStyles.Integer, invariant, out @base) &&
                    @base >= 2 && @base <= 36)
                {
                    // accumulate in a BigInteger so long digit strings cannot overflow
                    BigInteger val = BigInteger.Zero;
                    foreach (var l in parts[1])
                    {
                        int digitValue;
                        if (l >= '0' && l <= '9')
                        {
                            digitValue = l - '0';
                        }
                        else if (l >= 'a' && l <= 'z')
                        {
                            digitValue = l - 'a' + 10;
                        }
                        else if (l >= 'A' && l <= 'Z')
                        {
                            digitValue = l - 'A' + 10;
                        }
                        else
                        {
                            // not an alphanumeric digit at all
                            digitValue = -1;
                        }

                        if (digitValue < 0 || digitValue >= @base)
                        {
                            error = string.Format("Digit '{0}' not valid for base '{1}'", l, @base);
                            return new ErlangNumberToken(text)
                            {
                                Error = error
                            };
                        }

                        val = (val * @base) + digitValue;
                    }

                    bigValue = val;
                }
                else
                {
                    error = "Base must be between 2 and 36 inclusive.";
                }
            }

            if (isFloat)
            {
                return new ErlangNumberToken(text, doubleValue)
                {
                    Error = error
                };
            }

            return new ErlangNumberToken(text, bigValue)
            {
                Error = error
            };
        }
    }