コード例 #1
0
ファイル: LexerParser.cs プロジェクト: fschuhi/6502.Net
        static Token ParseToken(char previousChar, Token previousToken, RandomAccessIterator <char> iterator, bool parsingAssembly = true)
        {
            char c = iterator.Current;

            while (char.IsWhiteSpace(c))
            {
                if (c == NewLine && parsingAssembly)
                {
                    iterator.Rewind(iterator.Index - 1);
                    return(null);
                }
                c = iterator.GetNext();
            }
            if ((c == ';' && parsingAssembly) || c == EOF)
            {
                return(null);
            }

            var token = new Token();

            //first case, simplest
            var nextChar = iterator.PeekNext();

            if (char.IsDigit(c) ||
                char.IsLetter(c) ||
                c == '_' ||
                c == '?' ||
                (c == '.' && char.IsLetterOrDigit(nextChar)) ||
                (c == '\\' && char.IsLetterOrDigit(nextChar)))
            {
                token.Type = TokenType.Operand;
                if (char.IsDigit(c) || (c == '.' && char.IsDigit(nextChar)))
                {
                    if (char.IsDigit(c) && previousChar == '$')
                    {
                        token.Name = ScanTo(previousChar, iterator, FirstNonHex);
                    }
                    else if (c == '0' && (nextChar == 'b' ||
                                          nextChar == 'B' ||
                                          nextChar == 'o' ||
                                          nextChar == 'O' ||
                                          nextChar == 'x' ||
                                          nextChar == 'X'))
                    {
                        token.Name = ScanTo(previousChar, iterator, FirstNonNonBase10);
                    }
                    else
                    {
                        token.Name = ScanTo(previousChar, iterator, FirstNonNumeric);
                    }
                }
                else if (c == '\\')
                {
                    iterator.MoveNext();
                    token.Name = c + ScanTo(previousChar, iterator, FirstNonLetterOrDigit);
                }
                else if (c == '?')
                {
                    token.UnparsedName =
                        token.Name     = "?";
                    return(token);
                }
                else
                {
                    token.UnparsedName =
                        token.Name     = ScanTo(previousChar, iterator, FirstNonSymbol);
                    if (parsingAssembly && !Assembler.Options.CaseSensitive)
                    {
                        token.Name = token.Name.ToLower();
                    }

                    if (parsingAssembly && Assembler.InstructionLookupRules.Any(rule => rule(token.Name)))
                    {
                        token.Type = TokenType.Instruction;
                    }
                    else if (iterator.Current == '(' ||
                             (iterator.Current != NewLine && char.IsWhiteSpace(iterator.Current) &&
                              iterator.PeekNextSkipping(NonNewLineWhiteSpace) == '('))
                    {
                        token.Type         = TokenType.Operator;
                        token.OperatorType = OperatorType.Function;
                    }
                    else
                    {
                        token.Type = TokenType.Operand;
                    }
                }
            }
            else if (previousToken != null &&
                     previousToken.Name.Equals("%") &&
                     previousToken.OperatorType == OperatorType.Unary &&
                     (c == '.' || c == '#'))
            {
                // alternative binary string parsing
                token.Type = TokenType.Operand;
                token.Name = ScanTo(previousChar, iterator, FirstNonAltBin).Replace('.', '0')
                             .Replace('#', '1');
            }
            else if (c == '"' || c == SingleQuote)
            {
                var open         = c;
                var quoteBuilder = new StringBuilder(c.ToString());
                var escaped      = false;
                while ((c = iterator.GetNext()) != open && c != char.MinValue)
                {
                    quoteBuilder.Append(c);
                    if (c == '\\')
                    {
                        escaped = true;
                        quoteBuilder.Append(iterator.GetNext());
                    }
                }
                if (c == char.MinValue)
                {
                    throw new ExpressionException(iterator.Index, $"Quote string not enclosed.");
                }
                quoteBuilder.Append(c);
                var unescaped = escaped ? Regex.Unescape(quoteBuilder.ToString()) : quoteBuilder.ToString();
                if (c == '\'' && unescaped.Length > 3)
                {
                    throw new ExpressionException(iterator.Index, "Too many characters in character literal.");
                }
                token.Name = unescaped;
                token.Type = TokenType.Operand;
            }
            else
            {
                if (c == '+' || c == '-')
                {
                    /*
                     * Scenarios for parsing '+' or '-', since they can function as different things
                     * in an expression.
                     * 1. The binary operator:
                     *  a. OPERAND+3 / ...)+(... => single '+' sandwiched between two operands/groupings
                     *  b. OPERAND++3 / ...)++(... => the first '+' is a binary operator since it is to the
                     *     right of an operand/grouping. We need to split off the single '++' to two
                     *     separate '+' tokens. What kind of token is the second '+'? We worry about that later.
                     *  c. OPERAND+++3 / ...)+++(... => again, the first '+' is a binary operator. We need to split
                     *     it off from the rest of the string of '+' characters, and we worry about later.
                     * 2. The unary operator:
                     *  a. +3 / +(... => single '+' immediately preceding an operand/grouping.
                     *  b. ++3 / ++(... => parser doesn't accept C-style prefix (or postfix) operators, so one of these is an
                     *     anonymous label. Which one? Easy, the first. Split the '+' string.
                     * 3. A full expression mixing both:
                     *  a. OPERAND+++3 / ...)+++(... => From scenario 1.c, we know the first '+' is a binary operator,
                     *     which leaves us with => '++3' left, which from scenario 2.b. we know the first '+'
                     *     has to be an operand. So we split the string again, so that the next scan leaves us with
                     *     '+3', so the third and final plus is a unary operator.
                     * OPERAND => operand
                     * +       => binary operator
                     * +       => operand
                     * +       => unary operator
                     * 3/(     => operand/grouping
                     * 4. A line reference:
                     *   a. + => Simplest scenario.
                     *   b. ++, +++, ++++, etc. => Treat as one.
                     */
                    // Get the full string
                    token.Name = ScanTo(previousChar, iterator, FirstNonPlusMinus);
                    if (previousToken != null && (previousToken.Type == TokenType.Operand || previousToken.Name.Equals(")")))
                    {
                        // looking backward at the previous token, if it's an operand or grouping then we
                        // know this is a binary
                        token.Type         = TokenType.Operator;
                        token.OperatorType = OperatorType.Binary;
                        if (token.Name.Length > 1) // we need to split off the rest of the string so we have a single char '+'
                        {
                            token.Name = c.ToString();
                            iterator.Rewind(iterator.Index - token.Position - 1);
                        }
                    }
                    else if (!IsNotOperand(nextChar) || nextChar == '(')
                    {
                        // looking at the very next character in the input stream, if it's an operand or grouping
                        // then we know this is a unary
                        if (token.Name.Length > 1)
                        {
                            // If the string is greater than one character,
                            // then it's not a unary, it's an operand AND a unary. So we split off the
                            // rest of the string.
                            token.Name = c.ToString();
                            iterator.Rewind(iterator.Index - token.Position - 1);
                            token.Type = TokenType.Operand;
                        }
                        else
                        {
                            token.Type         = TokenType.Operator;
                            token.OperatorType = OperatorType.Unary;
                        }
                    }
                    else
                    {
                        token.Type = TokenType.Operand;
                    }
                }
                else if (c == '*')
                {
                    // Same as +/- scenario above, if the previous token is an operand or grouping,
                    // we need to treat the splat as a binary operator.
                    if (previousToken != null && (previousToken.Type == TokenType.Operand || previousToken.Name.Equals(")")))
                    {
                        token.Type         = TokenType.Operator;
                        token.OperatorType = OperatorType.Binary;
                    }
                    else
                    {
                        // but since there is no unary version of this we will treat as an operand, and let the evaluator
                        // deal with any problems like *OPERAND /*(
                        token.Type = TokenType.Operand;
                    }
                    token.Name = c.ToString();
                }
                else
                {
                    // not a number, symbol, string, or special (+, -, *) character. So we just treat as an operator
                    token.Type = TokenType.Operator;
                    if (c.IsSeparator() || c.IsOpenOperator() || c.IsClosedOperator())
                    {
                        token.Name = c.ToString();
                        if (c.IsSeparator())
                        {
                            token.OperatorType = OperatorType.Separator;
                        }
                        else if (c.IsOpenOperator())
                        {
                            token.OperatorType = OperatorType.Open;
                        }
                        else
                        {
                            token.OperatorType = OperatorType.Closed;
                        }
                    }
                    else
                    {
                        token.Name         = ScanTo(previousChar, iterator, FirstNonMatchingOperator);
                        token.UnparsedName = token.Name;

                        /* The general strategy to determine whether an operator is unary or binary:
                         *  1. Is it actually one of the defined unary types?
                         *  2. Peek at the next character. Is it a group or operand, or not?
                         *  3. Look behind at the previous token. Is it also a group or operand, or not?
                         *  4. If the token does NOT follow an operand or group, AND it precedes a group character,
                         *     or operand character, then it is a unary.
                         *  5. All other cases, binary.
                         *
                         */
                        if (
                            (
                                (
                                    c.IsUnaryOperator() &&
                                    (
                                        !IsNotOperand(nextChar) ||
                                        nextChar == '(' ||
                                        nextChar.IsRadixOperator() ||
                                        nextChar.IsUnaryOperator()
                                    )
                                ) ||
                                (
                                    c.IsRadixOperator() && char.IsLetterOrDigit(nextChar)
                                ) ||
                                (
                                    c == '%' && (nextChar == '.' || nextChar == '#')
                                )
                            ) &&
                            (previousToken == null ||
                             (previousToken.Type != TokenType.Operand &&
                              !previousToken.Name.Equals(")")
                             )
                            )
                            )
                        {
                            token.OperatorType = OperatorType.Unary;
                        }
                        else
                        {
                            token.OperatorType = OperatorType.Binary;
                        }
                    }
                }
            }
            if (string.IsNullOrEmpty(token.UnparsedName))
            {
                token.UnparsedName = token.Name;
            }
            if (iterator.Current != token.Name[^ 1])
            {
                iterator.Rewind(iterator.Index - 1);
            }
            return(token);
        }