Example #1
0
        /// <summary>Parse next token from currently parsed line, starting at given position and
        /// add the retrieved token at end of given token list.</summary>
        /// <param name="aList">The token list where to add the newly recognized token.</param>
        /// <param name="rPos">The index in current source code line of the first not yet consumed
        /// character. On return this parameter will be updated to account for characters that would
        /// have been consumed.</param>
        /// <remarks>The token text is the span of <c>mData</c> from <c>mStart</c> up to (but not
        /// including) <paramref name="rPos"/>. On return <c>mStart</c> is advanced past the token.
        /// Whitespace tokens are only added to <paramref name="aList"/> when
        /// <c>mIncludeWhiteSpace</c> is set. All case-insensitive lookups use the invariant culture so
        /// that recognition of registers and keywords does not depend on the current culture.</remarks>
        protected void NextToken(TokenList aList, ref int rPos)
        {
            #region Pattern Notes

            // All patterns start with _, this makes them reserved. User can use too, but at own risk of conflict.
            //
            // Wildcards
            // -_REG or ??X
            // -_REG8 or ?H,?L
            // -_REG16 or ?X
            // -_REG32 or E?X
            //     - ? based ones are ugly and less clear
            // -_Keyword
            // -_ABC
            //
            //
            // Multiple Options (All caps only) - Registers only
            // Used to support EAX,EBX - ie lists. But found out it wasn't really needed. May add again later.
            //
            // -AX/AL - Conflict if we ever use /
            // -AX|AL - Conflict if we ever use |
            // -AX,AL - , is unlikely to ever be used as an operator and is logical as a separator. Method calls might use, but likely better to use a space
            //          since we will only allow simple arguments, not compound.
            // -_REG:AX|AL - End terminator issue
            // -_REG[AX|AL] - Conflict with existing indirect access. Is indirect access always numeric? I think x86 has some register based ones too.
            //
            //
            // Specific: Register, Keyword, AlphaNum
            // -EAX

            #endregion

            string xString = null;
            char   xChar1  = mData[mStart];
            var    xToken  = new Token();

            // Directives and literal assembler code. These are only recognized while nothing but
            // whitespace has been tokenized so far (mAllWhitespace is cleared below as soon as a
            // non-whitespace token is produced).
            if (mAllWhitespace)
            {
                if (xChar1 == '!' || xChar1 == '/')
                {
                    rPos    = mData.Length; // This will account for the dummy whitespace at the end.
                    xString = mData.Substring(mStart + 1, rPos - mStart - 1).Trim();
                    // So ToString/Format wont generate error
                    xString = xString.Replace("{", "{{");
                    xString = xString.Replace("}", "}}");

                    // Fix issue #15662 with string length check.
                    // Fix issue #15663 with comparing from mData and not from xString anymore.
                    if (xChar1 == '!')
                    {
                        // Literal assembler code.
                        xToken.Type = TokenType.Line_LiteralAsm;
                    }
                    else if (xString.Length > 0)
                    {
                        // The line started with '/': the second character decides between a
                        // comment ("//") and a directive ("/!"). That second character is stripped
                        // from the token text.
                        char xChar2 = xString[0];
                        xString = xString.Substring(1);
                        if (xChar2 == '/')
                        {
                            xToken.Type = TokenType.Line_Comment;
                        }
                        else if (xChar2 == '!')
                        {
                            xToken.Type = TokenType.Line_Directive;
                        }
                        // Otherwise the type stays Unknown and the generic classification below
                        // runs, with rPos already moved to the end of the line.
                    }
                }
            }

            if (xToken.Type == TokenType.Unknown)
            {
                xString = mData.Substring(mStart, rPos - mStart);

                if (string.IsNullOrWhiteSpace(xString) && xString.Length > 0)
                {
                    xToken.Type = TokenType.WhiteSpace;
                }
                else if (xChar1 == '\'')
                {
                    // String literal: drop the surrounding single quotes.
                    xToken.Type = TokenType.ValueString;
                    xString     = xString.Substring(1, xString.Length - 2);
                }
                else if (char.IsDigit(xChar1))
                {
                    xToken.Type = TokenType.ValueInt;
                    // Ordinal comparison: "0x" is a syntactic marker, not linguistic text.
                    if (xString.StartsWith("0x", StringComparison.Ordinal))
                    {
                        xToken.SetIntValue(Convert.ToUInt32(xString, 16));
                    }
                    else
                    {
                        xToken.SetIntValue(uint.Parse(xString));
                    }
                }
                else if (xChar1 == '$')
                {
                    // $-prefixed hexadecimal literal: replace the leading '$' with "0x" so the raw
                    // value has the same shape as a 0x literal, then convert as base 16.
                    xToken.Type = TokenType.ValueInt;
                    xString     = "0x" + xString.Substring(1);
                    xToken.SetIntValue(Convert.ToUInt32(xString, 16));
                }
                else if (IsAlphaNum(xChar1))
                {
                    // This must be after check for ValueInt
                    // Invariant upper-casing: a culture-sensitive ToUpper() would, for instance,
                    // turn "edi" into "EDİ" under a Turkish culture and break the lookups below.
                    string xUpper = xString.ToUpperInvariant();

                    // Special parsing when in pattern mode. We recognize some special strings
                    // which would otherwise be considered as simple AlphaNum tokens.
                    if (mAllowPatterns)
                    {
                        if (RegisterPatterns.Contains(xUpper))
                        {
                            xToken.Type = TokenType.Register;
                        }
                        else if (xUpper == "_KEYWORD")
                        {
                            xToken.Type = TokenType.Keyword;
                            xString     = null;
                        }
                        else if (xUpper == "_ABC")
                        {
                            xToken.Type = TokenType.AlphaNum;
                            xString     = null;
                        }
                        else if (xUpper == "_PCALL")
                        {
                            xString     = null;
                            xToken.Type = TokenType.Call;
                        }
                    }

                    if (xToken.Type == TokenType.Unknown)
                    {
                        XSRegisters.Register xRegister;
                        if (Registers.TryGetValue(xUpper, out xRegister))
                        {
                            xToken.Type = TokenType.Register;
                            xToken.SetRegister(xRegister);
                        }
                        else if (mKeywords.Contains(xUpper))
                        {
                            xToken.Type = TokenType.Keyword;
                        }
                        else if (xString.Contains("(") && xString.Contains(")"))
                        {
                            // Identifier followed by a parenthesized part: a call.
                            xToken.Type = TokenType.Call;
                        }
                        else
                        {
                            xToken.Type = TokenType.AlphaNum;
                        }
                    }
                }
                else if (Delimiters.Contains(xString))
                {
                    xToken.Type = TokenType.Delimiter;
                }
                else if (Operators.Contains(xString))
                {
                    xToken.Type = TokenType.Operator;
                }
            }

            xToken.RawValue    = xString;
            xToken.SrcPosStart = mStart;
            // Call tokens record rPos itself as their end position and make the next token start
            // one character further; every other token ends at rPos - 1 and the next starts at rPos.
            xToken.SrcPosEnd   = xToken.Type == TokenType.Call ? rPos : rPos - 1;
            if (mAllWhitespace && (xToken.Type != TokenType.WhiteSpace))
            {
                mAllWhitespace = false;
            }
            mStart = xToken.Type == TokenType.Call ? rPos + 1 : rPos;

            if (mIncludeWhiteSpace || (xToken.Type != TokenType.WhiteSpace))
            {
                aList.Add(xToken);
            }
        }
Example #2
0
        /// <summary>Consume text that has been provided to the class constructor, splitting it into
        /// a list of tokens.</summary>
        /// <returns>The resulting tokens list.</returns>
        /// <exception cref="Exception">A single-quoted string literal is opened but never
        /// closed.</exception>
        /// <remarks>Characters are classified as whitespace, identifier or symbol; a token boundary
        /// is emitted through <c>NextToken</c> each time the classification changes. Single-quoted
        /// string literals and parenthesized spans are scanned as a whole.</remarks>
        protected TokenList Parse()
        {
            // Save in comment, might be useful in future. Already had to dig it out of TFS once
            //var xRegex = new System.Text.RegularExpressions.Regex(@"(\W)");

            var      xResult       = new TokenList();
            CharType xLastCharType = CharType.WhiteSpace;
            CharType xCharType     = CharType.WhiteSpace;
            int      i             = 0;

            for (i = 0; i < mData.Length; i++)
            {
                char xChar = mData[i];
                // Extract string literal (surrounded with single quote characters).
                if (xChar == '\'')
                {
                    // Take data before the ' as a token. When nothing is pending (the literal
                    // starts exactly at mStart) there is no such data, and asking NextToken for an
                    // empty span starting on a quote would fail while stripping the quotes.
                    if (mStart < i)
                    {
                        NextToken(xResult, ref i);
                    }
                    // Now scan to the next ' taking into account escaped single quotes.
                    bool escapedCharacter = false;
                    for (i = i + 1; i < mData.Length; i++)
                    {
                        bool done = false;
                        switch (mData[i])
                        {
                        case '\'':
                            if (escapedCharacter)
                            {
                                // Escaped quote: it belongs to the literal. The escape is now
                                // consumed, so the next quote must be able to close the string.
                                escapedCharacter = false;
                            }
                            else
                            {
                                done = true;
                            }
                            break;

                        case '\\':
                            // A backslash toggles the flag so that "\\" is an escaped backslash
                            // and does not escape the character that follows it.
                            escapedCharacter = !escapedCharacter;
                            break;

                        default:
                            escapedCharacter = false;
                            break;
                        }
                        if (done)
                        {
                            break;
                        }
                    }
                    // The scan ran off the end of the line without finding the closing quote.
                    if (i == mData.Length)
                    {
                        throw new Exception("Unterminated string.");
                    }
                    // Step past the closing quote so it is part of the string token.
                    i++;
                    xCharType = CharType.String;
                }
                else if (xChar == '(')
                {
                    // Skip forward to the last ')' of the line and emit everything pending up to
                    // and including it as one token.
                    for (i += 1; i < mData.Length; i++)
                    {
                        if (mData[i] == ')' && mData.LastIndexOf(')') <= i)
                        {
                            i++;
                            NextToken(xResult, ref i);
                            break;
                        }
                    }
                }
                else if (char.IsWhiteSpace(xChar))
                {
                    xCharType = CharType.WhiteSpace;
                }
                else if (IsAlphaNum(xChar))
                {
                    // _ and . were never likely to stand on their own. ie ESP _ 2 and ESP . 2 are never likely to be used.
                    // Having them on their own required a lot of code
                    // to treat them as a single unit where we did use them. So we treat them as AlphaNum.
                    xCharType = CharType.Identifier;
                }
                else
                {
                    xCharType = CharType.Symbol;
                }

                // i > 0 - Never do NewToken on first char. i = 0 is just a pass to get char and set lastchar.
                // But its faster as the second short circuit rather than a separate if.
                if ((xCharType != xLastCharType) && (0 < i))
                {
                    NextToken(xResult, ref i);
                }

                xLastCharType = xCharType;
            }

            // Last token
            if (mStart < mData.Length)
            {
                NextToken(xResult, ref i);
            }

            return(xResult);
        }
Example #3
0
        // BlueSkeye : Seems to be unused. Commented out.
        //public bool PatternMatches(string aPattern) {
        //  var xParser = new Parser(aPattern, false, true);
        //  return PatternMatches(xParser.Tokens);
        //}

        /// <summary>Compare this token list against another one, position by position, treating
        /// pattern placeholders as wildcards.</summary>
        /// <param name="aObj">The token list to compare with this one.</param>
        /// <returns><c>true</c> when both lists have the same number of tokens and every pair of
        /// tokens matches; <c>false</c> otherwise.</returns>
        /// <remarks>
        /// Two tokens match when they have the same type and:
        /// - for AlphaNum, Keyword, Operator and Delimiter tokens, either raw value is null or the
        ///   raw values are equal ignoring case;
        /// - for Register tokens, either side is <c>_REG</c>, or one of the register class patterns
        ///   is accepted by <c>RegistersMatch</c>, or both names are equal ignoring case;
        /// - for any other type, the type alone is compared.
        /// Comparisons are ordinal / invariant so the result does not depend on the current culture.
        /// </remarks>
        public bool PatternMatches(TokenList aObj)
        {
            // Dont compare TokenHashCodes, they take just as long to calculate
            // as a full comparison. Besides this function is often called after
            // comparing hash codes already.

            if (Count != aObj.Count)
            {
                return(false);
            }

            for (int i = 0; i < aObj.Count; i++)
            {
                var xThis = this[i];
                var xThat = aObj[i];
                if (xThis.Type != xThat.Type)
                {
                    return(false);
                }

                if (xThis.Type == TokenType.AlphaNum || xThis.Type == TokenType.Keyword || xThis.Type == TokenType.Operator || xThis.Type == TokenType.Delimiter)
                {
                    // A null raw value on either side matches any text of the same token type.
                    if (xThis.RawValue != null && xThat.RawValue != null
                        && !string.Equals(xThis.RawValue, xThat.RawValue, StringComparison.OrdinalIgnoreCase))
                    {
                        return(false);
                    }
                }
                else if (xThis.Type == TokenType.Register)
                {
                    // Invariant upper-casing keeps register names comparable with the register
                    // tables regardless of the current culture.
                    string xThisUpper = xThis.RawValue.ToUpperInvariant();
                    string xThatUpper = xThat.RawValue.ToUpperInvariant();

                    // Evaluated left to right with short-circuiting. The plain equality test
                    // covers _REG==_REG, _REG8==_REG8, ... and DX==DX; it must be last, after
                    // the patterns.
                    bool xRegistersMatch =
                        xThisUpper == "_REG" || xThatUpper == "_REG"
                        || RegistersMatch(xThisUpper, xThatUpper, "_REG8", Parser.Registers8)
                        || RegistersMatch(xThisUpper, xThatUpper, "_REG16", Parser.Registers16)
                        || RegistersMatch(xThisUpper, xThatUpper, "_REG32", Parser.Registers32)
                        || RegistersMatch(xThisUpper, xThatUpper, "_REGIDX", Parser.RegistersIdx)
                        || RegistersMatch(xThisUpper, xThatUpper, "_REGADDR", Parser.RegistersAddr)
                        || xThisUpper == xThatUpper;

                    if (!xRegistersMatch)
                    {
                        return(false);
                    }
                }
            }

            return(true);
        }