/// <summary>Parse next token from currently parsed line, starting at given position and
/// add the retrieved token at end of given token list.</summary>
/// <remarks>The token text spans from the current start position (<c>mStart</c>) up to, but
/// not including, <paramref name="rPos"/>. After classification <c>mStart</c> is advanced so
/// the next call starts where this token ended. Whitespace tokens are only added to the list
/// when <c>mIncludeWhiteSpace</c> is set.</remarks>
/// <param name="aList">The token list where to add the newly recognized token.</param>
/// <param name="lineNumber">Line number for diagnostics and debugging purpose.</param>
/// <param name="rPos">The index in current source code line of the first not yet consumed
/// character. It is only modified when a comment or literal assembler line is recognized, in
/// which case it is set to the end of the line because the whole remainder is consumed.</param>
protected void NewToken(TokenList aList, int lineNumber, ref int rPos) {
    #region Pattern Notes

    // All patterns start with _, this makes them reserved. User can use too, but at own risk of conflict.
    //
    // Wildcards
    // -_REG or ??X
    // -_REG8 or ?H,?L
    // -_REG16 or ?X
    // -_REG32 or E?X
    // - ? based ones are ugly and less clear
    // -_Keyword
    // -_ABC
    //
    //
    // Multiple Options (All caps only) - Registers only
    // Used to support EAX,EBX - ie lists. But found out wasn't really needed. May add again later.
    //
    // -AX/AL - Conflict if we ever use /
    // -AX|AL - Conflict if we ever use |
    // -AX,AL - , is unlikely to ever be used as an operator and is logical as a separator. Method calls might use, but likely better to use a space
    // since we will only allow simple arguments, not compound.
    // -_REG:AX|AL - End terminator issue
    // -_REG[AX|AL] - Conflict with existing indirect access. Is indirect access always numeric? I think x86 has some register based ones too.
    //
    //
    // Specific: Register, Keyword, AlphaNum
    // -EAX

    #endregion

    string xString = null;
    char xChar1 = mData[mStart];
    var xToken = new Token(lineNumber);

    // Recognize comments and literal assembler code. They are only honored while nothing but
    // whitespace has been seen on the line so far.
    if (mAllWhitespace && (xChar1 == '/' || xChar1 == '!')) {
        rPos = mData.Length; // This will account for the dummy whitespace at the end.
        xString = mData.Substring(mStart + 1, rPos - mStart - 1).Trim();
        // Double the braces so a later ToString/Format on this text won't generate an error.
        xString = xString.Replace("{", "{{").Replace("}", "}}");

        // Fix issue #15662 with string length check.
        // Fix issue #15663 with comparing from mData and not from xString anymore.
        if (xChar1 == '/' && xString.Length >= 2 && mData[mStart + 1] == '/') {
            // Drop the second '/' of the comment marker (the first one was skipped above).
            xString = xString.Substring(1);
            xToken.Type = TokenType.Comment;
        } else if (xChar1 == '!') {
            // Literal assembler code.
            xToken.Type = TokenType.LiteralAsm;
        }
        // Otherwise the token type is left untouched.
    } else {
        xString = mData.Substring(mStart, rPos - mStart);

        if (xString.Length > 0 && string.IsNullOrWhiteSpace(xString)) {
            xToken.Type = TokenType.WhiteSpace;
        } else if (xChar1 == '\'') {
            xToken.Type = TokenType.ValueString;
            // Remove the surrounding quotes.
            xString = xString.Substring(1, xString.Length - 2);
        } else if (char.IsDigit(xChar1)) {
            xToken.Type = TokenType.ValueInt;
            // Ordinal comparison: the hexadecimal prefix is not linguistic text.
            if (xString.StartsWith("0x", StringComparison.Ordinal)) {
                xToken.SetIntValue(Convert.ToUInt32(xString, 16));
            } else {
                xToken.SetIntValue(uint.Parse(xString));
            }
        } else if (xChar1 == '$') {
            xToken.Type = TokenType.ValueInt;
            // Replace the leading $ with a 0x prefix; the value is therefore always hexadecimal.
            xString = "0x" + xString.Substring(1);
            xToken.SetIntValue(Convert.ToUInt32(xString, 16));
        } else if (IsAlphaNum(xChar1)) { // This must be after check for ValueInt
            // Invariant upper-casing so that lookups do not depend on the current culture
            // (e.g. "edi" must become "EDI" even under a Turkish culture).
            string xUpper = xString.ToUpperInvariant();

            // Special parsing when in pattern mode. We recognize some special strings
            // which would otherwise be considered as simple AlphaNum tokens.
            if (mAllowPatterns) {
                if (RegisterPatterns.Contains(xUpper)) {
                    xToken.Type = TokenType.Register;
                } else if (xUpper == "_KEYWORD") {
                    xToken.Type = TokenType.Keyword;
                    xString = null;
                } else if (xUpper == "_ABC") {
                    xToken.Type = TokenType.AlphaNum;
                    xString = null;
                } else if (xUpper == "_PCALL") {
                    xString = null;
                    xToken.Type = TokenType.Call;
                }
            }

            // No pattern matched (or pattern mode is off): classify as a concrete token.
            if (xToken.Type == TokenType.Unknown) {
                XSRegisters.Register xRegister;
                if (Registers.TryGetValue(xUpper, out xRegister)) {
                    xToken.Type = TokenType.Register;
                    xToken.SetRegister(xRegister);
                } else if (mKeywords.Contains(xUpper)) {
                    xToken.Type = TokenType.Keyword;
                } else if (xString.Contains("(") && xString.Contains(")")) {
                    // The first character is already known to be alphanumeric in this branch.
                    xToken.Type = TokenType.Call;
                } else {
                    xToken.Type = TokenType.AlphaNum;
                }
            }
        } else if (Delimiters.Contains(xString)) {
            xToken.Type = TokenType.Delimiter;
        } else if (Operators.Contains(xString)) {
            xToken.Type = TokenType.Operator;
        }
    }

    bool xIsCall = xToken.Type == TokenType.Call;
    bool xIsWhiteSpace = xToken.Type == TokenType.WhiteSpace;

    xToken.RawValue = xString;
    xToken.SrcPosStart = mStart;
    xToken.SrcPosEnd = xIsCall ? rPos : rPos - 1;

    // The first non whitespace token ends the "only whitespace so far" state.
    if (mAllWhitespace && !xIsWhiteSpace) {
        mAllWhitespace = false;
    }

    // Call tokens advance the start position one character further than other tokens.
    mStart = xIsCall ? rPos + 1 : rPos;

    if (mIncludeWhiteSpace || !xIsWhiteSpace) {
        aList.Add(xToken);
    }
}
/// <summary>Consume text that has been provided to the class constructor, splitting it into
/// a list of tokens.</summary>
/// <remarks>The line is scanned character by character. Each character is classified
/// (whitespace, identifier, symbol, string) and a token is emitted through
/// <c>NewToken</c> whenever the classification changes. Single quoted string literals and
/// parenthesized spans are scanned as a whole.</remarks>
/// <param name="lineNumber">Line number for diagnostics and debugging.</param>
/// <returns>The resulting tokens list.</returns>
/// <exception cref="Exception">A string literal is opened but never closed.</exception>
protected TokenList Parse(int lineNumber) {
    // Save in comment, might be useful in future. Already had to dig it out of TFS once
    //var xRegex = new System.Text.RegularExpressions.Regex(@"(\W)");

    var xResult = new TokenList();
    CharType xLastCharType = CharType.WhiteSpace;
    CharType xCharType = CharType.WhiteSpace;
    // Loop invariant: position of the last closing parenthesis on the line (-1 if none).
    int xLastCloseParen = mData.LastIndexOf(')');
    int i = 0;

    for (i = 0; i < mData.Length; i++) {
        char xChar = mData[i];

        // Extract string literal (surrounded with single quote characters).
        if (xChar == '\'') {
            // Take data before the ' as a token. When the literal starts exactly at the
            // current token start there is nothing to take, and asking NewToken for an
            // empty span would fail.
            if (mStart < i) {
                NewToken(xResult, lineNumber, ref i);
                if (mData.Length <= i) {
                    // The pending text was a comment or literal assembler line: NewToken
                    // consumed the remainder of the line, so there is no string to scan.
                    break;
                }
            }

            // Now scan to the next ' taking into account escaped single quotes.
            bool xEscaped = false;
            for (i = i + 1; i < mData.Length; i++) {
                char xCurrent = mData[i];
                if (xCurrent == '\\') {
                    // A backslash toggles the state so that \\ is a literal backslash.
                    xEscaped = !xEscaped;
                } else if (xCurrent == '\'' && !xEscaped) {
                    // Unescaped quote: end of the literal.
                    break;
                } else {
                    // Any other character, including an escaped quote, ends the escape.
                    xEscaped = false;
                }
            }
            if (i == mData.Length) {
                throw new Exception("Unterminated string.");
            }
            // Step past the closing quote so that it is part of the string token.
            i++;
            xCharType = CharType.String;
        } else if (xChar == '(') {
            // Scan up to the last ')' of the line and emit everything pending up to and
            // including it as a single token. The character type is left unchanged.
            for (i += 1; i < mData.Length; i++) {
                if (mData[i] == ')' && xLastCloseParen <= i) {
                    i++;
                    NewToken(xResult, lineNumber, ref i);
                    break;
                }
            }
        } else if (char.IsWhiteSpace(xChar)) {
            xCharType = CharType.WhiteSpace;
        } else if (IsAlphaNum(xChar)) {
            // _ and . were never likely to stand on their own. ie ESP _ 2 and ESP . 2 are never likely to be used.
            // Having them on their own required a lot of code
            // to treat them as a single unit where we did use them. So we treat them as AlphaNum.
            xCharType = CharType.Identifier;
        } else {
            xCharType = CharType.Symbol;
        }

        // i > 0 - Never do NewToken on first char. i = 0 is just a pass to get char and set lastchar.
        // But it's faster as the second short circuit rather than a separate if.
        if ((xCharType != xLastCharType) && (0 < i)) {
            NewToken(xResult, lineNumber, ref i);
        }

        xLastCharType = xCharType;
    }

    // Last token
    if (mStart < mData.Length) {
        NewToken(xResult, lineNumber, ref i);
    }

    return xResult;
}
// BlueSkeye : Seems to be unused. Commented out.
//public bool PatternMatches(string aPattern) {
//  var xParser = new Parser(aPattern, false, true);
//  return PatternMatches(xParser.Tokens);
//}

/// <summary>Check whether this token list matches the given token list, token by token.</summary>
/// <remarks>
/// Both lists must have the same number of tokens and each pair of tokens must have the same
/// type. In addition:
/// - AlphaNum, Keyword, Operator and Delimiter tokens must have the same raw value, ignoring
///   case; a null raw value on either side matches any value.
/// - Register tokens match when either side is the <c>_REG</c> pattern, when
///   <c>RegistersMatch</c> accepts the pair for one of the register class patterns, or when
///   both upper-cased raw values are equal.
/// - Tokens of any other type only need to have the same type.
/// All text comparisons are ordinal / invariant so the result does not depend on the current
/// culture.
/// </remarks>
/// <param name="aObj">The token list to compare with.</param>
/// <returns>true when every token pair matches, false otherwise.</returns>
public bool PatternMatches(TokenList aObj) {
    // Don't compare TokenHashCodes, they take just as long to calculate
    // as a full comparison. Besides this function is often called after
    // comparing hash codes already.
    if (Count != aObj.Count) {
        return false;
    }

    for (int i = 0; i < aObj.Count; i++) {
        var xThis = this[i];
        var xThat = aObj[i];

        if (xThis.Type != xThat.Type) {
            return false;
        }

        if (xThis.Type == TokenType.AlphaNum
            || xThis.Type == TokenType.Keyword
            || xThis.Type == TokenType.Operator
            || xThis.Type == TokenType.Delimiter) {
            // A null raw value on either side acts as a wildcard and is not compared.
            if (xThis.RawValue != null
                && xThat.RawValue != null
                && !string.Equals(xThis.RawValue, xThat.RawValue, StringComparison.OrdinalIgnoreCase)) {
                return false;
            }
        } else if (xThis.Type == TokenType.Register) {
            string xThisUpper = xThis.RawValue.ToUpperInvariant();
            string xThatUpper = xThat.RawValue.ToUpperInvariant();

            // Evaluated left to right with short circuit. The plain equality test covers
            // _REG8==_REG8, ... and DX==DX and must stay last, after the patterns.
            bool xRegistersMatch =
                xThisUpper == "_REG" || xThatUpper == "_REG"
                || RegistersMatch(xThisUpper, xThatUpper, "_REG8", Parser.Registers8)
                || RegistersMatch(xThisUpper, xThatUpper, "_REG16", Parser.Registers16)
                || RegistersMatch(xThisUpper, xThatUpper, "_REG32", Parser.Registers32)
                || RegistersMatch(xThisUpper, xThatUpper, "_REGIDX", Parser.RegistersIdx)
                || RegistersMatch(xThisUpper, xThatUpper, "_REGADDR", Parser.RegistersAddr)
                || xThisUpper == xThatUpper;

            if (!xRegistersMatch) {
                return false;
            }
        }
    }

    return true;
}