/// <summary>
/// Tokenizer for simple address specifiers. Walks <paramref name="s"/> and appends
/// tokens to <paramref name="tokens"/> (existing entries are kept).
/// The characters <c>[ ] + - *</c> act as separators and are stored as one-character
/// tokens of their own; the trimmed text between separators is stored when it is not empty.
/// A double quote toggles quoted mode: separators inside quotes are ignored, the quote
/// characters themselves are never stored, and any text sitting between the previous
/// separator and an opening quote is skipped.
/// </summary>
/// <param name="s">Text to split.</param>
/// <param name="tokens">Array that receives the tokens, in order of appearance.</param>
public void Tokenize(String s, AStringArray tokens)
{
    // Grows the array by one slot and stores the value in it.
    void Append(String value)
    {
        tokens.SetLength(tokens.Length + 1);
        tokens.Last = value;
    }

    var segmentStart = 0;
    var insideQuotes = false;

    for (var pos = 0; pos < s.Length; pos++)
    {
        var ch = s[pos];
        if (UArrayUtils.InArray(ch, '"', '[', ']', '+', '-', '*'))
        {
            if (ch == '"')
            {
                if (!insideQuotes)
                {
                    // the quoted text starts right after the opening quote
                    segmentStart = pos + 1;
                }

                insideQuotes = !insideQuotes;
            }

            if (!insideQuotes)
            {
                var pending = AStringUtils.Copy(s, segmentStart, pos - segmentStart).Trim();
                if (pending != "")
                {
                    Append(pending);
                }

                //store separator char as well, unless it's "
                if (ch != '"')
                {
                    Append(ch.ToString());
                }

                segmentStart = pos + 1;
            }
        }
    }

    //last part
    var tail = AStringUtils.Copy(s, segmentStart, s.Length).Trim();
    if (tail != "")
    {
        Append(tail);
    }
}
/// <summary>
/// Rewrites an operand token in place. The token is split on <c>[ ] + -</c> and space
/// (outside quotes), each piece that is neither a hexadecimal value nor a register is
/// looked up through <c>SymbolHandler.GetAddressFromName</c> and replaced by its
/// hexadecimal text when the lookup succeeds, constant <c>*</c>, <c>+</c> and <c>-</c>
/// operations between hexadecimal pieces are folded, and the pieces are concatenated
/// back into <paramref name="token"/>.
/// </summary>
/// <param name="token">The operand text; replaced by the rewritten text.</param>
/// <returns><c>false</c> when <paramref name="token"/> is empty (it is left untouched), otherwise <c>true</c>.</returns>
/// <exception cref="Exception">
/// Thrown with the message "Invalid" when the token has the <c>[[...]]</c> form and the
/// inner text cannot be resolved to an address.
/// </exception>
public Boolean Rewrite(ref String token)
{
    if (token.Length == 0)
    {
        return false; //empty string
    }

    var tokens = new AStringArray();
    tokens.SetLength(0);

    // Grows the piece list by one slot and stores the value in it.
    void Append(String value)
    {
        tokens.SetLength(tokens.Length + 1);
        tokens.Last = value;
    }

    String temp;

    /* 5.4: special pointer notation case */
    if (token.Length > 4 && token.StartsWith("[[", StringComparison.Ordinal) && token.EndsWith("]]", StringComparison.Ordinal))
    {
        //looks like a pointer in an address specifier: resolve the inner text and keep one pair of brackets
        temp = "[" + AStringUtils.IntToHex(SymbolHandler.GetAddressFromName(AStringUtils.Copy(token, 2, token.Length - 4), true, out var haserror), 8) + ']';
        if (haserror)
        {
            throw new Exception("Invalid");
        }

        token = temp;
    }
    /* 5.4 ^^^ */

    // ---- pass 1: split into pieces ----
    temp = "";
    var quoteChar = '\0';
    var inQuote = false;
    for (var pos = 0; pos < token.Length; pos++)
    {
        var c = token[pos];
        if (UArrayUtils.InArray(c, '\'', '"'))
        {
            if (!inQuote)
            {
                //start of a quote
                quoteChar = c;
                inQuote = true;
            }
            else if (c == quoteChar)
            {
                inQuote = false;
            }
        }

        //6.8.4 (added ' ' for FAR, LONG, SHORT)
        if (inQuote || !UArrayUtils.InArray(c, '[', ']', '+', '-', ' '))
        {
            temp += c;
            continue;
        }

        // c is a separator outside a quoted string: flush the text collected so far
        if (temp != "")
        {
            Append(temp);
            temp = "";
        }

        if (tokens.Length > 0 && UArrayUtils.InArray(c, '+', '-') && tokens[tokens.Length - 1] == " ")
        {
            //relative offset ' +xxx': the sign stays attached to the value that follows
            temp += c;
            continue;
        }

        Append(c.ToString());
    }

    if (temp != "")
    {
        Append(temp);
    }

    // ---- pass 2: replace symbol names by their address ----
    for (var i = 0; i < tokens.Length; i++)
    {
        var current = tokens[i];

        //3/16/2011: 11:15 (replaced or with and)
        if (current.Length < 1 || UArrayUtils.InArray(current[0], '[', ']', '+', '-', '*', ' '))
        {
            continue;
        }

        AStringUtils.Val("0x" + current, out Int64 _, out var err);
        if (err == 0 || GetReg(current, false) != -1)
        {
            continue; //already a hexadecimal value or a register
        }

        temp = AStringUtils.IntToHex(SymbolHandler.GetAddressFromName(current, true, out var hasError), 8);
        if (!hasError)
        {
            tokens[i] = temp; //can be rewritten as a hexadecimal
            continue;
        }

        //getreg failed, but it could be a scaled register (reg*2, reg*4, reg*8)
        var star = AStringUtils.Pos("*", current);
        if (star != -1 && current.Length > star + 1 && UArrayUtils.InArray(current[star + 1], '2', '4', '8'))
        {
            continue;
        }

        if (i < tokens.Length - 1)
        {
            //perhaps it can be concatenated with the next one
            var next = tokens[i + 1];
            if (next.Length > 0 && !UArrayUtils.InArray(next[0], '\'', '"', '[', ']', '(', ')', ' ')) //not an invalid token char
            {
                tokens[i + 1] = current + next;
                tokens[i] = "";
            }
        }
    }

    // ---- pass 3: do some calculations, multiply first ----
    // Folded slots are blanked, not removed, so indexes stay stable in this pass.
    for (var i = 1; i <= tokens.Length - 2; i++)
    {
        if (tokens[i] != "*")
        {
            continue;
        }

        AStringUtils.Val("0x" + tokens[i - 1], out Int64 a, out var errLeft);
        AStringUtils.Val("0x" + tokens[i + 1], out Int64 b, out var errRight);
        if (errLeft == 0 && errRight == 0)
        {
            tokens[i - 1] = AStringUtils.IntToHex(a * b, 8);
            tokens[i] = "";
            tokens[i + 1] = "";
        }
    }

    // ---- pass 4: additions and subtractions ----
    var index = 1;
    while (index <= tokens.Length - 2)
    {
        //get the value of the token before and after this token
        AStringUtils.Val("0x" + tokens[index - 1], out Int64 left, out var errLeft);
        AStringUtils.Val("0x" + tokens[index + 1], out Int64 right, out var errRight);

        // earlier passes can leave empty pieces behind; treat them as "no operator"
        var op = tokens[index].Length > 0 ? tokens[index][0] : '\0';

        if (errLeft == 0 && errRight == 0)
        {
            if (op == '+' || op == '-')
            {
                tokens[index - 1] = AStringUtils.IntToHex(op == '+' ? left + right : left - right, 8);
                tokens.Remove(index, 2);

                // Step back one slot to re-examine around the folded value, but never
                // below 1: the loop body reads tokens[index - 1].
                index = index > 1 ? index - 1 : 1;
                continue;
            }
        }
        else if (errRight == 0 && op == '-' && tokens[index - 1] != "#")
        {
            //before is not a valid value, but after it is, and this is a - (so -value)
            //(don't mess with #-10000)
            tokens[index] = "+";
            tokens[index + 1] = AStringUtils.IntToHex(-right, 8);
        }

        index++;
    }

    // ---- glue the pieces back together ----
    token = "";
    for (var i = 0; i < tokens.Length; i++)
    {
        token += tokens[i];
    }

    tokens.SetLength(0);
    return true;
}
/// <summary>
/// Splits an instruction line into tokens. Trailing spaces and commas are trimmed,
/// then the text is split on spaces and commas that are not inside a quoted string
/// (single or double quotes). Size and distance prefixes (BYTE, WORD, DWORD, QWORD,
/// TBYTE, TWORD, DQWORD, each optionally followed by " PTR", and FAR, LONG, SHORT)
/// are not emitted on their own: they stay attached to the text that follows them.
/// Tokens are upper-cased unless they start with '$', contain "KERNEL_" (as matched by
/// <c>AStringUtils.Pos</c>), or are longer than two characters and start with a quote.
/// Every token after the first is passed through <c>Rewrite</c>. Empty, single-space
/// and single-comma tokens are removed at the end.
/// </summary>
/// <param name="opCode">The instruction text.</param>
/// <param name="tokens">Receives the tokens; any previous content is discarded.</param>
/// <returns>Always <c>true</c>.</returns>
public Boolean Tokenize(String opCode, AStringArray tokens)
{
    tokens.SetLength(0);
    if (opCode.Length > 0)
    {
        opCode = opCode.TrimEnd(' ', ',');
    }

    var quoteChar = '\0';
    var quoted = false;
    var last = 0; // start of the token currently being collected

    // i runs one past the end so the final token is flushed as well
    for (var i = 0; i <= opCode.Length; i++)
    {
        var atEnd = i == opCode.Length;

        //check if this is a quote char
        if (!atEnd && (opCode[i] == '\'' || opCode[i] == '"'))
        {
            if (!quoted)
            {
                quoted = true;
                quoteChar = opCode[i];
            }
            else if (opCode[i] == quoteChar) //check if it's the end quote
            {
                quoted = false;
            }
        }

        //a token separator is a space or a comma, but only outside a quoted string
        if (!atEnd && (quoted || (opCode[i] != ' ' && opCode[i] != ',')))
        {
            continue;
        }

        // NOTE(review): at end of input one character more than is left is requested;
        // this relies on AStringUtils.Copy clamping to the available text - confirm.
        var count = atEnd ? i - last + 1 : i - last;
        var text = AStringUtils.Copy(opCode, last, count);

        //don't uppercase empty strings, kernel_ strings or strings starting with $
        // The actual length is tested, not the requested count, so an empty result
        // is never indexed.
        if (text.Length > 0 && text[0] != '$' && (text.Length < 7 || AStringUtils.Pos("KERNEL_", text, true) == -1))
        {
            //longer tokens are only made uppercase when they are not a quoted string
            if (text.Length <= 2 || !UArrayUtils.InArray(text[0], '\'', '"'))
            {
                // invariant casing: mnemonics and registers must not depend on the UI culture
                text = text.ToUpperInvariant();
            }
        }

        // Exact match against the size/distance prefixes. A prefix is "partial": the
        // token start is kept so the prefix is emitted together with what follows it.
        Boolean isPartial;
        switch (text)
        {
            case "BYTE":
            case "BYTE PTR":
            case "WORD":
            case "WORD PTR":
            case "DWORD":
            case "DWORD PTR":
            case "QWORD":
            case "QWORD PTR":
            case "TBYTE":
            case "TBYTE PTR":
            case "TWORD":
            case "TWORD PTR":
            case "DQWORD":
            case "DQWORD PTR":
            case "FAR":
            case "LONG":
            case "SHORT":
                isPartial = true;
                break;
            default:
                isPartial = false;
                break;
        }

        if (isPartial)
        {
            continue;
        }

        tokens.SetLength(tokens.Length + 1);
        tokens.Last = text;
        last = i + 1;

        // the first token is left as it is; every later token is rewritten
        if (tokens.Length > 1)
        {
            var lastElem = tokens.Last;
            Rewrite(ref lastElem); //Rewrite
            tokens.Last = lastElem;
        }
    }

    //remove useless tokens (single pass: keep everything that is not "", " " or ",")
    var kept = 0;
    for (var i = 0; i < tokens.Length; i++)
    {
        var candidate = tokens[i];
        if (candidate == "" || candidate == " " || candidate == ",")
        {
            continue;
        }

        tokens[kept] = candidate;
        kept++;
    }

    tokens.SetLength(kept);
    return true;
}