/// <summary>
/// Reads the next token from <paramref name="iterator"/>, starting at its current character.
/// Leading whitespace is skipped, the token text is scanned, and the token is classified
/// as an operand, instruction, or operator (function, unary, binary, separator, open, or closed).
/// </summary>
/// <param name="previousChar">The character preceding the token; forwarded to the scan helper
/// and used to recognize digits that follow a '$' prefix.</param>
/// <param name="previousToken">The token parsed immediately before this one, or <c>null</c>.
/// It is consulted to decide whether '+', '-', '*' and other operators are binary or unary.</param>
/// <param name="iterator">The character iterator to read from. Its position is changed by this call.</param>
/// <param name="parsingAssembly">When <c>true</c>, a newline or ';' ends token parsing, symbol names
/// are lower-cased unless the case-sensitive option is set, and names matching an instruction
/// lookup rule are typed as instructions.</param>
/// <returns>The parsed token, or <c>null</c> when a newline or ';' is reached while
/// <paramref name="parsingAssembly"/> is <c>true</c>, or when EOF is reached.</returns>
/// <exception cref="ExpressionException">A quoted string is not closed, or a single-quoted
/// literal (including its quotes) is longer than three characters after unescaping.</exception>
static Token ParseToken(char previousChar, Token previousToken, RandomAccessIterator<char> iterator, bool parsingAssembly = true)
{
    // True when the previous token is an operand or a closing parenthesis. Evaluated lazily
    // so that previousToken.Name is only touched in the branches that actually need it.
    bool FollowsOperandOrGroup() =>
        previousToken != null &&
        (previousToken.Type == TokenType.Operand || previousToken.Name.Equals(")"));

    char c = iterator.Current;
    while (char.IsWhiteSpace(c))
    {
        if (c == NewLine && parsingAssembly)
        {
            // Step back so the newline itself is not consumed here.
            iterator.Rewind(iterator.Index - 1);
            return null;
        }
        c = iterator.GetNext();
    }
    if ((c == ';' && parsingAssembly) || c == EOF)
    {
        return null;
    }

    var token = new Token();

    // first case, simplest
    var nextChar = iterator.PeekNext();
    if (char.IsDigit(c) ||
        char.IsLetter(c) ||
        c == '_' ||
        c == '?' ||
        (c == '.' && char.IsLetterOrDigit(nextChar)) ||
        (c == '\\' && char.IsLetterOrDigit(nextChar)))
    {
        token.Type = TokenType.Operand;
        if (char.IsDigit(c) || (c == '.' && char.IsDigit(nextChar)))
        {
            // Numeric literal: choose the scan terminator from the prefix.
            if (char.IsDigit(c) && previousChar == '$')
            {
                token.Name = ScanTo(previousChar, iterator, FirstNonHex);
            }
            else if (c == '0' &&
                     (nextChar == 'b' || nextChar == 'B' ||
                      nextChar == 'o' || nextChar == 'O' ||
                      nextChar == 'x' || nextChar == 'X'))
            {
                token.Name = ScanTo(previousChar, iterator, FirstNonNonBase10);
            }
            else
            {
                token.Name = ScanTo(previousChar, iterator, FirstNonNumeric);
            }
        }
        else if (c == '\\')
        {
            // Backslash-prefixed name: keep the backslash and scan the letters/digits after it.
            iterator.MoveNext();
            token.Name = c + ScanTo(previousChar, iterator, FirstNonLetterOrDigit);
        }
        else if (c == '?')
        {
            // Returned immediately; the trailing position adjustment below is skipped.
            token.UnparsedName = token.Name = "?";
            return token;
        }
        else
        {
            token.UnparsedName = token.Name = ScanTo(previousChar, iterator, FirstNonSymbol);
            if (parsingAssembly && !Assembler.Options.CaseSensitive)
            {
                // Culture-invariant folding: identifiers must not lower-case differently
                // depending on the current thread culture (e.g. 'I' under tr-TR).
                token.Name = token.Name.ToLowerInvariant();
            }
            if (parsingAssembly && Assembler.InstructionLookupRules.Any(rule => rule(token.Name)))
            {
                token.Type = TokenType.Instruction;
            }
            else if (iterator.Current == '(' ||
                     (iterator.Current != NewLine &&
                      char.IsWhiteSpace(iterator.Current) &&
                      iterator.PeekNextSkipping(NonNewLineWhiteSpace) == '('))
            {
                // A name followed by '(' (optionally after same-line whitespace) is a function call.
                token.Type = TokenType.Operator;
                token.OperatorType = OperatorType.Function;
            }
            else
            {
                token.Type = TokenType.Operand;
            }
        }
    }
    else if (previousToken != null &&
             previousToken.Name.Equals("%") &&
             previousToken.OperatorType == OperatorType.Unary &&
             (c == '.' || c == '#'))
    {
        // alternative binary string parsing: '.' stands for 0 and '#' stands for 1
        token.Type = TokenType.Operand;
        token.Name = ScanTo(previousChar, iterator, FirstNonAltBin)
            .Replace('.', '0')
            .Replace('#', '1');
    }
    else if (c == '"' || c == SingleQuote)
    {
        var open = c;
        var quoteBuilder = new StringBuilder(c.ToString());
        var escaped = false;
        while ((c = iterator.GetNext()) != open && c != char.MinValue)
        {
            quoteBuilder.Append(c);
            if (c == '\\')
            {
                // Take the escaped character verbatim so an escaped quote does not end the string.
                escaped = true;
                quoteBuilder.Append(iterator.GetNext());
            }
        }
        if (c == char.MinValue)
        {
            throw new ExpressionException(iterator.Index, "Quote string not enclosed.");
        }
        quoteBuilder.Append(c);
        var unescaped = escaped
            ? Regex.Unescape(quoteBuilder.ToString())
            : quoteBuilder.ToString();
        // The length includes both quote characters, so 3 means exactly one character.
        if (c == '\'' && unescaped.Length > 3)
        {
            throw new ExpressionException(iterator.Index, "Too many characters in character literal.");
        }
        token.Name = unescaped;
        token.Type = TokenType.Operand;
    }
    else
    {
        if (c == '+' || c == '-')
        {
            /*
             * Scenarios for parsing '+' or '-', since they can function as different things
             * in an expression.
             * 1. The binary operator:
             *  a. OPERAND+3 / ...)+(... => single '+' sandwiched between two operands/groupings
             *  b. OPERAND++3 / ...)++(... => the first '+' is a binary operator since it is to the
             *     right of an operand/grouping. We need to split off the single '++' to two
             *     separate '+' tokens. What kind of token is the second '+'? We worry about that later.
             *  c. OPERAND+++3 / ...)+++(... => again, the first '+' is a binary operator. We need to split
             *     it off from the rest of the string of '+' characters, and we worry about later.
             * 2. The unary operator:
             *  a. +3 / +(... => single '+' immediately preceding an operand/grouping.
             *  b. ++3 / ++(... => parser doesn't accept C-style prefix (or postfix) operators, so one of these is an
             *     anonymous label. Which one? Easy, the first. Split the '+' string.
             * 3. A full expression mixing both:
             *  a. OPERAND+++3 / ...)+++(... => From scenario 1.c, we know the first '+' is a binary operator,
             *     which leaves us with => '++3' left, which from scenario 2.b. we know the first '+'
             *     has to be an operand. So we split the string again, so that the next scan leaves us with
             *     '+3', so the third and final plus is a unary operator.
             *          OPERAND => operand
             *          +       => binary operator
             *          +       => operand
             *          +       => unary operator
             *          3/(     => operand/grouping
             * 4. A line reference:
             *  a. + => Simplest scenario.
             *  b. ++, +++, ++++, etc. => Treat as one.
             */
            // Get the full string
            token.Name = ScanTo(previousChar, iterator, FirstNonPlusMinus);
            if (FollowsOperandOrGroup())
            {
                // looking backward at the previous token, if it's an operand or grouping then we
                // know this is a binary
                token.Type = TokenType.Operator;
                token.OperatorType = OperatorType.Binary;
                if (token.Name.Length > 1)
                {
                    // we need to split off the rest of the string so we have a single char '+'
                    // NOTE(review): token.Position is never assigned in this method, so this
                    // relies on whatever value a new Token starts with - confirm that is intended.
                    token.Name = c.ToString();
                    iterator.Rewind(iterator.Index - token.Position - 1);
                }
            }
            else if (!IsNotOperand(nextChar) || nextChar == '(')
            {
                // looking at the very next character in the input stream, if it's an operand or grouping
                // then we know this is a unary
                // NOTE(review): nextChar was captured before the scan above, i.e. relative to
                // the first sign character rather than the end of the scanned run.
                if (token.Name.Length > 1)
                {
                    // If the string is greater than one character,
                    // then it's not a unary, it's an operand AND a unary. So we split off the
                    // rest of the string.
                    // NOTE(review): same reliance on token.Position as in the binary case above.
                    token.Name = c.ToString();
                    iterator.Rewind(iterator.Index - token.Position - 1);
                    token.Type = TokenType.Operand;
                }
                else
                {
                    token.Type = TokenType.Operator;
                    token.OperatorType = OperatorType.Unary;
                }
            }
            else
            {
                token.Type = TokenType.Operand;
            }
        }
        else if (c == '*')
        {
            // Same as +/- scenario above, if the previous token is an operand or grouping,
            // we need to treat the splat as a binary operator.
            if (FollowsOperandOrGroup())
            {
                token.Type = TokenType.Operator;
                token.OperatorType = OperatorType.Binary;
            }
            else
            {
                // but since there is no unary version of this we will treat as an operand, and let the
                // evaluator deal with any problems like *OPERAND or *(
                token.Type = TokenType.Operand;
            }
            token.Name = c.ToString();
        }
        else
        {
            // not a number, symbol, string, or special (+, -, *) character. So we just treat as an operator
            token.Type = TokenType.Operator;
            if (c.IsSeparator() || c.IsOpenOperator() || c.IsClosedOperator())
            {
                token.Name = c.ToString();
                if (c.IsSeparator())
                {
                    token.OperatorType = OperatorType.Separator;
                }
                else if (c.IsOpenOperator())
                {
                    token.OperatorType = OperatorType.Open;
                }
                else
                {
                    token.OperatorType = OperatorType.Closed;
                }
            }
            else
            {
                token.Name = ScanTo(previousChar, iterator, FirstNonMatchingOperator);
                token.UnparsedName = token.Name;
                /* The general strategy to determine whether an operator is unary or binary:
                 *  1. Is it actually one of the defined unary types?
                 *  2. Peek at the next character. Is it a group or operand, or not?
                 *  3. Look behind at the previous token. Is it also a group or operand, or not?
                 *  4. If the token does NOT follow an operand or group, AND it precedes a group character,
                 *     or operand character, then it is a unary.
                 *  5. All other cases, binary.
                 */
                var precedesOperandOrGroup =
                    (c.IsUnaryOperator() &&
                     (!IsNotOperand(nextChar) ||
                      nextChar == '(' ||
                      nextChar.IsRadixOperator() ||
                      nextChar.IsUnaryOperator()))
                    || (c.IsRadixOperator() && char.IsLetterOrDigit(nextChar))
                    || (c == '%' && (nextChar == '.' || nextChar == '#'));

                if (precedesOperandOrGroup && !FollowsOperandOrGroup())
                {
                    token.OperatorType = OperatorType.Unary;
                }
                else
                {
                    token.OperatorType = OperatorType.Binary;
                }
            }
        }
    }

    if (string.IsNullOrEmpty(token.UnparsedName))
    {
        token.UnparsedName = token.Name;
    }
    // If the iterator's current character is not the token's last character, move back one
    // index (presumably the scan stopped one past the token - confirm against ScanTo).
    if (iterator.Current != token.Name[^1])
    {
        iterator.Rewind(iterator.Index - 1);
    }
    return token;
}