/// <summary>
/// Resolves the tokenized expression at the iterator's position to a string value.
/// Accepted forms are a string literal, a <c>format(...)</c> call, a <c>char(...)</c>
/// call, a scalar string symbol, or a subscripted string-vector symbol.
/// </summary>
/// <param name="iterator">The iterator to the tokenized expression.</param>
/// <param name="services">The shared assembly services.</param>
/// <returns>The resolved string, or an empty string when the iterator yields no token
/// or the referenced symbol cannot be found.</returns>
/// <exception cref="SyntaxException">The expression does not resolve to a string
/// ("Type mismatch."), or a string-vector subscript is out of range.</exception>
public static string GetString(RandomAccessIterator<Token> iterator, AssemblyServices services)
{
    // An unpositioned iterator that cannot advance has nothing to offer.
    if (iterator.Current == null && !iterator.MoveNext())
    {
        return string.Empty;
    }

    var first = iterator.Current;

    if (IsStringLiteral(iterator))
    {
        iterator.MoveNext();
        return Regex.Unescape(first.Name.ToString()).TrimOnce('"');
    }

    if (first.Type == TokenType.Function)
    {
        if (first.Name.Equals("format", services.StringComparison))
        {
            // A formatted string only counts when it is non-empty and ends the expression.
            var formatted = GetFormatted(iterator, services);
            if (!string.IsNullOrEmpty(formatted) && Token.IsEnd(iterator.Current))
            {
                return formatted;
            }
        }
        else if (first.Name.Equals("char", services.StringComparison))
        {
            var codePoint = (int)services.Evaluator.Evaluate(iterator, 0, 0x10FFFF);
            return char.ConvertFromUtf32(services.Encoding.GetCodePoint(codePoint));
        }
    }
    else if (first.Type == TokenType.Operand)
    {
        var lead = first.Name[0];
        if ((char.IsLetter(lead) || lead == '_') && !services.Evaluator.IsReserved(first.Name))
        {
            var symbol = services.SymbolManager.GetSymbol(first, services.CurrentPass > 0);
            if (symbol == null)
            {
                return string.Empty;
            }

            if (symbol.DataType == DataType.String)
            {
                // Always advance past the symbol name before deciding which form we have.
                var expressionDone = !iterator.MoveNext() || Token.IsEnd(iterator.Current);
                if (expressionDone && symbol.StorageType == StorageType.Scalar)
                {
                    return symbol.StringValue.TrimOnce('"').ToString();
                }

                if (symbol.StorageType == StorageType.Vector && iterator.Current.Name.Equals("["))
                {
                    var bracket = iterator.Current;
                    var index = (int)services.Evaluator.Evaluate(iterator);
                    if (Token.IsEnd(iterator.Current))
                    {
                        if (index < 0 || index >= symbol.StringVector.Count)
                        {
                            throw new SyntaxException(bracket, "Index out of range.");
                        }

                        return symbol.StringVector[index].ToString();
                    }
                }
            }
        }
    }

    // Every path that did not return above is not a string expression.
    throw new SyntaxException(first, "Type mismatch.");
}
/// <summary>
/// Gets the formatted string from the tokenized expression.
/// </summary>
/// <param name="iterator">The iterator to the tokenized expression, positioned on the
/// <c>format</c> function token.</param>
/// <param name="services">The shared assembly services.</param>
/// <returns>The formatted string; the unmodified format string when no parameters
/// follow it; or <c>null</c> when the first argument is missing or is neither a
/// double-quoted string nor a nested <c>format(...)</c> call.</returns>
public static string GetFormatted(RandomAccessIterator<Token> iterator, AssemblyServices services)
{
    // Step over one token (presumably the opening parenthesis — confirm against the
    // tokenizer) and then read the first argument, which must supply the format string.
    iterator.MoveNext();
    var format = iterator.GetNext();
    if (Token.IsEnd(format))
    {
        return null;
    }

    string fmt;
    if (!format.IsDoubleQuote())
    {
        // The only non-literal format argument allowed is a nested format(...) call.
        // The original test used '&&', which let any other function token through and
        // mis-parsed it as a nested format call.
        if (format.Type != TokenType.Function ||
            !format.Name.Equals("format", services.StringComparison))
        {
            return null;
        }

        fmt = GetFormatted(iterator, services);
        if (fmt == null)
        {
            // The nested call failed; propagate the failure instead of handing a null
            // format to Regex.IsMatch/string.Format, which throw ArgumentNullException.
            return null;
        }
    }
    else
    {
        fmt = Regex.Unescape(format.Name.TrimOnce('"').ToString());
    }

    var parms = new List<object>();
    if (iterator.MoveNext())
    {
        while (!Token.IsEnd(iterator.GetNext()))
        {
            if (ExpressionIsAString(iterator, services))
            {
                parms.Add(GetString(iterator, services));
            }
            else
            {
                var parmVal = services.Evaluator.Evaluate(iterator, false);

                // When the placeholder for this parameter uses an integer format
                // specifier (d/D/x/X), pass an int so string.Format accepts the specifier.
                if (Regex.IsMatch(fmt, $"\\{{{parms.Count}(,-?\\d+)?:(d|D|x|X)\\d*\\}}"))
                {
                    parms.Add((int)parmVal);
                }
                else
                {
                    parms.Add(parmVal);
                }
            }
        }
    }

    if (parms.Count == 0)
    {
        return fmt;
    }

    return string.Format(fmt, parms.ToArray());
}
/// <summary>
/// Scans forward from the current line to verify that the block directive on it, and
/// every nested block directive, is matched by its closure. The iterator is returned
/// to its starting index when the scan succeeds.
/// </summary>
/// <param name="lines">The source line iterator, positioned on the block-opening line.</param>
/// <exception cref="SyntaxException">A block directive has no matching closure.</exception>
void ScanBlock(RandomAccessIterator<SourceLine> lines)
{
    var startIndex = lines.Index;

    // Stack of block-opening directives still waiting for their closure.
    var pending = new Stack<Token>();
    pending.Push(lines.Current.Instruction);

    while (lines.MoveNext() && pending.Count > 0)
    {
        var instruction = lines.Current.Instruction;
        if (instruction == null)
        {
            continue;
        }

        var expectedClosure = _openClosures[pending.Peek().Name];
        if (instruction.Name.Equals(expectedClosure, Services.StringViewComparer))
        {
            pending.Pop();
        }
        else if (_openClosures.ContainsKey(instruction.Name))
        {
            pending.Push(instruction);
        }
    }

    if (pending.Count > 0)
    {
        var unclosed = pending.Peek();
        throw new SyntaxException(unclosed,
            $"Missing closure \"{_openClosures[unclosed.Name]}\" for directive \"{unclosed.Name}\".");
    }

    lines.SetIndex(startIndex);
}
/// <summary>
/// Determines whether the tokenized expression is a string.
/// </summary>
/// <remarks>
/// For every form other than a double-quoted literal the iterator is advanced while
/// probing and then restored to the index it had on entry.
/// </remarks>
/// <param name="iterator">The iterator to the tokenized expression.</param>
/// <param name="services">The shared assembly services.</param>
/// <returns><c>true</c> if the expression is a double-quoted literal longer than two
/// characters, a <c>format(...)</c> or <c>char(...)</c> call, a scalar string symbol, or
/// an in-range element of a string vector, and it ends the expression;
/// otherwise <c>false</c>.</returns>
public static bool ExpressionIsAString(RandomAccessIterator<Token> iterator, AssemblyServices services)
{
    var token = iterator.Current;
    if (token.IsDoubleQuote())
    {
        // Only peeks ahead, so there is no index to restore on this path.
        return token.Name.Length > 2 && Token.IsEnd(iterator.PeekNext());
    }

    var ix = iterator.Index;
    var result = false;
    if (token.Type == TokenType.Function &&
        (token.Name.Equals("format", services.StringComparison) ||
         token.Name.Equals("char", services.StringComparison)))
    {
        iterator.MoveNext();
        var parms = Token.GetGroup(iterator);
        var last = iterator.Current;
        result = Token.IsEnd(last);
        if (token.Name.Equals("char", services.StringComparison))
        {
            // Non-short-circuit '&=' on purpose: the argument is always evaluated.
            result &= services.Evaluator.Evaluate(parms.GetIterator(), 0, 0x10FFFF).IsInteger();
        }
    }
    else if (token.Type == TokenType.Operand &&
             (char.IsLetter(token.Name[0]) || token.Name[0] == '_') &&
             !services.Evaluator.IsReserved(token.Name))
    {
        var sym = services.SymbolManager.GetSymbol(token, false);
        if (sym != null)
        {
            if (iterator.MoveNext() && iterator.Current.Name.Equals("["))
            {
                var subscript = (int)services.Evaluator.Evaluate(iterator);

                // Confirm the symbol really is a string vector before touching
                // StringVector, mirroring the checks GetString performs. The original
                // read StringVector.Count for any subscripted symbol.
                result = Token.IsEnd(iterator.Current) &&
                         sym.DataType == DataType.String &&
                         sym.StorageType == StorageType.Vector &&
                         subscript >= 0 &&
                         subscript < sym.StringVector.Count;
            }
            else
            {
                result = Token.IsEnd(iterator.Current) &&
                         sym.StorageType == StorageType.Scalar &&
                         sym.DataType == DataType.String;
            }
        }
    }

    iterator.SetIndex(ix);
    return result;
}
/// <summary>
/// Evaluates a call to the <c>len</c> function: the length of the symbol named by the
/// argument, or of one of its elements when the argument is subscripted.
/// </summary>
/// <param name="tokens">The iterator to the tokenized call, positioned on the function
/// token.</param>
/// <returns>
/// The symbol's <c>Length</c> when no subscript is given; the length of the selected
/// string element, or the <c>Size()</c> of the selected numeric element, when a
/// subscript is given; or <c>0</c> when the symbol is not yet defined on the first
/// pass (in which case another pass is requested).
/// </returns>
/// <exception cref="SyntaxException">The argument is missing, is not a symbol name, is
/// followed by unexpected tokens, is subscripted but not a vector, or the subscript is
/// out of range.</exception>
/// <exception cref="SymbolException">The symbol is undefined after the first pass.</exception>
public double EvaluateFunction(RandomAccessIterator<Token> tokens)
{
    tokens.MoveNext();
    var param = tokens.GetNext();
    if (param.Equals(")"))
    {
        throw new SyntaxException(param.Position, "Expected argument not provided.");
    }

    var symbolLookup = Services.SymbolManager.GetSymbol(param, false);
    if (symbolLookup == null)
    {
        // A valid symbol name is an operand starting with a letter or an underscore.
        // The original test was "!IsLetter || != '_'", which is true for every
        // character, so the forward-reference handling below was unreachable.
        if (param.Type != TokenType.Operand ||
            (!char.IsLetter(param.Name[0]) && param.Name[0] != '_'))
        {
            throw new SyntaxException(param.Position, "Function \"len\" expects a symbol.");
        }

        if (Services.CurrentPass > 0)
        {
            throw new SymbolException(param, SymbolException.ExceptionReason.NotDefined);
        }

        // First pass: the symbol may be defined later, so ask for another pass.
        Services.PassNeeded = true;
        return 0;
    }

    param = tokens.GetNext();
    if (!param.Name.Equals(")"))
    {
        param = tokens.GetNext();
        int subscript = -1;
        if (param.Name.Equals("["))
        {
            subscript = (int)Services.Evaluator.Evaluate(tokens, 0, int.MaxValue);
        }

        // subscript stays -1 when no '[' was found, which is reported as an
        // unexpected argument along with anything that is not followed by ')'.
        if (subscript < 0 || !tokens.PeekNext().Equals(")"))
        {
            throw new SyntaxException(param.Position, "Unexpected argument.");
        }

        if (symbolLookup.StorageType != StorageType.Vector)
        {
            throw new SyntaxException(param.Position, "Type mismatch.");
        }

        if (symbolLookup.DataType == DataType.String)
        {
            if (subscript >= symbolLookup.StringVector.Count)
            {
                throw new SyntaxException(param.Position, "Index out of range.");
            }

            return symbolLookup.StringVector[subscript].Length;
        }

        if (subscript >= symbolLookup.NumericVector.Count)
        {
            throw new SyntaxException(param.Position, "Index out of range.");
        }

        return symbolLookup.NumericVector[subscript].Size();
    }

    return symbolLookup.Length;
}
/// <summary>
/// Invokes a user-defined function: collects its arguments from the token stream,
/// runs the function inside an ephemeral symbol scope and returns its value.
/// </summary>
/// <param name="tokens">The iterator to the tokenized call, positioned on the function
/// name token.</param>
/// <param name="returnValueExpected">Whether the caller requires the function to
/// produce a value.</param>
/// <returns>The value returned by the invoked function.</returns>
/// <exception cref="ReturnException">The function returned NaN while
/// <paramref name="returnValueExpected"/> is <c>true</c>.</exception>
double CallFunction(RandomAccessIterator<Token> tokens, bool returnValueExpected)
{
    var functionToken = tokens.Current;
    var functionName = functionToken.Name;
    tokens.MoveNext();

    // Each argument is stored as a string when the expression is a string, otherwise
    // as the numeric result of evaluating it.
    var evalParms = new List<object>();
    Token token = tokens.GetNext();
    while (!token.Name.Equals(")"))
    {
        if (token.IsSeparator())
        {
            tokens.MoveNext();
        }

        if (StringHelper.ExpressionIsAString(tokens, Services))
        {
            evalParms.Add(StringHelper.GetString(tokens, Services));
        }
        else
        {
            evalParms.Add(Services.Evaluator.Evaluate(tokens, false));
        }

        token = tokens.Current;
    }

    double value;
    Services.SymbolManager.PushScopeEphemeral();
    try
    {
        value = _functionDefs[functionName].Invoke(evalParms);
    }
    finally
    {
        // Always unwind the ephemeral scope, even when the lookup or the invocation
        // throws; otherwise the scope would stay pushed for the rest of the assembly.
        Services.SymbolManager.PopScopeEphemeral();
    }

    if (double.IsNaN(value) && returnValueExpected)
    {
        throw new ReturnException(functionToken.Position,
            $"Function name \"{functionName}\" did not return a value.");
    }

    return value;
}
/// <summary>
/// Collects the tokens of a delimited group, starting at the opening token the
/// iterator is currently on and running through its matching closing token.
/// </summary>
/// <remarks>
/// Nested groups using the same delimiters are tracked by depth. Because the iterator
/// is advanced before the depth is tested, it ends up one token past the closing token
/// (when such a token exists).
/// </remarks>
/// <param name="tokens">The iterator to the full token expression.</param>
/// <returns>The grouped tokens, delimiters included.</returns>
public static IEnumerable<Token> GetGroup(RandomAccessIterator<Token> tokens)
{
    var opener = tokens.Current;
    var group = new List<Token>();
    group.Add(opener);

    var openName = opener.Name;
    var closeName = OpenClose[openName];

    for (var depth = 1; tokens.MoveNext() && depth > 0;)
    {
        var current = tokens.Current;
        group.Add(current);

        if (current.Name.Equals(openName))
        {
            depth++;
        }
        else if (current.Name.Equals(closeName))
        {
            depth--;
        }
    }

    return group;
}
/// <summary>
/// Parses the source string into a tokenized <see cref="SourceLine"/> collection.
/// </summary>
/// <remarks>
/// <see cref="ExpressionException"/>s raised while tokenizing are caught and written to
/// <c>Assembler.Log</c> rather than propagated, so parsing continues after an error.
/// </remarks>
/// <param name="fileName">The source file's path/name.</param>
/// <param name="source">The source string.</param>
/// <returns>A collection of <see cref="SourceLine"/>s whose components are
/// properly tokenized for further evaluation and assembly.</returns>
public static IEnumerable<SourceLine> Parse(string fileName, string source)
{
    var iterator = new RandomAccessIterator<char>(source.ToCharArray());
    Token rootParent, currentParent;
    Token token = null;
    Reset();
    Token currentOpen = null;

    // currentLine counts physical lines; lineNumber is the physical line on which the
    // source line currently being built started.
    int currentLine = 1, lineNumber = currentLine;

    // lineIndex is the iterator index at the start of each physical line, used to
    // calculate token positions. sourceLineIndex is the iterator index at the start of
    // each new source line. Usually lineIndex and sourceLineIndex are the same, but for
    // source lines whose code spans multiple physical lines they will differ.
    int lineIndex = -1, opens = 0, sourceLineIndex = lineIndex;
    var lines = new List<SourceLine>();
    char previousChar = iterator.Current;
    while (iterator.GetNext() != EOF)
    {
        if (iterator.Current != NewLine && iterator.Current != ':' && iterator.Current != ';')
        {
            try
            {
                token = ParseToken(previousChar, token, iterator);
                if (token != null)
                {
                    previousChar = iterator.Current;
                    token.Parent = currentParent;
                    token.Position = iterator.Index - lineIndex - token.Name.Length + 1;
                    if (token.OperatorType == OperatorType.Open ||
                        token.OperatorType == OperatorType.Closed ||
                        token.OperatorType == OperatorType.Separator)
                    {
                        if (token.OperatorType == OperatorType.Open)
                        {
                            // Descend: the open token becomes the parent, and its
                            // contents hang off a blank separator beneath it.
                            opens++;
                            currentParent.AddChild(token);
                            currentOpen = currentParent = token;
                            AddBlankSeparator();
                        }
                        else if (token.OperatorType == OperatorType.Closed)
                        {
                            if (currentOpen == null)
                            {
                                throw new ExpressionException(token,
                                    $"Missing opening for closure \"{token.Name}\"");
                            }

                            // check if matching ( to )
                            if (!Groups[currentOpen.Name].Equals(token.Name))
                            {
                                throw new ExpressionException(token,
                                    $"Mismatch between \"{currentOpen.Name}\" in column {currentOpen.Position} and \"{token.Name}\"");
                            }

                            // go up the ladder
                            currentOpen = currentParent = token.Parent = currentOpen.Parent;
                            while (currentOpen != null && currentOpen.OperatorType != OperatorType.Open)
                            {
                                currentOpen = currentOpen.Parent;
                            }

                            opens--;
                        }
                        else
                        {
                            // Separator: becomes a sibling of the current separator and
                            // the parent of the tokens that follow it.
                            currentParent = currentParent.Parent;
                            currentParent.AddChild(token);
                            currentParent = token;
                        }
                    }
                    else if (token.Type == TokenType.Instruction)
                    {
                        // Climb back to the node directly beneath the root before
                        // attaching the instruction.
                        while (currentParent.Parent != rootParent)
                        {
                            currentParent = currentParent.Parent;
                        }

                        currentParent.AddChild(token);
                        AddBlankSeparator();
                        AddBlankSeparator();
                    }
                    else
                    {
                        currentParent.AddChild(token);
                    }
                }
            }
            catch (ExpressionException ex)
            {
                Assembler.Log.LogEntry(fileName, lineNumber, ex.Position, ex.Message);
            }

            if (iterator.PeekNext() == NewLine)
            {
                iterator.MoveNext();
            }
        }

        if (iterator.Current == ';')
        {
            // Comment: skip to the end of the line (or to a colon, unless colons are ignored).
            _ = iterator.Skip(c => c != NewLine &&
                                   (c != ':' || Assembler.Options.IgnoreColons) &&
                                   c != EOF);
        }

        if (iterator.Current == NewLine || iterator.Current == ':' || iterator.Current == EOF)
        {
            previousChar = iterator.Current;

            /* A new source line begins when:
             * 1. A line termination character (new line, colon, EOF) is encountered, and
             * 2. either the end of the source has been reached, or no group is open and
             *    the most recent token (if any) is not a binary operator, an open
             *    operator, or a comma separator.
             */
            var newLine = iterator.Current == EOF ||
                          (opens == 0 &&
                           (token == null ||
                            (token.OperatorType != OperatorType.Binary &&
                             token.OperatorType != OperatorType.Open &&
                             !token.Name.Equals(","))));
            if (iterator.Current == NewLine)
            {
                currentLine++;
            }

            if (newLine)
            {
                var newSourceLine = new SourceLine(fileName, lineNumber, GetSourceLineSource(), rootParent.Children[0]);
                lines.Add(newSourceLine);
                if (Assembler.Options.WarnLeft &&
                    newSourceLine.Label != null &&
                    newSourceLine.Label.Position != 1)
                {
                    Assembler.Log.LogEntry(newSourceLine, newSourceLine.Label,
                        "Label is not at the beginning of the line.", false);
                }

                Reset();
                lineNumber = currentLine;
            }
            else
            {
                // The source line continues on the next physical line.
                token = null;
            }

            lineIndex = iterator.Index;
            if (newLine)
            {
                sourceLineIndex = iterator.Index;
            }
        }
    }

    if (currentOpen != null && currentOpen.OperatorType == OperatorType.Open)
    {
        // Report against the line being parsed instead of a hard-coded line 1, so the
        // diagnostic points at the statement containing the unclosed group.
        Assembler.Log.LogEntry(fileName, lineNumber, currentOpen.LastChild.Position,
            $"End of source reached without finding closing \"{Groups[currentOpen.Name]}\".");
    }

    if (token != null)
    {
        // Flush the final source line when the source does not end with a terminator.
        lines.Add(new SourceLine(fileName, lineNumber, GetSourceLineSource(), rootParent.Children[0]));
    }

    return lines;

    // Appends an empty separator token beneath the current parent and descends into it.
    void AddBlankSeparator()
    {
        var sepToken = new Token()
        {
            Type = TokenType.Operator,
            OperatorType = OperatorType.Separator,
            Name = string.Empty,
            Position = token == null ? 1 : token.Position,
            Children = new List<Token>()
        };
        currentParent.AddChild(sepToken);
        currentParent = sepToken;
    }

    // Returns the raw text of the source line being built, or an empty string when the
    // iterator has not moved past the line's first character.
    string GetSourceLineSource()
    {
        if (iterator.Index > sourceLineIndex + 1)
        {
            return source.Substring(sourceLineIndex + 1, iterator.Index - sourceLineIndex - 1);
        }

        return string.Empty;
    }

    // Starts a fresh token tree: a new root with two nested blank separators beneath it.
    void Reset()
    {
        currentParent = rootParent = new Token();
        currentParent.Children = new List<Token>();
        AddBlankSeparator();
        AddBlankSeparator();
        token = null;
    }
}
/// <summary>
/// Reads a single token from the character stream, starting at the iterator's current
/// character, and classifies it as an operand, instruction, function or operator.
/// </summary>
/// <param name="previousChar">The last character consumed before this token; a
/// preceding <c>'$'</c> selects hexadecimal scanning for numeric operands.</param>
/// <param name="previousToken">The previously parsed token, or <c>null</c>. Used to
/// decide whether <c>+</c>, <c>-</c>, <c>*</c> and other operators are unary, binary
/// or operands.</param>
/// <param name="iterator">The iterator over the source characters.</param>
/// <param name="parsingAssembly">When <c>true</c>, new lines and <c>';'</c> end the
/// token stream for the line, case folding is applied per the assembler options, and
/// instruction lookup is performed.</param>
/// <returns>The parsed token, or <c>null</c> when a line end, a comment start or the
/// end of the source is reached before any token.</returns>
/// <exception cref="ExpressionException">A quoted string is not closed, or a character
/// literal contains too many characters.</exception>
static Token ParseToken(char previousChar, Token previousToken, RandomAccessIterator<char> iterator, bool parsingAssembly = true)
{
    char c = iterator.Current;

    // Skip leading white space; when parsing assembly, a new line terminates the line's
    // tokens, so step back and let the caller handle it.
    while (char.IsWhiteSpace(c))
    {
        if (c == NewLine && parsingAssembly)
        {
            iterator.Rewind(iterator.Index - 1);
            return null;
        }

        c = iterator.GetNext();
    }

    if ((c == ';' && parsingAssembly) || c == EOF)
    {
        return null;
    }

    var token = new Token();

    // first case, simplest: numbers, symbols, '?', and '.'/'\' followed by a letter or digit
    var nextChar = iterator.PeekNext();
    if (char.IsDigit(c) ||
        char.IsLetter(c) ||
        c == '_' ||
        c == '?' ||
        (c == '.' && char.IsLetterOrDigit(nextChar)) ||
        (c == '\\' && char.IsLetterOrDigit(nextChar)))
    {
        token.Type = TokenType.Operand;
        if (char.IsDigit(c) || (c == '.' && char.IsDigit(nextChar)))
        {
            if (char.IsDigit(c) && previousChar == '$')
            {
                token.Name = ScanTo(previousChar, iterator, FirstNonHex);
            }
            else if (c == '0' &&
                     (nextChar == 'b' || nextChar == 'B' ||
                      nextChar == 'o' || nextChar == 'O' ||
                      nextChar == 'x' || nextChar == 'X'))
            {
                token.Name = ScanTo(previousChar, iterator, FirstNonNonBase10);
            }
            else
            {
                token.Name = ScanTo(previousChar, iterator, FirstNonNumeric);
            }
        }
        else if (c == '\\')
        {
            iterator.MoveNext();
            token.Name = c + ScanTo(previousChar, iterator, FirstNonLetterOrDigit);
        }
        else if (c == '?')
        {
            token.UnparsedName = token.Name = "?";
            return token;
        }
        else
        {
            token.UnparsedName = token.Name = ScanTo(previousChar, iterator, FirstNonSymbol);
            if (parsingAssembly && !Assembler.Options.CaseSensitive)
            {
                // Fold case with the invariant culture so identifiers and mnemonics
                // lower-case the same way regardless of the host's current culture
                // (e.g. 'I' does not become a dotless 'ı' under Turkish settings).
                token.Name = token.Name.ToLowerInvariant();
            }

            if (parsingAssembly && Assembler.InstructionLookupRules.Any(rule => rule(token.Name)))
            {
                token.Type = TokenType.Instruction;
            }
            else if (iterator.Current == '(' ||
                     (iterator.Current != NewLine &&
                      char.IsWhiteSpace(iterator.Current) &&
                      iterator.PeekNextSkipping(NonNewLineWhiteSpace) == '('))
            {
                // A symbol followed by '(' (optionally after same-line white space)
                // is a function call.
                token.Type = TokenType.Operator;
                token.OperatorType = OperatorType.Function;
            }
            else
            {
                token.Type = TokenType.Operand;
            }
        }
    }
    else if (previousToken != null &&
             previousToken.Name.Equals("%") &&
             previousToken.OperatorType == OperatorType.Unary &&
             (c == '.' || c == '#'))
    {
        // alternative binary string parsing: '.' stands for 0 and '#' stands for 1
        token.Type = TokenType.Operand;
        token.Name = ScanTo(previousChar, iterator, FirstNonAltBin).Replace('.', '0')
                                                                  .Replace('#', '1');
    }
    else if (c == '"' || c == SingleQuote)
    {
        var open = c;
        var quoteBuilder = new StringBuilder(c.ToString());
        var escaped = false;
        while ((c = iterator.GetNext()) != open && c != char.MinValue)
        {
            quoteBuilder.Append(c);
            if (c == '\\')
            {
                // Take the escaped character verbatim so an escaped quote does not
                // terminate the literal.
                escaped = true;
                quoteBuilder.Append(iterator.GetNext());
            }
        }

        if (c == char.MinValue)
        {
            throw new ExpressionException(iterator.Index, "Quote string not enclosed.");
        }

        quoteBuilder.Append(c);
        var unescaped = escaped ? Regex.Unescape(quoteBuilder.ToString()) : quoteBuilder.ToString();

        // A character literal is the two quotes plus exactly one character.
        if (c == '\'' && unescaped.Length > 3)
        {
            throw new ExpressionException(iterator.Index, "Too many characters in character literal.");
        }

        token.Name = unescaped;
        token.Type = TokenType.Operand;
    }
    else
    {
        if (c == '+' || c == '-')
        {
            /*
             * Scenarios for parsing '+' or '-', since they can function as different
             * things in an expression.
             * 1. The binary operator:
             *    a. OPERAND+3 / ...)+(... => single '+' sandwiched between two
             *       operands/groupings.
             *    b. OPERAND++3 / ...)++(... => the first '+' is a binary operator since
             *       it is to the right of an operand/grouping. We need to split the
             *       single '++' into two separate '+' tokens. What kind of token is the
             *       second '+'? We worry about that later.
             *    c. OPERAND+++3 / ...)+++(... => again, the first '+' is a binary
             *       operator. We need to split it off from the rest of the string of
             *       '+' characters, and we worry about the rest later.
             * 2. The unary operator:
             *    a. +3 / +(... => single '+' immediately preceding an operand/grouping.
             *    b. ++3 / ++(... => the parser doesn't accept C-style prefix (or
             *       postfix) operators, so one of these is an anonymous label. Which
             *       one? Easy, the first. Split the '+' string.
             * 3. A full expression mixing both:
             *    a. OPERAND+++3 / ...)+++(... => From scenario 1.c, we know the first
             *       '+' is a binary operator, which leaves us with '++3', which from
             *       scenario 2.b we know the first '+' has to be an operand. So we
             *       split the string again, so that the next scan leaves us with '+3',
             *       so the third and final plus is a unary operator.
             *         OPERAND => operand
             *         +       => binary operator
             *         +       => operand
             *         +       => unary operator
             *         3/(     => operand/grouping
             * 4. A line reference:
             *    a. + => Simplest scenario.
             *    b. ++, +++, ++++, etc. => Treat as one.
             */

            // Get the full string
            token.Name = ScanTo(previousChar, iterator, FirstNonPlusMinus);
            if (previousToken != null &&
                (previousToken.Type == TokenType.Operand || previousToken.Name.Equals(")")))
            {
                // looking backward at the previous token, if it's an operand or
                // grouping then we know this is a binary
                token.Type = TokenType.Operator;
                token.OperatorType = OperatorType.Binary;
                if (token.Name.Length > 1)
                {
                    // we need to split off the rest of the string so we have a single char '+'
                    // NOTE(review): token.Position has not been assigned in this method
                    // at this point — confirm the intended rewind target.
                    token.Name = c.ToString();
                    iterator.Rewind(iterator.Index - token.Position - 1);
                }
            }
            else if (!IsNotOperand(nextChar) || nextChar == '(')
            {
                // looking at the very next character in the input stream, if it's an
                // operand or grouping then we know this is a unary
                if (token.Name.Length > 1)
                {
                    // If the string is greater than one character, then it's not a
                    // unary, it's an operand AND a unary. So we split off the rest of
                    // the string.
                    token.Name = c.ToString();
                    iterator.Rewind(iterator.Index - token.Position - 1);
                    token.Type = TokenType.Operand;
                }
                else
                {
                    token.Type = TokenType.Operator;
                    token.OperatorType = OperatorType.Unary;
                }
            }
            else
            {
                token.Type = TokenType.Operand;
            }
        }
        else if (c == '*')
        {
            // Same as +/- scenario above, if the previous token is an operand or
            // grouping, we need to treat the splat as a binary operator.
            if (previousToken != null &&
                (previousToken.Type == TokenType.Operand || previousToken.Name.Equals(")")))
            {
                token.Type = TokenType.Operator;
                token.OperatorType = OperatorType.Binary;
            }
            else
            {
                // but since there is no unary version of this we will treat it as an
                // operand, and let the evaluator deal with any problems such as a splat
                // directly followed by an operand or an open parenthesis
                token.Type = TokenType.Operand;
            }

            token.Name = c.ToString();
        }
        else
        {
            // not a number, symbol, string, or special (+, -, *) character. So we just
            // treat it as an operator
            token.Type = TokenType.Operator;
            if (c.IsSeparator() || c.IsOpenOperator() || c.IsClosedOperator())
            {
                token.Name = c.ToString();
                if (c.IsSeparator())
                {
                    token.OperatorType = OperatorType.Separator;
                }
                else if (c.IsOpenOperator())
                {
                    token.OperatorType = OperatorType.Open;
                }
                else
                {
                    token.OperatorType = OperatorType.Closed;
                }
            }
            else
            {
                token.Name = ScanTo(previousChar, iterator, FirstNonMatchingOperator);
                token.UnparsedName = token.Name;

                /* The general strategy to determine whether an operator is unary or binary:
                 * 1. Is it actually one of the defined unary types?
                 * 2. Peek at the next character. Is it a group or operand, or not?
                 * 3. Look behind at the previous token. Is it also a group or operand, or not?
                 * 4. If the token does NOT follow an operand or group, AND it precedes a
                 *    group character or operand character, then it is a unary.
                 * 5. All other cases, binary.
                 */
                if (((c.IsUnaryOperator() &&
                      (!IsNotOperand(nextChar) ||
                       nextChar == '(' ||
                       nextChar.IsRadixOperator() ||
                       nextChar.IsUnaryOperator())) ||
                     (c.IsRadixOperator() && char.IsLetterOrDigit(nextChar)) ||
                     (c == '%' && (nextChar == '.' || nextChar == '#'))) &&
                    (previousToken == null ||
                     (previousToken.Type != TokenType.Operand &&
                      !previousToken.Name.Equals(")"))))
                {
                    token.OperatorType = OperatorType.Unary;
                }
                else
                {
                    token.OperatorType = OperatorType.Binary;
                }
            }
        }
    }

    if (string.IsNullOrEmpty(token.UnparsedName))
    {
        token.UnparsedName = token.Name;
    }

    // If the scan stopped on a character that is not the token's last character, step
    // back one position so that character is seen again by the next read.
    if (iterator.Current != token.Name[^1])
    {
        iterator.Rewind(iterator.Index - 1);
    }

    return token;
}