/// <summary>
/// Writes a non-trivial (string or reserved-space) data element into the object file.
/// </summary>
/// <param name="objFile">The object file that receives the element.</param>
/// <param name="dataType">The directive token naming the data type: ".ascii", ".asciiz" or ".space".</param>
/// <param name="elemValue">The element text. For ".ascii"/".asciiz" it is the string to store; for ".space"
/// it is handed to ParserCommon.DetermineNonTrivialDataLength to obtain the number of bytes to reserve.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="dataType"/> is not one of the tokens above.</exception>
private void AddNonTrivialDataElementToObjectFile(BasicObjectFile objFile, string dataType, string elemValue)
{
    switch (dataType)
    {
        case ".ascii":
        {
            objFile.AddAsciiString(elemValue);
            break;
        }

        case ".asciiz":
        {
            objFile.AddNullTerminatedAsciiString(elemValue);
            break;
        }

        case ".space":
        {
            // reserved space is written out as a run of zero bytes.
            int bytesRemaining = ParserCommon.DetermineNonTrivialDataLength(dataType, elemValue);
            while (bytesRemaining > 0)
            {
                objFile.AddDataElement((byte)0);
                --bytesRemaining;
            }

            break;
        }

        default:
        {
            throw new ArgumentException("Unknown data type " + dataType + " passed as non-trivial data type.");
        }
    }
}
/// <summary>
/// Reads a line in a .text segment of a program, and adds any found symbols to the
/// symbol table.
/// </summary>
/// <param name="asmLine">The line of assembly code to parse.</param>
/// <param name="symbolList">The list of symbols that will be added to.</param>
/// <param name="alignment">Unused. Alignment is always on word boundaries in the text segment.</param>
public void ParseSymbolsInLine(LineData asmLine, SymbolTable symbolList, int alignment)
{
    string[] tokens = asmLine.Text.Split(' ');

    // if this doesn't have a label, and is not empty or a comment,
    // then this is an instruction. let the unlabeled-line parser account for it.
    if (!ParserCommon.ContainsLabel(tokens[0]))
    {
        ParseUnlabeledLine(asmLine);
        return;
    }

    // a label should end with a ':' character.
    // this is OK if there's trash and no real assembly at this point,
    // as the second pass code generator will flag it.
    // we're just here to get symbols and addresses.
    string labelName = ParserCommon.ExtractLabel(tokens[0]);
    var label = new Symbol(labelName, SegmentType.Text, m_CurrTextAddress);
    label.Size = sizeof(int);
    symbolList.AddSymbol(label);

    // determine if there are any instructions on this line.
    // the whole line is examined rather than just the first space-delimited token:
    // tokens[0] never contains the text that follows "label: ", so splitting it
    // would miss an instruction separated from the label by a space.
    string[] subTokens = asmLine.Text.Split(new[] { ':' }, StringSplitOptions.RemoveEmptyEntries);

    bool hasInstruction = false;
    for (int i = 1; i < subTokens.Length && !hasInstruction; ++i)
    {
        hasInstruction = !string.IsNullOrWhiteSpace(subTokens[i]);
    }

    // anything non-blank after the label is assumed to be an instruction;
    // the unlabeled-line parser accounts for however many bytes it occupies.
    if (hasInstruction)
    {
        ParseUnlabeledLine(asmLine);
    }
}
/// <summary>
/// Registers the label found at the start of a data line and then parses the rest of
/// the line for a data declaration.
/// </summary>
/// <param name="symTable">The symbol table that receives the new label.</param>
/// <param name="originalLine">The line data being parsed.</param>
/// <param name="tokens">The whitespace-separated tokens of the line; the label is read from the first one.</param>
/// <param name="alignment">The current alignment, forwarded to the unlabeled-line parser.</param>
/// <exception cref="AssemblyException">Thrown when a previously declared label is still waiting
/// for its data declaration.</exception>
private void ParseLabeledLine(SymbolTable symTable, LineData originalLine, string[] tokens, int alignment)
{
    // a pending symbol means an earlier label has not yet been matched with its data
    // declaration (e.g. the declaration was expected a few lines after the label),
    // so running into another label here is an error.
    bool earlierLabelPending = m_UnresolvedSym != null;
    if (earlierLabelPending)
    {
        throw new AssemblyException(originalLine.LineNum, "Expected data declaration after label.");
    }

    var pendingSymbol = new Symbol(ParserCommon.ExtractLabel(tokens[0]), SegmentType.Data, m_CurrDataAddress);
    symTable.AddSymbol(pendingSymbol);

    // mark the symbol as unresolved before scanning the remainder of the line for
    // the data element that backs it.
    m_UnresolvedSym = pendingSymbol;
    ParseUnlabeledLine(originalLine, tokens, alignment);
}
/// <summary>
/// Reads one line of a .data segment and records any symbol it declares.
/// </summary>
/// <param name="asmLine">The line of assembly code to parse.</param>
/// <param name="symbolList">The symbol table that receives any label found on the line.</param>
/// <param name="alignment">The current data alignment, forwarded to the line parsers.</param>
/// <exception cref="AssemblyException">Thrown when an unlabeled line contains no data declaration.</exception>
public void ParseSymbolsInLine(LineData asmLine, SymbolTable symbolList, int alignment)
{
    char[] separators = { ' ', '\t' };
    string[] rawTokens = asmLine.Text.Split(separators);
    string[] lineTokens = ParserCommon.GetTrimmedTokenArray(rawTokens).ToArray();

    // a label should end with a ':' character and should be the first token.
    if (ParserCommon.ContainsLabel(lineTokens[0]))
    {
        ParseLabeledLine(symbolList, asmLine, lineTokens, alignment);
        return;
    }

    // no label, so the line has to be a data element; the parser reports false
    // when it found no data declaration, which means the line is garbage.
    bool parsedData = ParseUnlabeledLine(asmLine, lineTokens, alignment);
    if (!parsedData)
    {
        throw new AssemblyException(asmLine.LineNum, "Expected size declaration, received \"" + asmLine.Text + '\"');
    }
}
/// <summary>
/// Walks the assembly source line by line and fills the symbol table with the symbols
/// declared inside the requested segment.
/// </summary>
/// <param name="reader">A StreamReader instance that reads the input assembly file.</param>
/// <param name="desiredSegment">The program segment whose symbols should be collected.</param>
/// <param name="symTable">The SymbolTable instance to populate.</param>
/// <exception cref="AggregateAssemblyError">Thrown after the whole input has been read when one or
/// more lines failed; it carries every collected AssemblyException. In that case the reader
/// is not rewound.</exception>
public void GenerateSymbolTableForSegment(StreamReader reader, SegmentType desiredSegment, SymbolTable symTable)
{
    SegmentType activeSegment = SegmentType.Invalid;
    int activeAlignment = CommonConstants.DEFAULT_ALIGNMENT;

    // every failure is collected so users can view them all at once
    // instead of working through them piecemeal.
    var errors = new List<AssemblyException>();

    for (int lineNum = 1; !reader.EndOfStream; ++lineNum)
    {
        try
        {
            // trim surrounding whitespace, then cut the line off at any comment
            // (ignoring comment markers inside user quotes).
            string line = ParserCommon.GetSanitizedString(reader.ReadLine().Trim());

            // nothing to do for blank lines.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            try
            {
                LineParseResults directive =
                    ParserCommon.HandleAssemblerDirective(line, lineNum, activeSegment, activeAlignment);
                activeAlignment = directive.NewAlignment;
                activeSegment = directive.NewSegment;

                // further processing is only needed for non-directive lines that
                // sit inside the segment we were asked about.
                bool needsSymbolParsing = !directive.IsLineAssemblerDirective && activeSegment == desiredSegment;
                if (needsSymbolParsing && !TryHandlingLinkageDeclaration(line, lineNum, activeSegment, symTable))
                {
                    ISymbolTableBuilder segmentParser = m_SymbolBuilderFac.GetParserForSegment(lineNum, activeSegment);
                    segmentParser.ParseSymbolsInLine(new LineData(line, lineNum), symTable, activeAlignment);
                }
            }
            catch (AssemblyException)
            {
                throw;
            }
            catch (Exception ex)
            {
                // report any other failure against the line that caused it.
                throw new AssemblyException(lineNum, ex.Message);
            }
        }
        catch (AssemblyException ex)
        {
            errors.Add(ex);
        }
    }

    // if any exceptions were encountered, throw an aggregate exception with
    // all of the encountered exceptions.
    if (errors.Any())
    {
        throw new AggregateAssemblyError(errors);
    }

    // reset the StreamReader to the beginning position.
    reader.Seek(0, SeekOrigin.Begin);
}
/// <summary>
/// Generates code for the given assembly file.
/// </summary>
/// <param name="fileName">The name of the file being assembled; forwarded to the segment code generator.</param>
/// <param name="reader">A StreamReader instance that will read the input assembly file.</param>
/// <param name="objFile">The basic object file that will be written to.</param>
/// <exception cref="AggregateAssemblyError">Thrown after the whole input has been read when one or
/// more lines failed; it carries every collected AssemblyException. In that case the reader
/// is not rewound.</exception>
public void GenerateCode(string fileName, StreamReader reader, BasicObjectFile objFile)
{
    SegmentType currSegmentType = SegmentType.Invalid;
    int currAlignment = CommonConstants.DEFAULT_ALIGNMENT;

    // a list of all exceptions we encounter during parsing.
    // users can view them all at once instead of working through them piecemeal.
    var exceptionList = new List<AssemblyException>();

    int lineNum = 0;
    while (!reader.EndOfStream)
    {
        try
        {
            ++lineNum;

            // trim the whitespace from any read-in line.
            string line = reader.ReadLine().Trim();

            // get a substring up until the commented line, ignoring those in user quotes.
            line = ParserCommon.GetSanitizedString(line);

            // ignore blank lines. this uses the same null/whitespace test as the symbol
            // table pass so both passes skip exactly the same lines.
            if (!string.IsNullOrWhiteSpace(line))
            {
                LineParseResults directiveResults =
                    ParserCommon.HandleAssemblerDirective(line, lineNum, currSegmentType, currAlignment);

                // if we have a new data alignment, we need to record this in case the disassembler
                // parses this file. otherwise, it will interpret padding bytes as actual data elements.
                if (currAlignment != directiveResults.NewAlignment)
                {
                    objFile.AddAlignmentChangeDeclaration(directiveResults.NewAlignment);
                }

                currAlignment = directiveResults.NewAlignment;
                currSegmentType = directiveResults.NewSegment;

                // if our segment type is valid, then we're processing actual data versus an assembler directive.
                if (!directiveResults.IsLineAssemblerDirective && currSegmentType != SegmentType.Invalid)
                {
                    if (!TryHandlingLinkageDeclaration(line, lineNum, objFile))
                    {
                        ISegmentCodeGenerator codeGen = m_CodeGenFac.GetCodeGeneratorForSegment(currSegmentType);
                        var asmLine = new LineData(line, lineNum);
                        try
                        {
                            codeGen.GenerateCodeForSegment(fileName, asmLine, objFile, currAlignment);
                        }
                        catch (AssemblyException)
                        {
                            throw;
                        }
                        catch (Exception ex)
                        {
                            // report any other generator failure against the offending line.
                            throw new AssemblyException(lineNum, ex.Message);
                        }
                    }
                }
            }
        }
        catch (AssemblyException ex)
        {
            exceptionList.Add(ex);
        }
    }

    // if any exceptions were encountered, throw an aggregate exception with
    // all of the encountered exceptions.
    if (exceptionList.Any())
    {
        throw new AggregateAssemblyError(exceptionList);
    }

    // reset the StreamReader to the beginning position.
    reader.Seek(0, SeekOrigin.Begin);
}
/// <summary>
/// Creates a syntax error whose message is the supplied text followed by ` @: "location"`,
/// where the location is whatever ParserCommon.Get_Location returns for the stream's span.
/// </summary>
/// <param name="Message">Text describing the syntax problem; placed at the start of the final message.</param>
/// <param name="Stream">The token stream being parsed. It is dereferenced unconditionally
/// (Stream.AsSpan()), so it must not be null.</param>
// NOTE(review): the first base argument is the literal "Syntax" — presumably an error
// category/name expected by the base exception; confirm against the base class.
public CssSyntaxErrorException(string Message, TokenStream Stream) : base("Syntax", $"{Message} @: \"{ParserCommon.Get_Location(Stream.AsSpan())}\"") { }
/// <summary>
/// Generates the byte representation of a data declaration from a line of assembly code
/// and appends it to the object file.
/// </summary>
/// <param name="fileName">The name of the file being assembled. Not used by this generator.</param>
/// <param name="asmLine">The line to parse.</param>
/// <param name="objFile">The object file that will be written to.</param>
/// <param name="currAlignment">The current specified alignment of the file.</param>
/// <exception cref="AssemblyException">Thrown when a data declaration has no value after it, when a
/// .space declaration is followed by more than one token, or when the line holds neither a data
/// declaration nor a label.</exception>
public void GenerateCodeForSegment(string fileName, LineData asmLine, BasicObjectFile objFile, int currAlignment)
{
    string[] tokens = asmLine.Text.Split(' ', '\t');
    string[] fixedTokens = ParserCommon.GetTrimmedTokenArray(tokens).ToArray();

    // locate the first data declaration token (e.g. .word, .asciiz, .space), if any.
    int dataDeclarationIdx = -1;
    for (int i = 0; i < fixedTokens.Length; ++i)
    {
        if (ParserCommon.IsDataDeclaration(fixedTokens[i]))
        {
            dataDeclarationIdx = i;
            break;
        }
    }

    if (dataDeclarationIdx < 0)
    {
        // check to see if this is just a label.
        // otherwise, it is probably garbage that we should throw.
        if (!ParserCommon.ContainsLabel(asmLine.Text))
        {
            throw new AssemblyException(asmLine.LineNum, "Unable to ascertain data type from line " + asmLine.Text);
        }

        return;
    }

    string dataType = fixedTokens[dataDeclarationIdx];

    // we found a data declaration; make sure that there's at least one value following it.
    if (dataDeclarationIdx + 1 >= fixedTokens.Length)
    {
        throw new AssemblyException(asmLine.LineNum, "Expected data value after token " + dataType);
    }

    if (ParserCommon.IsTrivialDataType(dataType))
    {
        // trivial types have a fixed element size; the line may declare several elements.
        int dataSize = ParserCommon.DetermineTrivialDataSize(dataType);
        int numElements = ParserCommon.GetArraySize(asmLine.Text, dataType);

        AddAlignmentPaddingIfNeeded(objFile, dataSize, currAlignment);

        m_NumBytesLaidOut += dataSize * numElements;
        AddTrivialDataElementsToFile(objFile, dataSize, asmLine.Text, dataType);
    }
    else if (ParserCommon.IsStringDeclaration(dataType))
    {
        // if this is a string declaration, then get the original string data
        // and work out how many bytes it occupies.
        string dataStr = ParserCommon.GetStringData(asmLine.Text);
        int dataSize = ParserCommon.DetermineNonTrivialDataLength(dataType, dataStr);

        AddAlignmentPaddingIfNeeded(objFile, dataSize, currAlignment);

        // add the string data to the object file.
        AddNonTrivialDataElementToObjectFile(objFile, dataType, dataStr);
        m_NumBytesLaidOut += dataSize;
    }
    else
    {
        // otherwise, this must be a .space declaration. just get the size following it.
        int dataSize = ParserCommon.DetermineNonTrivialDataLength(dataType, fixedTokens[dataDeclarationIdx + 1]);

        // we expect one token after this word.
        // otherwise, it may be garbage that we should detect.
        if (fixedTokens.Length > dataDeclarationIdx + 2)
        {
            throw new AssemblyException(asmLine.LineNum,
                                        "Unknown token \"" + fixedTokens[dataDeclarationIdx + 2] + "\" found.");
        }

        // lay the reserved space out exactly like the other data types: padding is derived
        // from the running byte count and placed in front of the element, and the running
        // count is advanced afterwards. previously the padding was derived from the element
        // size, appended after it, and the running count was never updated, which skewed
        // the padding of every later element.
        AddAlignmentPaddingIfNeeded(objFile, dataSize, currAlignment);

        for (int i = 0; i < dataSize; ++i)
        {
            objFile.AddDataElement((byte)0);
        }

        m_NumBytesLaidOut += dataSize;
    }
}

/// <summary>
/// Writes zero bytes up to the next alignment boundary when the upcoming element is larger
/// than the gap that remains before that boundary, keeping the running byte count in step.
/// </summary>
/// <param name="objFile">The object file that receives the padding bytes.</param>
/// <param name="dataSize">The size in bytes of the element about to be written.</param>
/// <param name="alignment">The current specified alignment of the file.</param>
private void AddAlignmentPaddingIfNeeded(BasicObjectFile objFile, int dataSize, int alignment)
{
    int paddingSize = ParserCommon.GetNumPaddingBytes(m_NumBytesLaidOut, alignment);

    // an element that is no larger than the remaining gap is written without padding.
    if (dataSize > paddingSize)
    {
        // add as much padding as we need to reach the next alignment boundary.
        for (int i = 0; i < paddingSize; ++i)
        {
            objFile.AddDataElement((byte)0);
            m_NumBytesLaidOut += 1;
        }
    }
}
/// <summary>
/// Scans a line for a data declaration and, when one is found, advances the data address
/// and byte counters past it and resolves any label that was waiting for its data.
/// </summary>
/// <param name="originalLine">The line data being parsed.</param>
/// <param name="tokens">The whitespace-separated tokens of the line.</param>
/// <param name="alignment">The current alignment.</param>
/// <returns>True when a data declaration was found on the line. False means nothing of use
/// was parsed, so the caller should verify that at least a symbol was present; otherwise
/// the line may be garbage.</returns>
/// <exception cref="AssemblyException">Thrown when the data declaration is the last token on
/// the line, i.e. no value follows it.</exception>
private bool ParseUnlabeledLine(LineData originalLine, string[] tokens, int alignment)
{
    // scan for the first data size token (e.g. .asciiz, .word, etc).
    int declIdx = -1;
    for (int i = 0; i < tokens.Length; ++i)
    {
        if (ParserCommon.IsDataDeclaration(tokens[i]))
        {
            declIdx = i;
            break;
        }
    }

    if (declIdx < 0)
    {
        return false;
    }

    // a declaration must be followed by at least one value.
    if (declIdx + 1 >= tokens.Length)
    {
        throw new AssemblyException(originalLine.LineNum, "Expected data value after token " + tokens[declIdx]);
    }

    string declToken = tokens[declIdx];
    int paddingSize;
    int dataSize;
    int elementCount = 1;

    if (ParserCommon.IsTrivialDataType(declToken))
    {
        // trivial types have a fixed element size, and a line may declare several elements.
        paddingSize = ParserCommon.GetNumPaddingBytes(m_TotalBytesLaidOut, alignment);
        dataSize = ParserCommon.DetermineTrivialDataSize(declToken);
        elementCount = ParserCommon.GetArraySize(originalLine.Text, declToken);
    }
    else if (ParserCommon.IsStringDeclaration(declToken))
    {
        // string declarations are sized from the original string data on the line.
        string dataStr = ParserCommon.GetStringData(originalLine.Text);
        dataSize = ParserCommon.DetermineNonTrivialDataLength(declToken, dataStr);
        paddingSize = ParserCommon.GetNumPaddingBytes(m_TotalBytesLaidOut, alignment);
    }
    else
    {
        // otherwise, this must be a .space declaration; its size is the token that follows.
        paddingSize = ParserCommon.GetNumPaddingBytes(m_TotalBytesLaidOut, alignment);
        dataSize = ParserCommon.DetermineNonTrivialDataLength(declToken, tokens[declIdx + 1]);
    }

    // an element larger than the gap before the next alignment boundary is pushed
    // out to that boundary; smaller elements are placed without padding.
    if (dataSize > paddingSize && paddingSize > 0)
    {
        m_CurrDataAddress += paddingSize;
        m_TotalBytesLaidOut += paddingSize;
    }

    int reservedSize = elementCount * dataSize;

    // the pending label's address has to be fixed up here, now that any padding
    // in front of its data has been committed.
    if (m_UnresolvedSym != null)
    {
        m_UnresolvedSym.Address = m_CurrDataAddress;
    }

    m_CurrDataAddress += reservedSize;
    m_TotalBytesLaidOut += reservedSize;

    // the pending label is now backed by data: record its size and stop tracking it.
    if (m_UnresolvedSym != null)
    {
        m_UnresolvedSym.Size = dataSize;
        m_UnresolvedSym = null;
    }

    return true;
}