/// <summary>
/// Serializes the data and text elements of the object file into byte arrays
/// and hands them, together with the symbol table, to an ELF writer.
/// </summary>
/// <param name="fileName">The path passed to the ELF writer's WriteFile call.</param>
/// <param name="file">The object file whose elements and symbol table are written.</param>
public void WriteObjectFile(string fileName, BasicObjectFile file)
{
    byte[] dataBytes;
    byte[] textBytes;

    // each element knows how to serialize itself to a stream; pointing it at an
    // in-memory stream gives us the raw bytes as plain arrays.
    using (var dataStream = new MemoryStream())
    using (var textStream = new MemoryStream())
    {
        foreach (var element in file.DataElements)
        {
            element.WriteDataToFile(dataStream);
        }

        foreach (var element in file.TextElements)
        {
            element.WriteDataToFile(textStream);
        }

        dataBytes = dataStream.ToArray();
        textBytes = textStream.ToArray();
    }

    using (var elfWriter = new ELF_Wrapper.ELF_Writer())
    {
        elfWriter.AddDataSection(dataBytes, Common.CommonConstants.BASE_DATA_ADDRESS);
        elfWriter.AddTextSection(textBytes, Common.CommonConstants.BASE_TEXT_ADDRESS);
        elfWriter.AddSymbolTable(file.SymbolTable);
        elfWriter.WriteFile(fileName);
    }
}
/// <summary>
/// Tries to handle any linkage declarations.
/// </summary>
/// <remarks>
/// The line is tokenized on spaces and tabs, and runs of whitespace are collapsed,
/// so a directive such as ".extern  name 4" or ".extern\tname\t4" is accepted.
/// For ".extern", exactly three tokens are required and the third must be a
/// non-negative 32-bit integer; only that size is forwarded to the object file.
/// </remarks>
/// <param name="trimmedLine">The line with leading/trailing whitespace removed.</param>
/// <param name="lineNum">The current line number, used when reporting errors.</param>
/// <param name="objFile">The basic object file that will be written to.</param>
/// <returns>Returns true if a linkage directive was processed in this line. Otherwise, returns false.</returns>
/// <exception cref="AssemblyException">
/// Thrown when a .extern directive has the wrong number of tokens or an invalid size.
/// </exception>
private bool TryHandlingLinkageDeclaration(string trimmedLine, int lineNum, BasicObjectFile objFile)
{
    // tokenize on any run of spaces/tabs so that column-aligned source still parses.
    string[] tokens = trimmedLine.Split(new[] { ' ', '\t' }, System.StringSplitOptions.RemoveEmptyEntries);

    // nothing to inspect (whitespace-only input), or the leading token is not a linkage directive.
    if (tokens.Length == 0 || !IsLinkageDeclaration(tokens[0]))
    {
        return false;
    }

    if (tokens[0] == ".extern")
    {
        // we expect three tokens: the directive, the symbol name and the byte size.
        if (tokens.Length != 3)
        {
            throw new AssemblyException(lineNum, "Expected symbol name and byte size declaration after .extern token.");
        }

        // an unparsable size and a negative size are reported identically.
        if (!int.TryParse(tokens[2], out int declarationSize) || declarationSize < 0)
        {
            throw new AssemblyException(lineNum, ".extern requires a non-negative 32-bit integer size.");
        }

        objFile.AddExternElement(declarationSize);
    }

    return true;
}
/// <summary>
/// Adds a non-trivially sized data element (.ascii, .asciiz or .space) to the object file.
/// </summary>
/// <param name="objFile">The object file to add the element to.</param>
/// <param name="dataType">The string token declaring the data type.</param>
/// <param name="elemValue">
/// The value text following the declaration: the string contents for .ascii/.asciiz,
/// or the text from which the reserved length is determined for .space.
/// </param>
/// <exception cref="ArgumentException">Thrown when <paramref name="dataType"/> is not one of the handled tokens.</exception>
private void AddNonTrivialDataElementToObjectFile(BasicObjectFile objFile, string dataType, string elemValue)
{
    switch (dataType)
    {
        case ".ascii":
            objFile.AddAsciiString(elemValue);
            break;

        case ".asciiz":
            objFile.AddNullTerminatedAsciiString(elemValue);
            break;

        case ".space":
            // reserve the requested number of bytes by emitting zeroes.
            int reservedCount = ParserCommon.DetermineNonTrivialDataLength(dataType, elemValue);
            for (int remaining = reservedCount; remaining > 0; --remaining)
            {
                objFile.AddDataElement((byte)0);
            }
            break;

        default:
            throw new ArgumentException("Unknown data type " + dataType + " passed as non-trivial data type.");
    }
}
/// <summary>
/// Outputs all data in the BasicObjectFile to the specified format.
/// </summary>
/// <remarks>
/// Every segment is first written to a temporary in-memory stream so that its
/// offset and size are known; the header is then written to the real file,
/// followed by a copy of the temporary stream.
/// </remarks>
/// <param name="fileName">The file path to generate the output at.</param>
/// <param name="file">The data that will be written to the file.</param>
public void WriteObjectFile(string fileName, BasicObjectFile file)
{
    using (FileStream outputStream = File.Open(fileName, FileMode.Create))
    using (MemoryStream segmentStream = new MemoryStream())
    {
        // NOTE: within each initializer the offset is captured *before* the
        // write call advances the stream; member initializers run top to bottom.

        // .data segment metadata
        var dataMetadataSegment = new SegmentData
        {
            SegmentName = ".dmdta",
            StartingOffset = CalculateSegmentOffset(segmentStream.Position),
            SegmentSize = WriteMetadataToFile(segmentStream, file.DataElements)
        };

        // the actual .data segment
        var dataSegment = new SegmentData
        {
            SegmentName = ".data",
            StartingOffset = CalculateSegmentOffset(segmentStream.Position),
            SegmentSize = WriteDataToFile(segmentStream, file.DataElements)
        };

        // the .text segment
        var textSegment = new SegmentData
        {
            SegmentName = ".text",
            StartingOffset = CalculateSegmentOffset(segmentStream.Position),
            SegmentSize = WriteDataToFile(segmentStream, file.TextElements)
        };

        // the .extern segment
        var externSegment = new SegmentData
        {
            SegmentName = ".extern",
            StartingOffset = CalculateSegmentOffset(segmentStream.Position),
            SegmentSize = WriteDataToFile(segmentStream, file.ExternElements)
        };

        // the symbol table
        var symbolSegment = new SegmentData
        {
            SegmentName = ".symtbl",
            StartingOffset = CalculateSegmentOffset(segmentStream.Position),
            SegmentSize = WriteSymbolTableToFile(segmentStream, file.SymbolTable)
        };

        // the source map table segment
        var sourceMapSegment = new SegmentData
        {
            SegmentName = ".srcmap",
            StartingOffset = CalculateSegmentOffset(segmentStream.Position),
            SegmentSize = WriteSourceMappingInfoToFile(segmentStream, file.DebugData)
        };

        // the order matters, as this is the order in which the entries are written to the header.
        // NOTE(review): this header order (.srcmap before .extern/.symtbl) differs from the
        // order the segments were written to the stream above; presumably readers locate
        // segments via StartingOffset - confirm before reordering.
        var headerEntries = new List<SegmentData>
        {
            dataMetadataSegment,
            dataSegment,
            textSegment,
            sourceMapSegment,
            externSegment,
            symbolSegment
        };

        // write the actual file header, now that we know our absolute offsets
        WriteHeader(outputStream, headerEntries);

        // copy the temp stream to the actual file stream.
        segmentStream.Seek(0, SeekOrigin.Begin);
        segmentStream.CopyTo(outputStream);
    }
}
/// <summary>
/// Adds one or more half (16-bit) elements declared on an assembly line to the object file.
/// </summary>
/// <remarks>
/// The text after the declaration token is split on commas. An entry of the form
/// "value:count" adds <c>value</c> <c>count</c> times; any other entry adds a single value.
/// A value that cannot be parsed as a short is looked up in the symbol table and
/// its address is cast to a short.
/// </remarks>
/// <param name="objFile">The object file to add to.</param>
/// <param name="fullText">The full text of the assembly line.</param>
/// <param name="declarationToken">The token specifying the declaration of the size parameter.</param>
/// <exception cref="ArgumentException">Thrown when an entry containing ':' does not split into exactly two parts.</exception>
private void AddShortElementToFile(BasicObjectFile objFile, string fullText, string declarationToken)
{
    // parses the text as a short, falling back to a symbol's address.
    short ResolveElement(string text)
    {
        if (IntExtensions.TryParseEx(text, out short parsed))
        {
            return parsed;
        }

        Symbol sym = m_SymTbl.GetSymbol(text);
        return (short)sym.Address;
    }

    // everything directly after the size directive is the argument list.
    int argsStart = fullText.IndexOf(declarationToken) + declarationToken.Length;
    string[] entries = fullText.Substring(argsStart).Split(',');
    entries = entries.Apply((str) => str.Trim()).ToArray();

    foreach (string entry in entries)
    {
        // a plain element (no repeat count): parse it and add it once.
        if (!entry.Contains(':'))
        {
            objFile.AddDataElement(ResolveElement(entry));
            continue;
        }

        // "value:count" - an array of the initialized value.
        string[] parts = entry.Split(':').Apply((str) => str.Trim()).ToArray();
        if (parts.Length != 2)
        {
            throw new ArgumentException("Expected size parameter after ':' token.");
        }

        // the count is parsed before the value, so a bad count is reported first.
        int repeatCount = int.Parse(parts[1]);
        short repeatedValue = ResolveElement(parts[0]);
        for (int n = 0; n < repeatCount; ++n)
        {
            objFile.AddDataElement(repeatedValue);
        }
    }
}
/// <summary>
/// Adds either one or multiple trivially sized data elements to the object file
/// by dispatching to the helper that matches the element width.
/// </summary>
/// <param name="objFile">The object file to add to.</param>
/// <param name="dataSize">The data size of the element to add, in bytes (1, 2, 4 or 8).</param>
/// <param name="fullText">The full text of the assembly line.</param>
/// <param name="declarationToken">The token declaring the size of the data.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="dataSize"/> is not 1, 2, 4 or 8.</exception>
private void AddTrivialDataElementsToFile(BasicObjectFile objFile, int dataSize, string fullText, string declarationToken)
{
    const int byteWidth = 1;
    const int shortWidth = 2;
    const int wordWidth = 4;
    const int dwordWidth = 8;

    if (dataSize == byteWidth)
    {
        AddByteElementToFile(objFile, fullText, declarationToken);
    }
    else if (dataSize == shortWidth)
    {
        AddShortElementToFile(objFile, fullText, declarationToken);
    }
    else if (dataSize == wordWidth)
    {
        AddIntElementToFile(objFile, fullText, declarationToken);
    }
    else if (dataSize == dwordWidth)
    {
        AddLongElementToFile(objFile, fullText, declarationToken);
    }
    else
    {
        throw new ArgumentException("Unknown data size passed to AddTrivialDataElementToObjectFile");
    }
}
/// <summary>
/// Generates code for a .text instruction in the file.
/// </summary>
/// <remarks>
/// The line is tokenized on ',', ':' and ' '. The first token recognised as an
/// instruction is assembled, with everything after that token treated as its
/// comma-separated operand list. The operand list is located from the position
/// of the instruction token itself, so a label whose name contains the mnemonic
/// (for example "add_loop: add $t0, $t1, $t2") is handled correctly.
/// If no instruction is present, the first token must be a known symbol.
/// </remarks>
/// <param name="fileName">The source file name, recorded in the source line information.</param>
/// <param name="asmLine">The line to parse.</param>
/// <param name="objFile">The object file that will be written to.</param>
/// <param name="currAlignment">The current specified alignment of the file. Unused for .text parsers.</param>
/// <exception cref="AssemblyException">
/// Thrown when the line contains no instruction and does not start with a known symbol.
/// </exception>
public void GenerateCodeForSegment(string fileName, LineData asmLine, BasicObjectFile objFile, int currAlignment)
{
    string lineText = asmLine.Text;

    // the instruction could share the line with a label, so split on ':' as well as ',' and ' '.
    string[] rawTokens = lineText.Split(new char[] { ',', ':', ' ' }, StringSplitOptions.RemoveEmptyEntries);

    string instructionToken = null;
    int operandStartIdx = 0;
    int searchFrom = 0;

    foreach (string rawToken in rawTokens)
    {
        // tokens are returned in source order and contain no delimiter characters,
        // so searching forward from the end of the previous token always lands on
        // this token's own position rather than on an earlier look-alike substring.
        int tokenStart = lineText.IndexOf(rawToken, searchFrom, StringComparison.Ordinal);
        searchFrom = tokenStart + rawToken.Length;

        string candidate = rawToken.Trim();
        if (m_ParserFac.IsInstruction(candidate))
        {
            instructionToken = candidate;
            operandStartIdx = searchFrom;
            break;
        }
    }

    if (instructionToken == null)
    {
        // not an instruction (may be a symbol); preprocessor instructions shouldn't bring us here.
        // make sure the user is not typing garbage. a line consisting only of delimiters
        // has no tokens at all and is reported the same way.
        if (rawTokens.Length == 0 || !m_SymTbl.ContainsSymbol(rawTokens[0]))
        {
            throw new AssemblyException(asmLine.LineNum, "Unknown instruction \"" + asmLine.Text + "\" found.");
        }

        return;
    }

    // split the text following the instruction at the commas to get the
    // instruction parameters, trimming whitespace from each one.
    string[] argTokens = lineText.Substring(operandStartIdx).Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
    argTokens = argTokens.Apply((str) => str.Trim()).ToArray();

    // find the parser for the instruction and let it generate the machine words.
    IInstructionGenerator parser = m_ParserFac.GetProcessorForInstruction(instructionToken);
    IEnumerable<int> generatedInstructions = parser.GenerateCodeForInstruction(m_CurrTextAddress, argTokens);

    // the source mapping refers to the address of the first generated word,
    // so it must be recorded before the text address is advanced below.
    var srcInfo = new SourceLineInformation(fileName, asmLine.LineNum, m_CurrTextAddress, asmLine.Text);
    objFile.AddSourceInformation(srcInfo);

    foreach (int generatedInstruction in generatedInstructions)
    {
        objFile.AddInstruction(generatedInstruction);
        m_CurrTextAddress += CommonConstants.BASE_INSTRUCTION_SIZE_BYTES;
    }
}
/// <summary>
/// Generates code for the given assembly file.
/// </summary>
/// <remarks>
/// Lines are processed one at a time. Assembly errors are collected rather than
/// aborting on the first one, and are rethrown together once the whole input has
/// been read. On success the reader is rewound to the beginning.
/// </remarks>
/// <param name="fileName">The source file name, passed through to the segment code generators.</param>
/// <param name="reader">A StreamReader instance that will read the input assembly file.</param>
/// <param name="objFile">The basic object file that will be written to.</param>
/// <exception cref="AggregateAssemblyError">Thrown when at least one line produced an assembly error.</exception>
public void GenerateCode(string fileName, StreamReader reader, BasicObjectFile objFile)
{
    // every error found while parsing, so users can view them all at once
    // instead of working through them piecemeal.
    var collectedErrors = new List<AssemblyException>();

    SegmentType activeSegment = SegmentType.Invalid;
    int activeAlignment = CommonConstants.DEFAULT_ALIGNMENT;
    int lineNum = 0;

    while (!reader.EndOfStream)
    {
        try
        {
            ++lineNum;

            // trim the line, then cut it off at any comment (ignoring those in user quotes).
            string line = ParserCommon.GetSanitizedString(reader.ReadLine().Trim());

            // ignore blank lines. trim should have removed all whitespace.
            if (!line.Any())
            {
                continue;
            }

            LineParseResults directiveResults = ParserCommon.HandleAssemblerDirective(line, lineNum, activeSegment, activeAlignment);

            // a new data alignment must be recorded in case the disassembler parses this file;
            // otherwise it would interpret padding bytes as actual data elements.
            if (activeAlignment != directiveResults.NewAlignment)
            {
                objFile.AddAlignmentChangeDeclaration(directiveResults.NewAlignment);
            }

            activeAlignment = directiveResults.NewAlignment;
            activeSegment = directiveResults.NewSegment;

            // only lines that are not directives, inside a valid segment, carry actual data.
            bool isContentLine = !directiveResults.IsLineAssemblerDirective && activeSegment != SegmentType.Invalid;
            if (!isContentLine)
            {
                continue;
            }

            if (TryHandlingLinkageDeclaration(line, lineNum, objFile))
            {
                continue;
            }

            ISegmentCodeGenerator codeGen = m_CodeGenFac.GetCodeGeneratorForSegment(activeSegment);
            var asmLine = new LineData(line, lineNum);

            try
            {
                codeGen.GenerateCodeForSegment(fileName, asmLine, objFile, activeAlignment);
            }
            catch (AssemblyException)
            {
                // already carries line information; let the outer handler collect it.
                throw;
            }
            catch (Exception ex)
            {
                // attach the line number to anything else the generator throws.
                throw new AssemblyException(lineNum, ex.Message);
            }
        }
        catch (AssemblyException ex)
        {
            collectedErrors.Add(ex);
        }
    }

    // if any errors were encountered, report all of them in one aggregate.
    if (collectedErrors.Count > 0)
    {
        throw new AggregateAssemblyError(collectedErrors);
    }

    // reset the StreamReader to the beginning position.
    reader.Seek(0, SeekOrigin.Begin);
}
/// <summary>
/// Task for assembling one individual file.
/// </summary>
/// <remarks>
/// Assembly is skipped when both files exist and the output file was written
/// more recently than the input file. Assembly errors and unexpected exceptions
/// are logged and recorded on the result instead of being propagated.
/// </remarks>
/// <param name="inputFile">The input file to assemble.</param>
/// <param name="outputFile">The path of the object file to produce.</param>
/// <param name="logger">The logging implementation to log errors/info to.</param>
/// <param name="options">The options to use while assembling.</param>
/// <returns>
/// The result of the operation. User assembly errors and internal errors are added to it,
/// and its OperationSuccessful flag is set to false when the input has no .text elements.
/// </returns>
public AssemblerResult AssembleFile(string inputFile, string outputFile, ILogger logger, AssemblerOptions options)
{
    var result = new AssemblerResult();
    logger.Log(LogLevel.Info, "Invoking assembler for file " + inputFile);

    try
    {
        // the output is current when it exists and is newer than its source.
        bool outputIsCurrent = File.Exists(inputFile)
                               && File.Exists(outputFile)
                               && File.GetLastWriteTimeUtc(inputFile) < File.GetLastWriteTimeUtc(outputFile);

        if (outputIsCurrent)
        {
            logger.Log(LogLevel.Info, "Nothing to do for file " + inputFile);
            return result;
        }

        using (var reader = new StreamReader(File.OpenRead(inputFile)))
        {
            // first pass: build the symbol table, data segment before text segment.
            var symTable = new SymbolTable();
            var instructionProcFac = new InstructionProcessorFactory(symTable);
            var symTableBuilder = new SymbolTableBuilder(logger, instructionProcFac);
            symTableBuilder.GenerateSymbolTableForSegment(reader, SegmentType.Data, symTable);
            symTableBuilder.GenerateSymbolTableForSegment(reader, SegmentType.Text, symTable);

            // second pass: use the symbol table to generate code with references resolved.
            var objFile = new BasicObjectFile(inputFile, symTable);
            var codeGenerator = new CodeGenerator(logger, symTable, instructionProcFac);
            codeGenerator.GenerateCode(inputFile, reader, objFile);

            if (objFile.TextElements.Any())
            {
                // write the object file out.
                var writerFac = new ObjectFileWriterFactory();
                IObjectFileWriter writer = writerFac.GetWriterForOutputFile(outputFile);
                writer.WriteObjectFile(outputFile, objFile);
            }
            else
            {
                logger.Log(LogLevel.Warning, "No .text segment to assemble. Stop.");
                result.OperationSuccessful = false;
            }
        }
    }
    catch (AggregateAssemblyError ex)
    {
        // errors in the user's source: report each with its location.
        foreach (AssemblyException asEx in ex.AssemblyErrors)
        {
            string location = "In file \"" + inputFile + "\" (line " + asEx.LineNumber + "):\n\t";
            logger.Log(LogLevel.Critical, location);
            logger.Log(LogLevel.Critical, asEx.Message);
            result.AddUserAssemblyError(asEx);
        }
    }
    catch (Exception ex)
    {
        // anything else is treated as an internal assembler failure.
        logger.Log(LogLevel.Critical, ex.StackTrace);
        logger.Log(LogLevel.Critical, ex.Message);

        Exception inner = ex.InnerException;
        if (inner != null)
        {
            logger.Log(LogLevel.Critical, inner.StackTrace);
            logger.Log(LogLevel.Critical, inner.Message);
        }

        result.AddInternalAssemblerError(ex);
    }

    return result;
}
/// <summary>
/// Generates the byte representation of a data declaration from a line of assembly code.
/// </summary>
/// <remarks>
/// The first token recognised as a data declaration decides how the line is handled:
/// trivially sized types, string declarations, or (otherwise) a .space reservation.
/// A line without any data declaration is accepted only if it contains a label.
/// </remarks>
/// <param name="fileName">The source file name. Not used by this method.</param>
/// <param name="asmLine">The line to parse.</param>
/// <param name="objFile">The object file that will be written to.</param>
/// <param name="currAlignment">The current specified alignment of the file.</param>
/// <exception cref="AssemblyException">
/// Thrown when no value follows the declaration, when an extra token follows a .space size,
/// or when the line is neither a data declaration nor a label.
/// </exception>
public void GenerateCodeForSegment(string fileName, LineData asmLine, BasicObjectFile objFile, int currAlignment)
{
    // writes the given number of zero bytes, counting each one as laid out.
    void EmitAlignmentPadding(int count)
    {
        for (int n = 0; n < count; ++n)
        {
            objFile.AddDataElement((byte)0);
            m_NumBytesLaidOut += 1;
        }
    }

    string[] fixedTokens = ParserCommon.GetTrimmedTokenArray(asmLine.Text.Split(' ', '\t')).ToArray();

    // locate the first data declaration token, if any.
    int declIdx = -1;
    for (int i = 0; i < fixedTokens.Length; ++i)
    {
        if (ParserCommon.IsDataDeclaration(fixedTokens[i]))
        {
            declIdx = i;
            break;
        }
    }

    if (declIdx < 0)
    {
        // check to see if this is just a label.
        // otherwise, it is probably garbage that we should throw.
        if (!ParserCommon.ContainsLabel(asmLine.Text))
        {
            throw new AssemblyException(asmLine.LineNum, "Unable to ascertain data type from line " + asmLine.Text);
        }

        return;
    }

    string declToken = fixedTokens[declIdx];

    // make sure that there's at least one value following the declaration.
    if (declIdx + 1 >= fixedTokens.Length)
    {
        throw new AssemblyException(asmLine.LineNum, "Expected data value after token " + declToken);
    }

    if (ParserCommon.IsTrivialDataType(declToken))
    {
        // determine before writing the next data element if we need to add padding.
        int paddingSize = ParserCommon.GetNumPaddingBytes(m_NumBytesLaidOut, currAlignment);
        int dataSize = ParserCommon.DetermineTrivialDataSize(declToken);
        int numElements = ParserCommon.GetArraySize(asmLine.Text, declToken);

        if (dataSize > paddingSize)
        {
            // add as much padding as we need to reach the next alignment boundary.
            EmitAlignmentPadding(paddingSize);
        }

        // the layout counter is advanced before the elements are actually emitted.
        m_NumBytesLaidOut += dataSize * numElements;
        AddTrivialDataElementsToFile(objFile, dataSize, asmLine.Text, declToken);
    }
    else if (ParserCommon.IsStringDeclaration(declToken))
    {
        int paddingSize = ParserCommon.GetNumPaddingBytes(m_NumBytesLaidOut, currAlignment);

        // for a string declaration, work from the original string data.
        string dataStr = ParserCommon.GetStringData(asmLine.Text);
        int dataSize = ParserCommon.DetermineNonTrivialDataLength(declToken, dataStr);

        if (dataSize > paddingSize)
        {
            // add as much padding as we need to reach the next alignment boundary.
            EmitAlignmentPadding(paddingSize);
        }

        // add the string data to the object file.
        AddNonTrivialDataElementToObjectFile(objFile, declToken, dataStr);
        m_NumBytesLaidOut += dataSize;
    }
    else
    {
        // otherwise, this must be a .space declaration. just get the size following it.
        int dataSize = ParserCommon.DetermineNonTrivialDataLength(declToken, fixedTokens[declIdx + 1]);
        int paddingSize = ParserCommon.GetNumPaddingBytes(dataSize, currAlignment);

        // fill the space and padding with zeroes.
        // (the layout counter is intentionally left untouched here, as in the original logic.)
        int zeroCount = dataSize + paddingSize;
        for (int n = 0; n < zeroCount; ++n)
        {
            objFile.AddDataElement((byte)0);
        }

        // we expect exactly one token after the declaration.
        // anything further may be garbage that we should detect.
        if (fixedTokens.Length > declIdx + 2)
        {
            throw new AssemblyException(asmLine.LineNum, "Unknown token \"" + fixedTokens[declIdx + 2] + "\" found.");
        }
    }
}
/// <summary>
/// Adds one or more byte elements declared on an assembly line to the object file.
/// </summary>
/// <remarks>
/// The text after the declaration token is split on commas. An entry of the form
/// "value:count" adds <c>value</c> <c>count</c> times; any other entry adds a single value.
/// Each value is resolved by trying, in order: a numeric byte, a character literal,
/// and finally a symbol lookup whose address is cast to a byte.
/// </remarks>
/// <param name="objFile">The object file to add to.</param>
/// <param name="fullText">The full text of the assembly line.</param>
/// <param name="declarationToken">The token specifying the declaration of the size parameter.</param>
/// <exception cref="ArgumentException">Thrown when an entry containing ':' does not split into exactly two parts.</exception>
private void AddByteElementToFile(BasicObjectFile objFile, string fullText, string declarationToken)
{
    // numeric byte first, then character literal, then symbol address as a fallback.
    byte ResolveElement(string text)
    {
        if (IntExtensions.TryParseEx(text, out byte value))
        {
            return value;
        }

        if (StringUtils.TryParseCharacterLiteralAsByte(text, out value))
        {
            return value;
        }

        Symbol sym = m_SymTbl.GetSymbol(text);
        return (byte)sym.Address;
    }

    // everything directly after the size directive is the argument list.
    int argsStart = fullText.IndexOf(declarationToken) + declarationToken.Length;
    string[] entries = fullText.Substring(argsStart).Split(',');
    entries = entries.Apply((str) => str.Trim()).ToArray();

    foreach (string entry in entries)
    {
        // a plain element (no repeat count): resolve it and add it once.
        if (!entry.Contains(':'))
        {
            objFile.AddDataElement(ResolveElement(entry));
            continue;
        }

        // "value:count" - an array of the initialized value.
        string[] parts = entry.Split(':').Apply((str) => str.Trim()).ToArray();
        if (parts.Length != 2)
        {
            throw new ArgumentException("Expected size parameter after ':' token.");
        }

        // the count is parsed before the value, so a bad count is reported first.
        int repeatCount = int.Parse(parts[1]);
        byte repeatedValue = ResolveElement(parts[0]);
        for (int n = 0; n < repeatCount; ++n)
        {
            objFile.AddDataElement(repeatedValue);
        }
    }
}