/// <summary>
/// Constructor.  Chains to the (label, value, source, type) constructor, then records
/// the display format, end-of-line comment, and tag.
/// </summary>
/// <param name="label">Symbol's label.</param>
/// <param name="value">Symbol's value.</param>
/// <param name="source">Symbol source (general point of origin).</param>
/// <param name="type">Symbol type.</param>
/// <param name="formatSubType">Format descriptor sub-type, so we know how the
///   user wants the value to be displayed.</param>
/// <param name="comment">End-of-line comment.  Must not be null.</param>
/// <param name="tag">Symbol tag, used for grouping platform symbols.  Must not
///   be null.</param>
public DefSymbol(string label, int value, Source source, Type type,
        FormatDescriptor.SubType formatSubType, string comment, string tag)
        : this(label, value, source, type) {
    Debug.Assert(comment != null);
    Debug.Assert(tag != null);

    // The descriptor only carries the display sub-type here, so its length is
    // irrelevant.  A length of 1 lets Create() hand back a prefab object.
    const int PREFAB_LENGTH = 1;
    FormatDescriptor displayFormat = FormatDescriptor.Create(PREFAB_LENGTH,
        FormatDescriptor.Type.NumericLE, formatSubType);

    DataDescriptor = displayFormat;
    Comment = comment;
    Tag = tag;
}
/// <summary>
/// Creates a FormatDescriptor from a SerFormatDescriptor.
/// </summary>
/// <remarks>
/// Unrecognized format, sub-format, or symbol-part strings are reported as warnings
/// in <paramref name="report"/> rather than thrown.
/// </remarks>
/// <param name="sfd">Deserialized data.</param>
/// <param name="report">Error report object.</param>
/// <param name="dfd">Created FormatDescriptor, or null on failure.</param>
/// <returns>True on success.</returns>
private static bool CreateFormatDescriptor(SerFormatDescriptor sfd,
        FileLoadReport report, out FormatDescriptor dfd) {
    dfd = null;

    // Enum.TryParse is used instead of Enum.Parse so that every kind of bad input
    // (null, empty, unknown name, or a numeric string too large for the underlying
    // type) takes the warning path.  Enum.Parse signals the last case with
    // OverflowException, which a catch of ArgumentException does not intercept.
    FormatDescriptor.Type format;
    FormatDescriptor.SubType subFormat;
    if (!Enum.TryParse<FormatDescriptor.Type>(sfd.Format, out format) ||
            !Enum.TryParse<FormatDescriptor.SubType>(sfd.SubFormat, out subFormat)) {
        report.Add(FileLoadItem.Type.Warning, Properties.Resources.ERR_BAD_FD_FORMAT +
            ": " + sfd.Format + "/" + sfd.SubFormat);
        return false;
    }

    if (sfd.SymbolRef == null) {
        // Plain descriptor, no symbol reference.
        dfd = FormatDescriptor.Create(sfd.Length, format, subFormat);
    } else {
        WeakSymbolRef.Part part;
        if (!Enum.TryParse<WeakSymbolRef.Part>(sfd.SymbolRef.Part, out part)) {
            report.Add(FileLoadItem.Type.Warning,
                Properties.Resources.ERR_BAD_SYMREF_PART +
                ": " + sfd.SymbolRef.Part);
            return false;
        }
        // For symbol references the sub-format is implied; only the byte order
        // of the numeric format is carried over.
        dfd = FormatDescriptor.Create(sfd.Length,
            new WeakSymbolRef(sfd.SymbolRef.Label, part),
            format == FormatDescriptor.Type.NumericBE);
    }

    // Never report success with a null descriptor.
    return dfd != null;
}
/// <summary>
/// Constructor.  General form.
/// </summary>
/// <param name="label">Symbol's label.</param>
/// <param name="value">Symbol's value.</param>
/// <param name="source">Symbol source (general point of origin).</param>
/// <param name="type">Symbol type.</param>
/// <param name="labelAnno">Label annotation; passed through to the base
///   constructor.</param>
/// <param name="formatSubType">Format descriptor sub-type, so we know how the
///   user wants the value to be displayed.</param>
/// <param name="width">Variable width.</param>
/// <param name="widthSpecified">True if width was explicitly specified.  If this is
///   false, the value of the "width" argument is ignored.</param>
/// <param name="comment">End-of-line comment.  Must not be null.</param>
/// <param name="direction">I/O direction.</param>
/// <param name="multiMask">Bit mask to apply before comparisons.  Not stored for
///   constants.</param>
/// <param name="tag">Symbol tag, used for grouping platform symbols.  Must not
///   be null.</param>
public DefSymbol(string label, int value, Source source, Type type,
        LabelAnnotation labelAnno, FormatDescriptor.SubType formatSubType,
        int width, bool widthSpecified, string comment,
        DirectionFlags direction, MultiAddressMask multiMask, string tag)
        : this(label, value, source, type, labelAnno) {
    Debug.Assert(comment != null);
    Debug.Assert(tag != null);

    // Non-variable constants don't have a width, so an explicit width on one of
    // those is discarded.
    bool isPlainConstant = (type == Type.Constant && source != Source.Variable);
    bool useGivenWidth = widthSpecified;
    if (widthSpecified && isPlainConstant) {
        Debug.WriteLine("Overriding constant DefSymbol width");
        useGivenWidth = false;
    }
    HasWidth = useGivenWidth;

    int effectiveWidth = useGivenWidth ? width : DEFAULT_WIDTH;
    Debug.Assert(effectiveWidth >= MIN_WIDTH && effectiveWidth <= MAX_WIDTH);

    DataDescriptor = FormatDescriptor.Create(effectiveWidth,
        FormatDescriptor.Type.NumericLE, formatSubType);
    Comment = comment;

    // Only the bits covered by ReadWrite are valid direction flags.
    int strayDirectionBits = (int)direction & ~(int)DirectionFlags.ReadWrite;
    Debug.Assert(strayDirectionBits == 0);
    Direction = direction;

    // Constants don't have masks; leave MultiMask untouched for them.
    if (type != Type.Constant) {
        MultiMask = multiMask;
    }

    Tag = tag;
}
/// <summary>
/// Walks a range of bytes and assigns a data descriptor to each item found: a fill
/// (run of identical bytes), an ASCII string, or a single default byte when nothing
/// more structured is recognized.
/// </summary>
/// <param name="start">Offset of first byte in range.</param>
/// <param name="end">Offset of last byte in range.</param>
private void AnalyzeRange(int start, int end) {
    // TODO(someday): consider copying the buffer into a string and using Regex.  This
    // can be done fairly quickly with "unsafe" code, e.g.:
    // https://stackoverflow.com/questions/3028768/net-regular-expressions-on-bytes-instead-of-chars
    // Could be useful for ASCII stuff and the repeated-byte detector, e.g.:
    // https://stackoverflow.com/questions/1660694/regular-expression-to-match-any-character-being-repeated-more-than-10-times

    mDebugLog.LogI("Analyzing +" + start.ToString("x6") + " - +" + end.ToString("x6"));

    int minStringChars = mAnalysisParams.MinCharsForString;
    bool doAnalysis = mAnalysisParams.AnalyzeUncategorizedData;

    // Shared descriptor for every byte that ends up as plain single-byte data.
    // The bump of -1 is paired with a bump each time the descriptor is handed out.
    FormatDescriptor oneByteDefault = FormatDescriptor.Create(1,
        FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);
    FormatDescriptor.DebugPrefabBump(-1);

    for (int offset = start; offset <= end; ) {
        // When analysis is disabled, everything falls straight through to the
        // single-byte case at the bottom of the loop.
        if (doAnalysis) {
            // Check for block of repeated values.
            int runLen = RecognizeRun(mFileData, offset, end);
            bool isAscii =
                TextUtil.IsPrintableAscii((char)(mFileData[offset] & 0x7f));

            // A run of printable characters that is short enough to fit on one
            // line, and long enough to count as a string, is deliberately NOT
            // emitted here.  Creating the string descriptor at this point would
            // save a little time, but strings like "*****hello" would turn into
            // two separate strings.  Leave it for the ASCII recognizer below.
            bool isRun = (runLen >= MIN_RUN_LENGTH);
            bool deferToString = isAscii && runLen <= MIN_RUN_LENGTH_ASCII &&
                runLen >= minStringChars;
            if (isRun && !deferToString) {
                LogV(offset, "Run of 0x" + mFileData[offset].ToString("x2") + ": " +
                    runLen + " bytes");
                mAnattribs[offset].DataDescriptor = FormatDescriptor.Create(
                    runLen, FormatDescriptor.Type.Fill,
                    FormatDescriptor.SubType.None);
                offset += runLen;
                continue;
            }

            int strLen = RecognizeAscii(mFileData, offset, end);
            if (strLen >= minStringChars) {
                LogV(offset, "ASCII string, len=" + strLen + " bytes");
                mAnattribs[offset].DataDescriptor = FormatDescriptor.Create(strLen,
                    FormatDescriptor.Type.String, FormatDescriptor.SubType.None);
                offset += strLen;
                continue;
            }
        }

        // Nothing found (or analysis is off): output as single byte.  This is the
        // easiest form for users to edit.
        mAnattribs[offset].DataDescriptor = oneByteDefault;
        FormatDescriptor.DebugPrefabBump();

        // Advance by exactly one byte.  It's tempting to skip ahead by the run
        // length, but that would lose short ASCII strings that begin with
        // repeated bytes, e.g. "---%".
        offset++;
    }
}
/// <summary>
/// Analyzes uncategorized regions of the file to see if they fit common patterns.
///
/// The file is split into contiguous stretches of bytes that are neither code nor
/// already-formatted data, and each stretch is handed to AnalyzeRange().  A stretch
/// is also split at any labeled or commented offset, and wherever the address
/// stops being contiguous.
///
/// This is re-run after most changes to the project, so we don't want to do anything
/// crazily expensive.
/// </summary>
public void AnalyzeUncategorized() {
    // TODO(someday): we can make this faster.  The data doesn't change, so we
    // only need to do a full scan once, when the file is first loaded.  We can
    // create a TypedRangeSet for runs of identical bytes, using the byte value
    // as the type.  A second TypedRangeSet would identify runs of ASCII chars,
    // with different types for high/low ASCII (and PETSCII?).  AnalyzeRange() would
    // then just need to find the intersection with the sets, which should be
    // significantly faster.  We would need to re-do the scan if the parameters
    // for things like min match length change.

    FormatDescriptor oneByteDefault = FormatDescriptor.Create(1,
        FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);
    FormatDescriptor.DebugPrefabBump(-1);

    // Pass 1: anything not identified as code or inline data gets the "data" flag,
    // giving it a positive identification as data.  (This should be the only
    // place outside of CodeAnalysis that sets this flag.)  This isn't strictly
    // necessary, but it helps us assert things when pieces start moving around.
    for (int i = 0; i < mAnattribs.Length; i++) {
        Anattrib entry = mAnattribs[i];
        if (!entry.IsInlineData) {
            if (!entry.IsInstruction) {
                mAnattribs[i].IsData = true;
            }
            continue;
        }

        // Inline data: while we're here, add a default format descriptor if it
        // doesn't have one.  We don't try to analyze it otherwise.
        if (entry.DataDescriptor == null) {
            mAnattribs[i].DataDescriptor = oneByteDefault;
            FormatDescriptor.DebugPrefabBump();
        }
    }

    mDebugLog.LogI("Analyzing uncategorized data...");

    // Pass 2: find contiguous series of offsets that are not known to hold code
    // or data.  pendingStart is the first offset of the stretch being gathered,
    // or -1 when no stretch is open.
    int pendingStart = -1;
    int offset = 0;
    while (offset < mAnattribs.Length) {
        Anattrib attr = mAnattribs[offset];

        bool isCategorized =
            attr.IsInstruction || attr.IsInlineData || attr.IsDataStart;
        if (isCategorized) {
            // Instruction, inline data, or formatted data known to be here.
            // Analyze the previous chunk, then advance past this.
            if (pendingStart >= 0) {
                AnalyzeRange(pendingStart, offset - 1);
                pendingStart = -1;
            }
            if (attr.IsInstruction) {
                // Because of embedded instructions, we can't simply leap forward.
                offset++;
            } else {
                Debug.Assert(attr.Length > 0);
                offset += attr.Length;
            }
            continue;
        }

        if (attr.Symbol != null || mProject.HasCommentOrNote(offset)) {
            // In an uncategorized area, but we want to break at this byte
            // so the user or auto label doesn't get buried in the middle of
            // a large chunk.
            //
            // This is similar to, but independent of, GroupedOffsetSetFromSelected()
            // in ProjectView.  This is for auto-detection, the other is for user
            // selection.  It's best if the two behave similarly though.
            if (pendingStart >= 0) {
                AnalyzeRange(pendingStart, offset - 1);
            }
            pendingStart = offset;
            offset++;
            continue;
        }

        // This offset is uncategorized, keep gathering.
        if (pendingStart < 0) {
            pendingStart = offset;
        }
        offset++;

        // Check to see if the address has changed from the previous entry.
        bool hasNext = (offset < mAnattribs.Length);
        if (hasNext &&
                mAnattribs[offset - 1].Address + 1 != mAnattribs[offset].Address) {
            // Must be an ORG here.  Scan previous region.
            AnalyzeRange(pendingStart, offset - 1);
            pendingStart = -1;
        }
    }

    // Flush whatever was still being gathered when we hit the end of the file.
    if (pendingStart >= 0) {
        AnalyzeRange(pendingStart, mAnattribs.Length - 1);
    }
}
/// <summary>
/// Creates a FormatDescriptor in the Anattrib array at srcOffset that links to
/// targetOffset, or a nearby label.  If targetOffset doesn't have a useful label,
/// one will be generated.
///
/// This is used for both instruction and data operands.
/// </summary>
/// <param name="srcOffset">Offset of instruction or address data.</param>
/// <param name="srcLen">Length of instruction or data item.</param>
/// <param name="targetOffset">Offset of target.</param>
private void SetDataTarget(int srcOffset, int srcLen, int targetOffset) {
    // NOTE: don't try to cache mAnattribs[targetOffset] -- we may be changing
    // targetOffset and/or altering the Anattrib entry, so grabbing a copy of the
    // struct may lead to problems.

    // If the target offset has a symbol assigned, use it.  Otherwise, try to
    // find something nearby that might be more appropriate.
    int origTargetOffset = targetOffset;
    if (mAnattribs[targetOffset].Symbol == null) {
        if (mAnalysisParams.SeekNearbyTargets) {
            targetOffset = FindAlternateTarget(srcOffset, targetOffset);
        }

        // If we're not interested in seeking nearby targets, or we are but we failed
        // to find something useful, we need to make sure that we're not pointing
        // into the middle of the instruction.  The assembler will only see labels on
        // the opcode bytes, so if we're pointing at the middle we need to back up.
        if (mAnattribs[targetOffset].IsInstruction &&
                !mAnattribs[targetOffset].IsInstructionStart) {
            while (!mAnattribs[--targetOffset].IsInstructionStart) {
                // Should not be possible to move past the start of the file,
                // since we know we're in the middle of an instruction.
                Debug.Assert(targetOffset > 0);
            }
        } else if (!mAnattribs[targetOffset].IsInstruction &&
                !mAnattribs[targetOffset].IsStart) {
            // This is not part of an instruction, and is not the start of a formatted
            // data area.  However, it might be part of a formatted data area, in which
            // case we need to avoid creating an auto label in the middle.  So we seek
            // backward, looking for the first offset with a descriptor.  If that
            // descriptor includes this offset, we set the target offset to that.
            // (Note the uncategorized data pass hasn't run yet, so only instructions
            // and offsets identified by users or scripts have been categorized.)
            //
            // The scan must include offset 0: a formatted item that begins at the
            // very first byte of the file can still encompass the target.
            int scanOffset = targetOffset;
            while (--scanOffset >= 0) {
                FormatDescriptor dfd = mAnattribs[scanOffset].DataDescriptor;
                if (dfd != null && scanOffset + dfd.Length > targetOffset) {
                    // Descriptor encompasses target offset.  Adjust target.
                    targetOffset = scanOffset;
                    break;
                }
            }
        }
    }

    if (mAnattribs[targetOffset].Symbol == null) {
        // No label at target offset, generate one.
        //
        // Generally speaking, the label we generate will be unique, because it
        // incorporates the address.  It's possible through various means to end
        // up with a user or platform label that matches an auto label, so we
        // need to do some renaming in that case.  Shouldn't happen often.
        Symbol sym = SymbolTable.GenerateUniqueForAddress(
            mAnattribs[targetOffset].Address, mProject.SymbolTable);
        mAnattribs[targetOffset].Symbol = sym;
        // This will throw if the symbol already exists.  That is the desired
        // behavior, as that would be a bug.
        mProject.SymbolTable.Add(sym);
    }

    // Create a Numeric/Symbol descriptor that references the target label.  If the
    // source offset already had a descriptor (e.g. Numeric/Address data item),
    // this will replace it in the Anattrib array.  (The user-specified format
    // is unaffected.)
    //
    // Doing this by target symbol, rather than offset in a Numeric/Address item,
    // allows us to avoid carrying the adjustment stuff everywhere.  OTOH we have
    // to manually refactor label renames in the display list if we don't want to
    // redo the data analysis.
    bool isBigEndian = false;
    if (mAnattribs[srcOffset].DataDescriptor != null) {
        LogD(srcOffset, "Replacing " + mAnattribs[srcOffset].DataDescriptor +
            " with reference to " + mAnattribs[targetOffset].Symbol.Label +
            ", adj=" + (origTargetOffset - targetOffset));
        // Preserve the byte order of the descriptor being replaced.
        if (mAnattribs[srcOffset].DataDescriptor.FormatType ==
                FormatDescriptor.Type.NumericBE) {
            isBigEndian = true;
        }
    } else {
        LogV(srcOffset, "Creating weak reference to label " +
            mAnattribs[targetOffset].Symbol.Label +
            ", adj=" + (origTargetOffset - targetOffset));
    }
    mAnattribs[srcOffset].DataDescriptor = FormatDescriptor.Create(srcLen,
        new WeakSymbolRef(mAnattribs[targetOffset].Symbol.Label, WeakSymbolRef.Part.Low),
        isBigEndian);
}
/// <summary>
/// Creates a FormatDescriptor from a SerFormatDescriptor.
/// </summary>
/// <remarks>
/// Handles the version 1 encodings ("String" format with a string-kind sub-format,
/// and the "Ascii" sub-format) by mapping them onto the current enumerations.
/// Unrecognized format, sub-format, or symbol-part strings are reported as warnings
/// in <paramref name="report"/> rather than thrown.
/// </remarks>
/// <param name="sfd">Deserialized data.</param>
/// <param name="version">Serialization version (CONTENT_VERSION).</param>
/// <param name="report">Error report object.</param>
/// <param name="dfd">Created FormatDescriptor, or null on failure.</param>
/// <returns>True on success.</returns>
private static bool CreateFormatDescriptor(SerFormatDescriptor sfd, int version,
        FileLoadReport report, out FormatDescriptor dfd) {
    dfd = null;
    FormatDescriptor.Type format;
    FormatDescriptor.SubType subFormat;

    if ("String".Equals(sfd.Format)) {
        // File version 1 used a different set of enumerated values for defining
        // strings.  Parse it out here.
        Debug.Assert(version <= 1);
        subFormat = FormatDescriptor.SubType.ASCII_GENERIC;
        switch (sfd.SubFormat) {
            case "None":
                format = FormatDescriptor.Type.StringGeneric;
                break;
            case "Reverse":
                format = FormatDescriptor.Type.StringReverse;
                break;
            case "CString":
                format = FormatDescriptor.Type.StringNullTerm;
                break;
            case "L8String":
                format = FormatDescriptor.Type.StringL8;
                break;
            case "L16String":
                format = FormatDescriptor.Type.StringL16;
                break;
            case "Dci":
                format = FormatDescriptor.Type.StringDci;
                break;
            case "DciReverse":
                // No longer supported.  Nobody ever used this but the regression
                // tests, though, so there's no reason to handle this nicely.
                format = FormatDescriptor.Type.Dense;
                subFormat = FormatDescriptor.SubType.None;
                break;
            default:
                // No idea what this is (includes a null sub-format); output as
                // dense hex.
                format = FormatDescriptor.Type.Dense;
                subFormat = FormatDescriptor.SubType.None;
                break;
        }
        Debug.WriteLine("Found v1 string, fmt=" + format + ", sub=" + subFormat);
        dfd = FormatDescriptor.Create(sfd.Length, format, subFormat);
        return dfd != null;
    }

    // Enum.TryParse is used instead of Enum.Parse so that every kind of bad input
    // (null, empty, unknown name, or a numeric string too large for the underlying
    // type) takes the warning path.  Enum.Parse signals the last case with
    // OverflowException, which a catch of ArgumentException does not intercept.
    bool parsedOk = Enum.TryParse<FormatDescriptor.Type>(sfd.Format, out format);
    subFormat = FormatDescriptor.SubType.None;
    if (parsedOk) {
        if (version <= 1 && "Ascii".Equals(sfd.SubFormat)) {
            // File version 1 used "Ascii" for all character data in numeric
            // operands.  It applied to both low and high ASCII.
            subFormat = FormatDescriptor.SubType.ASCII_GENERIC;
            Debug.WriteLine("Found v1 char, fmt=" + sfd.Format + ", sub=" +
                sfd.SubFormat);
        } else {
            parsedOk = Enum.TryParse<FormatDescriptor.SubType>(sfd.SubFormat,
                out subFormat);
        }
    }
    if (!parsedOk) {
        report.Add(FileLoadItem.Type.Warning, Res.Strings.ERR_BAD_FD_FORMAT +
            ": " + sfd.Format + "/" + sfd.SubFormat);
        return false;
    }

    if (sfd.SymbolRef == null) {
        // Plain descriptor, no symbol reference.
        dfd = FormatDescriptor.Create(sfd.Length, format, subFormat);
    } else {
        WeakSymbolRef.Part part;
        if (!Enum.TryParse<WeakSymbolRef.Part>(sfd.SymbolRef.Part, out part)) {
            report.Add(FileLoadItem.Type.Warning,
                Res.Strings.ERR_BAD_SYMREF_PART +
                ": " + sfd.SymbolRef.Part);
            return false;
        }
        // For symbol references the sub-format is implied; only the byte order
        // of the numeric format is carried over.
        dfd = FormatDescriptor.Create(sfd.Length,
            new WeakSymbolRef(sfd.SymbolRef.Label, part),
            format == FormatDescriptor.Type.NumericBE);
    }
    return dfd != null;
}