/// <summary>
/// Determines how many lines of output are needed to display a formatted data item.
/// </summary>
/// <remarks>
/// String descriptors are not expected to reach this method anymore.  If one does,
/// a debug assertion fires and the count is obtained by formatting the string.
/// </remarks>
/// <param name="formatter">Format definition.</param>
/// <param name="opNames">Pseudo-op names; only passed along to the string formatter.</param>
/// <param name="dfd">Data format descriptor.</param>
/// <param name="data">Data buffer; only passed along to the string formatter.</param>
/// <param name="offset">Offset of the item; only passed along to the string
///   formatter.</param>
/// <returns>Line count.</returns>
public static int ComputeRequiredLineCount(Formatter formatter, PseudoOpNames opNames,
        FormatDescriptor dfd, byte[] data, int offset) {
    if (dfd.IsString) {
        // Callers shouldn't be sending strings here anymore.
        Debug.Assert(false);
        List<string> formatted = FormatStringOp(formatter, opNames, dfd, data, offset,
            out string popcode);
        return formatted.Count;
    }

    FormatDescriptor.Type fmtType = dfd.FormatType;
    if (fmtType == FormatDescriptor.Type.Dense) {
        // Dense hex has no delimiters; every input byte becomes two output characters.
        int lineCapacity = MAX_OPERAND_LEN;
        int hexChars = dfd.Length * 2;
        return (hexChars + lineCapacity - 1) / lineCapacity;
    }

    switch (fmtType) {
        case FormatDescriptor.Type.Default:
        case FormatDescriptor.Type.NumericLE:
        case FormatDescriptor.Type.NumericBE:
        case FormatDescriptor.Type.Fill:
        case FormatDescriptor.Type.Junk:
            break;
        default:
            // Unexpected format type; treat it as a single line.
            Debug.Assert(false);
            break;
    }
    return 1;
}
/// <summary>
/// Determines the file offset referenced by the operand of a data item.
/// </summary>
/// <param name="proj">Project reference.</param>
/// <param name="offset">Offset of data item.</param>
/// <returns>Operand offset as resolved by the project's address map, or -1 if the
///   offset is not the start of a data item or the item is not a numeric value
///   formatted as an address or symbol.</returns>
public static int GetDataOperandOffset(DisasmProject proj, int offset) {
    Anattrib attr = proj.GetAnattrib(offset);
    if (!(attr.IsDataStart || attr.IsInlineDataStart)) {
        return -1;
    }

    FormatDescriptor dfd = attr.DataDescriptor;

    // Only numeric/Address and numeric/Symbol items have an operand offset.
    bool isBigEndian = (dfd.FormatType == FormatDescriptor.Type.NumericBE);
    bool isNumeric = isBigEndian || dfd.FormatType == FormatDescriptor.Type.NumericLE;
    bool isReference = dfd.FormatSubType == FormatDescriptor.SubType.Address ||
        dfd.FormatSubType == FormatDescriptor.SubType.Symbol;
    if (!isNumeric || !isReference) {
        return -1;
    }

    // Handle the value as an absolute address: read it from the file data, then
    // map the address back to a file offset.
    int address = RawData.GetWord(proj.FileData, offset, dfd.Length, isBigEndian);
    if (dfd.Length < 3) {
        // The data has no bank byte.  Substitute the program bank for the data
        // bank; imperfect, but nothing better is available right now.
        address |= attr.Address & 0x7fff0000;
    }
    return proj.AddrMap.AddressToOffset(offset, address);
}
/// <summary>
/// Determines how many lines of output are needed to display a formatted data item.
/// </summary>
/// <param name="formatter">Format definition (not consulted by this method).</param>
/// <param name="dfd">Data format descriptor.</param>
/// <returns>Line count.</returns>
public static int ComputeRequiredLineCount(Formatter formatter, FormatDescriptor dfd) {
    switch (dfd.FormatType) {
        case FormatDescriptor.Type.Default:
        case FormatDescriptor.Type.NumericLE:
        case FormatDescriptor.Type.NumericBE:
        case FormatDescriptor.Type.Fill:
            return 1;

        case FormatDescriptor.Type.Dense: {
            // Dense hex has no delimiters; every input byte becomes two characters.
            int lineCapacity = MAX_OPERAND_LEN;
            int hexChars = dfd.Length * 2;
            return (hexChars + lineCapacity - 1) / lineCapacity;
        }

        case FormatDescriptor.Type.String: {
            // Reserve two characters for the start/end delimiters.  The on-screen
            // delimiters are non-ASCII, so nothing inside needs to be escaped.
            int lineCapacity = MAX_OPERAND_LEN - 2;

            // Number of bytes in the item that are not character data (leading
            // length or trailing null byte).
            int overhead;
            switch (dfd.FormatSubType) {
                case FormatDescriptor.SubType.None:
                case FormatDescriptor.SubType.Dci:
                case FormatDescriptor.SubType.Reverse:
                case FormatDescriptor.SubType.DciReverse:
                    overhead = 0;
                    break;
                case FormatDescriptor.SubType.CString:
                case FormatDescriptor.SubType.L8String:
                    overhead = 1;
                    break;
                case FormatDescriptor.SubType.L16String:
                    overhead = 2;
                    break;
                default:
                    Debug.Assert(false);
                    overhead = 0;
                    break;
            }

            int charCount = dfd.Length - overhead;
            int lineCount = (charCount + lineCapacity - 1) / lineCapacity;
            // An empty string still occupies one line of output.
            return (lineCount == 0) ? 1 : lineCount;
        }

        default:
            Debug.Assert(false);
            return 1;
    }
}
/// <summary>
/// Builds a DefSymbol by copying the label, value, source, and type from an existing
/// Symbol and attaching a format descriptor and comment.  Used for project symbols.
/// The tag is set to the empty string.
/// </summary>
/// <param name="sym">Base symbol.</param>
/// <param name="dfd">Format descriptor.</param>
/// <param name="comment">End-of-line comment.  Must not be null.</param>
public DefSymbol(Symbol sym, FormatDescriptor dfd, string comment)
        : this(sym.Label, sym.Value, sym.SymbolSource, sym.SymbolType) {
    Debug.Assert(comment != null);

    Tag = string.Empty;
    Comment = comment;
    DataDescriptor = dfd;
}
/// <summary>
/// Builds the serializable form of a FormatDescriptor.  The type and sub-type are
/// stored as their enum names; the symbol reference is only set when the
/// descriptor has one.
/// </summary>
/// <param name="dfd">Descriptor to convert.</param>
public SerFormatDescriptor(FormatDescriptor dfd) {
    var weakRef = dfd.SymbolRef;
    if (weakRef != null) {
        SymbolRef = new SerWeakSymbolRef(weakRef);
    }

    SubFormat = dfd.FormatSubType.ToString();
    Format = dfd.FormatType.ToString();
    Length = dfd.Length;
}
/// <summary>
/// Stores formatted string data for the specified offset, replacing any entry
/// already recorded there.
/// </summary>
/// <param name="offset">File offset.</param>
/// <param name="lines">String data.  Must not be null.</param>
/// <param name="pseudoOpcode">Pseudo-opcode for this line.</param>
/// <param name="formatter">Formatter dependency.</param>
/// <param name="formatDescriptor">FormatDescriptor dependency.</param>
/// <param name="pseudoOpNames">PseudoOpNames dependency.</param>
public void SetStringEntry(int offset, List<string> lines, string pseudoOpcode,
        Formatter formatter, FormatDescriptor formatDescriptor,
        PseudoOp.PseudoOpNames pseudoOpNames) {
    Debug.Assert(lines != null);

    mStringEntries[offset] = new FormattedStringEntry(lines, pseudoOpcode,
        formatter, formatDescriptor, pseudoOpNames);
}
/// <summary>
/// Builds an UndoableChange record describing an operand or data format update.
/// A no-op change (old equals new) is logged but still produces a record.
/// </summary>
/// <param name="offset">Affected offset.</param>
/// <param name="oldFormat">Current format.  May be null.</param>
/// <param name="newFormat">New format.  May be null.</param>
/// <returns>Change record.</returns>
public static UndoableChange CreateOperandFormatChange(int offset,
        FormatDescriptor oldFormat, FormatDescriptor newFormat) {
    if (oldFormat == newFormat) {
        Debug.WriteLine("No-op format change at +" + offset.ToString("x6") +
            ": " + oldFormat);
    }

    // Old and new formats are allowed to have different lengths.  There is no
    // obvious reason to forbid it, and permitting it is slightly faster.  Note
    // that when a change set holds two changes at the same offset, undo has to
    // walk the list in reverse order.

    // Decide whether data-only reanalysis is needed.
    //
    // It is needed when either format has a label: adding, removing, or changing a
    // symbol can alter the layout of uncategorized data, data targets, xrefs, etc.
    // (Plain display changes such as default -> decimal or decimal -> binary don't
    // matter, and the format editing code ensures labels don't appear in the
    // middle of a formatted region.)  Checking only for a symbol is insufficient,
    // because Numeric/Address creates an auto-label when the reference is inside
    // the file.
    //
    // It is also needed when a format is being added or removed, or when the
    // number of bytes covered changes, because uncategorized data must be
    // re-examined; for example, an auto-detected string could grow or shrink.
    // There is no separate flag for just that case.  Since only one change is
    // visible here, reanalysis can't be skipped when (say) one 4-byte numeric is
    // converted into two 2-byte numerics.
    bool needsDataReanalysis =
        (oldFormat != null && oldFormat.HasSymbolOrAddress) ||
        (newFormat != null && newFormat.HasSymbolOrAddress) ||
        oldFormat == null ||
        newFormat == null ||
        oldFormat.Length != newFormat.Length;

    UndoableChange change = new UndoableChange();
    change.Type = ChangeType.SetOperandFormat;
    change.Offset = offset;
    change.OldValue = oldFormat;
    change.NewValue = newFormat;
    change.ReanalysisRequired =
        needsDataReanalysis ? ReanalysisScope.DataOnly : ReanalysisScope.None;
    return change;
}
/// <summary>
/// Constructor.  Creates a symbol whose descriptor is a one-byte little-endian
/// numeric with the requested sub-type.
/// </summary>
/// <param name="label">Symbol's label.</param>
/// <param name="value">Symbol's value.</param>
/// <param name="source">Symbol source (general point of origin).</param>
/// <param name="type">Symbol type.</param>
/// <param name="formatSubType">Format descriptor sub-type, which records how the
///   user wants the value to be displayed.</param>
/// <param name="comment">End-of-line comment.  Must not be null.</param>
/// <param name="tag">Symbol tag, used for grouping platform symbols.  Must not
///   be null.</param>
public DefSymbol(string label, int value, Source source, Type type,
        FormatDescriptor.SubType formatSubType, string comment, string tag)
        : this(label, value, source, type) {
    Debug.Assert(comment != null);
    Debug.Assert(tag != null);

    // The length is irrelevant here; 1 yields a prefab descriptor object.
    const int prefabLength = 1;
    DataDescriptor = FormatDescriptor.Create(prefabLength,
        FormatDescriptor.Type.NumericLE, formatSubType);

    Comment = comment;
    Tag = tag;
}
/// <summary>
/// Tests whether this entry was generated with the given dependencies.
/// </summary>
/// <remarks>
/// The formatter and pseudo-op names are compared by reference.  The format
/// descriptor is compared with the == operator instead, because the data analyzer
/// regenerates the contents of Anattribs and so the same FormatDescriptor object
/// can't be expected.
/// </remarks>
/// <param name="formatter">Formatter dependency.</param>
/// <param name="formatDescriptor">FormatDescriptor dependency.</param>
/// <param name="pseudoOpNames">PseudoOpNames dependency.</param>
/// <returns>True if the dependencies match.</returns>
public bool CheckDeps(Formatter formatter, FormatDescriptor formatDescriptor,
        PseudoOp.PseudoOpNames pseudoOpNames) {
    if (!ReferenceEquals(mFormatter, formatter)) {
        return false;
    }
    if (!ReferenceEquals(mPseudoOpNames, pseudoOpNames)) {
        return false;
    }
    return mFormatDescriptor == formatDescriptor;
}
/// <summary>
/// Constructor.  Captures the formatted lines along with the objects they were
/// generated from, so staleness can be detected later.
/// </summary>
/// <param name="lines">Formatted string lines.  The list is copied.</param>
/// <param name="popcode">Pseudo-opcode for the string.</param>
/// <param name="formatter">Formatter dependency.</param>
/// <param name="formatDescriptor">FormatDescriptor dependency.</param>
/// <param name="pseudoOpNames">PseudoOpNames dependency.</param>
public FormattedStringEntry(List<string> lines, string popcode, Formatter formatter,
        FormatDescriptor formatDescriptor, PseudoOp.PseudoOpNames pseudoOpNames) {
    // The caller might modify its list afterward, so keep a private copy.
    Lines = new List<string>(lines.Count);
    Lines.AddRange(lines);

    PseudoOpcode = popcode;

    mFormatter = formatter;
    mFormatDescriptor = formatDescriptor;
    mPseudoOpNames = pseudoOpNames;
}
/// <summary>
/// Builds a DefSymbol from a Symbol, a FormatDescriptor, and assorted extra
/// attributes.  Used for deserialization.  The symbol's width is taken from the
/// descriptor's length, and the tag is set to the empty string.
/// </summary>
/// <param name="sym">Base symbol.</param>
/// <param name="dfd">Format descriptor.  Expected to be NumericLE.</param>
/// <param name="widthSpecified">Set if a width was explicitly specified.  Forced
///   to false for constants that are not variables.</param>
/// <param name="comment">End-of-line comment.</param>
/// <param name="direction">I/O direction.</param>
/// <param name="multiMask">Bit mask to apply before comparisons.</param>
/// <returns>Newly-created symbol.</returns>
public static DefSymbol Create(Symbol sym, FormatDescriptor dfd, bool widthSpecified,
        string comment, DirectionFlags direction, MultiAddressMask multiMask) {
    int width = dfd.Length;

    bool hasWidth = widthSpecified;
    if (hasWidth && sym.SymbolType == Type.Constant &&
            sym.SymbolSource != Source.Variable) {
        // Constants that aren't variables have no width, so ignore the argument.
        Debug.WriteLine("Overriding constant DefSymbol width");
        hasWidth = false;
    }

    Debug.Assert(dfd.FormatType == FormatDescriptor.Type.NumericLE);

    DefSymbol created = new DefSymbol(sym.Label, sym.Value, sym.SymbolSource,
        sym.SymbolType, dfd.FormatSubType, width, hasWidth, comment, direction,
        multiMask, string.Empty);
    return created;
}
/// <summary>
/// Debugging aid: writes every entry of an offset-keyed descriptor list to the
/// debug output, one line per entry, showing the first and last offset covered
/// plus the format type and sub-type.
/// </summary>
/// <param name="list">List to dump.  A null list is reported and otherwise
///   ignored.</param>
public static void DebugDumpSortedList(SortedList<int, FormatDescriptor> list) {
    if (list == null) {
        Debug.WriteLine("FormatDescriptor list is empty");
        return;
    }

    Debug.WriteLine($"FormatDescriptor list ({list.Count} entries)");
    foreach (KeyValuePair<int, FormatDescriptor> entry in list) {
        int firstOffset = entry.Key;
        FormatDescriptor desc = entry.Value;
        int lastOffset = firstOffset + desc.Length - 1;
        Debug.WriteLine($" +{firstOffset:x6},+{lastOffset:x6}: " +
            $"{desc.FormatType}({desc.FormatSubType})");
    }
}
/// <summary>
/// Looks up the formatted string data recorded for the specified offset.  An entry
/// is only returned if its dependencies match the ones supplied.
/// </summary>
/// <param name="offset">File offset.</param>
/// <param name="formatter">Formatter dependency.</param>
/// <param name="formatDescriptor">FormatDescriptor dependency.</param>
/// <param name="pseudoOpNames">PseudoOpNames dependency.</param>
/// <param name="PseudoOpcode">Receives the pseudo-op for this string, or null if
///   no usable entry was found.</param>
/// <returns>A reference to the string list, or null if there is no entry or the
///   entry's dependencies don't match.  The caller must not modify the list.</returns>
public List<string> GetStringEntry(int offset, Formatter formatter,
        FormatDescriptor formatDescriptor, PseudoOp.PseudoOpNames pseudoOpNames,
        out string PseudoOpcode) {
    PseudoOpcode = null;

    bool usable = mStringEntries.TryGetValue(offset, out FormattedStringEntry found) &&
        found.CheckDeps(formatter, formatDescriptor, pseudoOpNames);
    if (usable) {
        PseudoOpcode = found.PseudoOpcode;
        return found.Lines;
    }

    // Either nothing is stored at this offset, or what is stored is stale.
    return null;
}
/// <summary>
/// Builds an UndoableChange for an operand or data format update, unless the
/// change would be a no-op, in which case null is returned.  A new format whose
/// type is REMOVE is treated as null, the intent being to remove the descriptor
/// from the format set.
/// </summary>
/// <param name="offset">Affected offset.</param>
/// <param name="oldFormat">Current format.  May be null.</param>
/// <param name="newFormat">New format.  May be null.</param>
/// <returns>Change record, or null for a no-op change.</returns>
public static UndoableChange CreateActualOperandFormatChange(int offset,
        FormatDescriptor oldFormat, FormatDescriptor newFormat) {
    FormatDescriptor effectiveNew = newFormat;
    if (effectiveNew != null &&
            effectiveNew.FormatType == FormatDescriptor.Type.REMOVE) {
        Debug.WriteLine("CreateOperandFormatChange: converting REMOVE to null");
        effectiveNew = null;
    }

    if (oldFormat == effectiveNew) {
        Debug.WriteLine("No-op format change at +" + offset.ToString("x6") +
            ": " + oldFormat);
        return null;
    }

    return CreateOperandFormatChange(offset, oldFormat, effectiveNew);
}
/// <summary>
/// Creates a FormatDescriptor from a SerFormatDescriptor.
/// </summary>
/// <remarks>
/// The format, sub-format, and symbol part names come from the project file, so
/// they are parsed with Enum.TryParse.  Any unparseable value (unknown name, null,
/// empty, or out-of-range numeric text) is reported as a warning rather than
/// allowed to throw.
/// </remarks>
/// <param name="sfd">Deserialized data.</param>
/// <param name="report">Error report object; receives a warning on failure.</param>
/// <param name="dfd">Created FormatDescriptor, or null on failure.</param>
/// <returns>True on success.</returns>
private static bool CreateFormatDescriptor(SerFormatDescriptor sfd,
        FileLoadReport report, out FormatDescriptor dfd) {
    dfd = null;
    FormatDescriptor.Type format;
    FormatDescriptor.SubType subFormat;

    // Enum.Parse would throw OverflowException (not derived from ArgumentException)
    // on out-of-range numeric text; TryParse reports every failure the same way.
    if (!Enum.TryParse(sfd.Format, out format) ||
            !Enum.TryParse(sfd.SubFormat, out subFormat)) {
        report.Add(FileLoadItem.Type.Warning, Properties.Resources.ERR_BAD_FD_FORMAT +
            ": " + sfd.Format + "/" + sfd.SubFormat);
        return false;
    }

    if (sfd.SymbolRef == null) {
        dfd = FormatDescriptor.Create(sfd.Length, format, subFormat);
    } else {
        WeakSymbolRef.Part part;
        if (!Enum.TryParse(sfd.SymbolRef.Part, out part)) {
            report.Add(FileLoadItem.Type.Warning,
                Properties.Resources.ERR_BAD_SYMREF_PART +
                ": " + sfd.SymbolRef.Part);
            return false;
        }
        // Symbol references only need to know the byte order; the sub-format
        // parsed above is not used for them.
        dfd = FormatDescriptor.Create(sfd.Length,
            new WeakSymbolRef(sfd.SymbolRef.Label, part),
            format == FormatDescriptor.Type.NumericBE);
    }
    return true;
}
/// <summary>
/// Constructor.  General form.
/// </summary>
/// <param name="label">Symbol's label.</param>
/// <param name="value">Symbol's value.</param>
/// <param name="source">Symbol source (general point of origin).</param>
/// <param name="type">Symbol type.</param>
/// <param name="labelAnno">Label annotation; passed to the base constructor.</param>
/// <param name="formatSubType">Format descriptor sub-type, which records how the
///   user wants the value to be displayed.</param>
/// <param name="width">Variable width.</param>
/// <param name="widthSpecified">True if width was explicitly specified.  If this is
///   false, the value of the "width" argument is ignored.  Forced to false for
///   constants that are not variables.</param>
/// <param name="comment">End-of-line comment.  Must not be null.</param>
/// <param name="direction">I/O direction.</param>
/// <param name="multiMask">Bit mask to apply before comparisons.  Not stored for
///   constants.</param>
/// <param name="tag">Symbol tag, used for grouping platform symbols.  Must not
///   be null.</param>
public DefSymbol(string label, int value, Source source, Type type,
        LabelAnnotation labelAnno, FormatDescriptor.SubType formatSubType,
        int width, bool widthSpecified, string comment,
        DirectionFlags direction, MultiAddressMask multiMask, string tag)
        : this(label, value, source, type, labelAnno) {
    Debug.Assert(comment != null);
    Debug.Assert(tag != null);

    bool hasWidth = widthSpecified;
    if (hasWidth && type == Type.Constant && source != Source.Variable) {
        // Constants that aren't variables have no width, so ignore the argument.
        Debug.WriteLine("Overriding constant DefSymbol width");
        hasWidth = false;
    }
    HasWidth = hasWidth;

    int effectiveWidth = hasWidth ? width : DEFAULT_WIDTH;
    Debug.Assert(effectiveWidth >= MIN_WIDTH && effectiveWidth <= MAX_WIDTH);

    DataDescriptor = FormatDescriptor.Create(effectiveWidth,
        FormatDescriptor.Type.NumericLE, formatSubType);
    Comment = comment;

    // No bits outside of ReadWrite may be set.
    Debug.Assert(((int)direction & ~(int)DirectionFlags.ReadWrite) == 0);
    Direction = direction;

    // Masks don't apply to constants.
    if (type != Type.Constant) {
        MultiMask = multiMask;
    }

    Tag = tag;
}
/// <summary>
/// Formats a symbol reference, plus any adjustment, using common expression syntax.
/// The result is appended to the supplied StringBuilder.
/// </summary>
/// <remarks>
/// Shifting and masking on every reference would always be correct, but the output
/// would be ugly.  Single-byte operands use the byte-selection operators instead,
/// and wider operands only get as elaborate as they need to be.
/// </remarks>
/// <param name="formatter">Text formatter.</param>
/// <param name="sym">Symbol being referenced.</param>
/// <param name="labelMap">Label remap table.  May be null.</param>
/// <param name="dfd">Operand format descriptor; its symbol reference selects which
///   part of the value is used.</param>
/// <param name="operandValue">Operand's value.</param>
/// <param name="operandLen">Length of operand, in bytes (1-4).</param>
/// <param name="isPcRel">True if the operand is PC-relative.</param>
/// <param name="sb">Output buffer.</param>
private static void FormatNumericSymbolSimple(Formatter formatter, Symbol sym,
        Dictionary<string, string> labelMap, FormatDescriptor dfd,
        int operandValue, int operandLen, bool isPcRel, StringBuilder sb) {
    // Substitute the remapped label, if there is one.
    string symLabel = sym.Label;
    if (labelMap != null && labelMap.TryGetValue(symLabel, out string newLabel)) {
        symLabel = newLabel;
    }

    int symbolValue;
    if (operandLen == 1) {
        // Pick out the desired byte with a byte-selection operator.
        string selOp;
        switch (dfd.SymbolRef.ValuePart) {
            case WeakSymbolRef.Part.Bank:
                symbolValue = (sym.Value >> 16) & 0xff;
                selOp = "^";
                break;
            case WeakSymbolRef.Part.High:
                symbolValue = (sym.Value >> 8) & 0xff;
                selOp = ">";
                break;
            default:
                symbolValue = sym.Value & 0xff;
                // The operator is unnecessary when the value already fits in a byte.
                selOp = (symbolValue == sym.Value) ? string.Empty : "<";
                break;
        }
        sb.Append(selOp);
        sb.Append(symLabel);

        operandValue &= 0xff;
    } else if (operandLen <= 4) {
        // Operands and values should be 8/16/24 bit unsigned quantities.  32-bit
        // support mostly exists so a 24-bit pointer can sit in a 32-bit hole.
        // This may need adjusting if 32-bit signed quantities become interesting.
        uint mask = 0xffffffff >> ((4 - operandLen) * 8);

        string shOp;
        switch (dfd.SymbolRef.ValuePart) {
            case WeakSymbolRef.Part.Bank:
                symbolValue = (sym.Value >> 16);
                shOp = " >> 16";
                break;
            case WeakSymbolRef.Part.High:
                symbolValue = (sym.Value >> 8);
                shOp = " >> 8";
                break;
            default:
                symbolValue = sym.Value;
                shOp = "";
                break;
        }

        if (isPcRel) {
            // PC-relative operands are odd: an 8- or 16-bit value is always expanded
            // to 24 bits.  We output a 16-bit value that the assembler will convert
            // back to 8-bit or 16-bit.  Either way, the bank byte never matters to
            // the computations here.
            operandValue &= 0xffff;
            symbolValue &= 0xffff;
        }

        sb.Append(symLabel);
        sb.Append(shOp);
        if (symbolValue > mask) {
            // The post-shift value is too big for an operand-sized box; mask it.
            symbolValue = (int)(symbolValue & mask);
            sb.Append(" & ");
            sb.Append(formatter.FormatHexValue((int)mask, 2));
        }
        // Add a separating space if anything besides the bare label is present.
        if (sb.Length != symLabel.Length) {
            sb.Append(' ');
        }

        operandValue = (int)(operandValue & mask);
    } else {
        Debug.Assert(false, "bad numeric len");
        sb.Append("?????");
        symbolValue = 0;
    }

    // Whatever separates the operand from the (partial) symbol value is emitted
    // as an adjustment.
    sb.Append(formatter.FormatAdjustment(operandValue - symbolValue));
}
/// <summary>
/// Creates a FormatDescriptor from a SerFormatDescriptor.
/// </summary>
/// <remarks>
/// The format, sub-format, and symbol part names come from the project file, so
/// they are parsed with Enum.TryParse.  Any unparseable value (unknown name, null,
/// empty, or out-of-range numeric text) is reported as a warning rather than
/// allowed to throw.
/// </remarks>
/// <param name="sfd">Deserialized data.</param>
/// <param name="version">Serialization version (CONTENT_VERSION).</param>
/// <param name="report">Error report object; receives a warning on failure.</param>
/// <param name="dfd">Created FormatDescriptor, or null on failure.</param>
/// <returns>True on success.</returns>
private static bool CreateFormatDescriptor(SerFormatDescriptor sfd, int version,
        FileLoadReport report, out FormatDescriptor dfd) {
    dfd = null;
    FormatDescriptor.Type format;
    FormatDescriptor.SubType subFormat;

    if ("String".Equals(sfd.Format)) {
        // File version 1 used a different set of enumerated values for defining
        // strings.  Parse it out here.
        Debug.Assert(version <= 1);
        subFormat = FormatDescriptor.SubType.ASCII_GENERIC;
        if ("None".Equals(sfd.SubFormat)) {
            format = FormatDescriptor.Type.StringGeneric;
        } else if ("Reverse".Equals(sfd.SubFormat)) {
            format = FormatDescriptor.Type.StringReverse;
        } else if ("CString".Equals(sfd.SubFormat)) {
            format = FormatDescriptor.Type.StringNullTerm;
        } else if ("L8String".Equals(sfd.SubFormat)) {
            format = FormatDescriptor.Type.StringL8;
        } else if ("L16String".Equals(sfd.SubFormat)) {
            format = FormatDescriptor.Type.StringL16;
        } else if ("Dci".Equals(sfd.SubFormat)) {
            format = FormatDescriptor.Type.StringDci;
        } else if ("DciReverse".Equals(sfd.SubFormat)) {
            // No longer supported.  Nobody ever used this but the regression tests,
            // though, so there's no reason to handle this nicely.
            format = FormatDescriptor.Type.Dense;
            subFormat = FormatDescriptor.SubType.None;
        } else {
            // No idea what this is; output as dense hex.
            format = FormatDescriptor.Type.Dense;
            subFormat = FormatDescriptor.SubType.None;
        }
        Debug.WriteLine("Found v1 string, fmt=" + format + ", sub=" + subFormat);
        dfd = FormatDescriptor.Create(sfd.Length, format, subFormat);
        return dfd != null;
    }

    // Enum.Parse would throw OverflowException (not derived from ArgumentException)
    // on out-of-range numeric text; TryParse reports every failure the same way.
    if (!Enum.TryParse(sfd.Format, out format)) {
        report.Add(FileLoadItem.Type.Warning, Res.Strings.ERR_BAD_FD_FORMAT +
            ": " + sfd.Format + "/" + sfd.SubFormat);
        return false;
    }
    if (version <= 1 && "Ascii".Equals(sfd.SubFormat)) {
        // File version 1 used "Ascii" for all character data in numeric operands.
        // It applied to both low and high ASCII.
        subFormat = FormatDescriptor.SubType.ASCII_GENERIC;
        Debug.WriteLine("Found v1 char, fmt=" + sfd.Format + ", sub=" + sfd.SubFormat);
    } else if (!Enum.TryParse(sfd.SubFormat, out subFormat)) {
        report.Add(FileLoadItem.Type.Warning, Res.Strings.ERR_BAD_FD_FORMAT +
            ": " + sfd.Format + "/" + sfd.SubFormat);
        return false;
    }

    if (sfd.SymbolRef == null) {
        dfd = FormatDescriptor.Create(sfd.Length, format, subFormat);
    } else {
        WeakSymbolRef.Part part;
        if (!Enum.TryParse(sfd.SymbolRef.Part, out part)) {
            report.Add(FileLoadItem.Type.Warning,
                Res.Strings.ERR_BAD_SYMREF_PART +
                ": " + sfd.SymbolRef.Part);
            return false;
        }
        // Symbol references only need to know the byte order; the sub-format
        // parsed above is not used for them.
        dfd = FormatDescriptor.Create(sfd.Length,
            new WeakSymbolRef(sfd.SymbolRef.Label, part),
            format == FormatDescriptor.Type.NumericBE);
    }
    return dfd != null;
}
/// <summary>
/// Formats a numeric operand value as directed by the descriptor's sub-format.
/// </summary>
/// <param name="formatter">Text formatter.</param>
/// <param name="symbolTable">Full table of project symbols.</param>
/// <param name="lvLookup">Local variable lookup object.  May be null if not
///   formatting an instruction.</param>
/// <param name="labelMap">Symbol label remap, for local label conversion.  May be
///   null.</param>
/// <param name="dfd">Operand format descriptor.</param>
/// <param name="offset">Offset of start of instruction or data pseudo-op, for
///   variable name lookup.  Okay to pass -1 when not formatting an instruction.</param>
/// <param name="operandValue">Operand's value.  For most things this comes directly
///   out of the code, for relative branches it's a 24-bit absolute address.</param>
/// <param name="operandLen">Length of operand, in bytes.  For an instruction, this
///   does not include the opcode byte.  For a relative branch, this will be 2.</param>
/// <param name="flags">Special handling.</param>
/// <returns>Formatted operand.  Falls back to hex if a symbol can't be resolved, and
///   yields "???" for an unexpected sub-format or expression mode.</returns>
public static string FormatNumericOperand(Formatter formatter, SymbolTable symbolTable,
        LocalVariableLookup lvLookup, Dictionary<string, string> labelMap,
        FormatDescriptor dfd, int offset, int operandValue, int operandLen,
        FormatNumericOpFlags flags) {
    Debug.Assert(operandLen > 0);
    int hexMinLen = operandLen * 2;

    // Take care of everything except symbols, which need more work.
    switch (dfd.FormatSubType) {
        case FormatDescriptor.SubType.None:
        case FormatDescriptor.SubType.Hex:
        case FormatDescriptor.SubType.Address:
            return formatter.FormatHexValue(operandValue, hexMinLen);
        case FormatDescriptor.SubType.Decimal:
            return formatter.FormatDecimalValue(operandValue);
        case FormatDescriptor.SubType.Binary:
            return formatter.FormatBinaryValue(operandValue, hexMinLen * 4);
        case FormatDescriptor.SubType.Ascii:
        case FormatDescriptor.SubType.HighAscii:
        case FormatDescriptor.SubType.C64Petscii:
        case FormatDescriptor.SubType.C64Screen: {
            CharEncoding.Encoding enc = SubTypeToEnc(dfd.FormatSubType);
            return formatter.FormatCharacterValue(operandValue, enc);
        }
        case FormatDescriptor.SubType.Symbol:
            break;
        default:
            // REMOVE and ASCII_GENERIC should never show up here.
            Debug.Assert(false);
            return "???";
    }

    // Local variable reference?
    if (lvLookup != null && dfd.SymbolRef.IsVariable) {
        Debug.Assert(operandLen == 1);      // only doing 8-bit stuff
        DefSymbol defSym = lvLookup.GetSymbol(offset, dfd.SymbolRef);
        if (defSym == null) {
            Debug.WriteLine("Local variable format failed");
            Debug.Assert(false);
            return formatter.FormatHexValue(operandValue, hexMinLen);
        }
        StringBuilder varText = new StringBuilder();
        FormatNumericSymbolCommon(formatter, defSym, null, dfd, operandValue,
            operandLen, flags, varText);
        return varText.ToString();
    }

    // Ordinary symbol.  If it can't be found, show the value as hex.
    if (!symbolTable.TryGetNonVariableValue(dfd.SymbolRef.Label, out Symbol sym)) {
        return formatter.FormatHexValue(operandValue, hexMinLen);
    }

    StringBuilder symText = new StringBuilder();
    switch (formatter.ExpressionMode) {
        case Formatter.FormatConfig.ExpressionMode.Common:
            FormatNumericSymbolCommon(formatter, sym, labelMap, dfd, operandValue,
                operandLen, flags, symText);
            break;
        case Formatter.FormatConfig.ExpressionMode.Cc65:
            FormatNumericSymbolCc65(formatter, sym, labelMap, dfd, operandValue,
                operandLen, flags, symText);
            break;
        case Formatter.FormatConfig.ExpressionMode.Merlin:
            FormatNumericSymbolMerlin(formatter, sym, labelMap, dfd, operandValue,
                operandLen, flags, symText);
            break;
        default:
            Debug.Assert(false, "Unknown expression mode " + formatter.ExpressionMode);
            return "???";
    }
    return symText.ToString();
}
/// <summary>
/// Creates a FormatDescriptor in the Anattrib array at srcOffset that links to
/// targetOffset, or a nearby label.  If targetOffset doesn't have a useful label,
/// one will be generated.
///
/// This is used for both instruction and data operands.
/// </summary>
/// <param name="srcOffset">Offset of instruction or address data.</param>
/// <param name="srcLen">Length of instruction or data item.</param>
/// <param name="targetOffset">Offset of target.</param>
private void SetDataTarget(int srcOffset, int srcLen, int targetOffset) {
    // NOTE: don't try to cache mAnattribs[targetOffset] -- we may be changing
    // targetOffset and/or altering the Anattrib entry, so grabbing a copy of the
    // struct may lead to problems.

    // If the target offset has a symbol assigned, use it.  Otherwise, try to
    // find something nearby that might be more appropriate.
    int origTargetOffset = targetOffset;
    if (mAnattribs[targetOffset].Symbol == null) {
        if (mAnalysisParams.SeekNearbyTargets) {
            targetOffset = FindAlternateTarget(srcOffset, targetOffset);
        }

        // If we're not interested in seeking nearby targets, or we are but we failed
        // to find something useful, we need to make sure that we're not pointing
        // into the middle of the instruction.  The assembler will only see labels on
        // the opcode bytes, so if we're pointing at the middle we need to back up.
        if (mAnattribs[targetOffset].IsInstruction &&
                !mAnattribs[targetOffset].IsInstructionStart) {
            while (!mAnattribs[--targetOffset].IsInstructionStart) {
                // Should not be possible to move past the start of the file,
                // since we know we're in the middle of an instruction.
                Debug.Assert(targetOffset > 0);
            }
        } else if (!mAnattribs[targetOffset].IsInstruction &&
                !mAnattribs[targetOffset].IsStart) {
            // This is not part of an instruction, and is not the start of a formatted
            // data area.  However, it might be part of a formatted data area, in which
            // case we need to avoid creating an auto label in the middle.  So we seek
            // backward, looking for the first offset with a descriptor.  If that
            // descriptor includes this offset, we set the target offset to that.
            // (Note the uncategorized data pass hasn't run yet, so only instructions
            // and offsets identified by users or scripts have been categorized.)
            //
            // The scan must include offset 0: a descriptor at the very start of the
            // file can cover the target just like any other.
            int scanOffset = targetOffset;
            while (--scanOffset >= 0) {
                FormatDescriptor dfd = mAnattribs[scanOffset].DataDescriptor;
                if (dfd != null && scanOffset + dfd.Length > targetOffset) {
                    // Descriptor encompasses target offset.  Adjust target.
                    targetOffset = scanOffset;
                    break;
                }
            }
        }
    }

    if (mAnattribs[targetOffset].Symbol == null) {
        // No label at target offset, generate one.
        //
        // Generally speaking, the label we generate will be unique, because it
        // incorporates the address.  It's possible through various means to end
        // up with a user or platform label that matches an auto label, so we
        // need to do some renaming in that case.  Shouldn't happen often.
        Symbol sym = SymbolTable.GenerateUniqueForAddress(mAnattribs[targetOffset].Address,
            mProject.SymbolTable);
        mAnattribs[targetOffset].Symbol = sym;
        // This will throw if the symbol already exists.  That is the desired
        // behavior, as that would be a bug.
        mProject.SymbolTable.Add(sym);
    }

    // Create a Numeric/Symbol descriptor that references the target label.  If the
    // source offset already had a descriptor (e.g. Numeric/Address data item),
    // this will replace it in the Anattrib array.  (The user-specified format
    // is unaffected.)
    //
    // Doing this by target symbol, rather than offset in a Numeric/Address item,
    // allows us to avoid carrying the adjustment stuff everywhere.  OTOH we have
    // to manually refactor label renames in the display list if we don't want to
    // redo the data analysis.
    bool isBigEndian = false;
    if (mAnattribs[srcOffset].DataDescriptor != null) {
        LogD(srcOffset, "Replacing " + mAnattribs[srcOffset].DataDescriptor +
            " with reference to " + mAnattribs[targetOffset].Symbol.Label +
            ", adj=" + (origTargetOffset - targetOffset));
        // Preserve the byte order of the descriptor being replaced.
        if (mAnattribs[srcOffset].DataDescriptor.FormatType ==
                FormatDescriptor.Type.NumericBE) {
            isBigEndian = true;
        }
    } else {
        LogV(srcOffset, "Creating weak reference to label " +
            mAnattribs[targetOffset].Symbol.Label +
            ", adj=" + (origTargetOffset - targetOffset));
    }
    mAnattribs[srcOffset].DataDescriptor = FormatDescriptor.Create(srcLen,
        new WeakSymbolRef(mAnattribs[targetOffset].Symbol.Label, WeakSymbolRef.Part.Low),
        isBigEndian);
}
/// <summary>
/// Walks the attribute array looking for references to offsets inside the file, found
/// either in instruction operands or in Numeric/Address data descriptors, and passes
/// each one to SetDataTarget().
///
/// An instruction that already has a format descriptor is skipped.  An instruction
/// with an in-file operand offset and no descriptor is handed to SetDataTarget()
/// along with its length and the operand offset.  A data item formatted as
/// Numeric/Address is converted to an address, resolved to a file offset, and
/// handled the same way.
///
/// This should be called after code analysis has run, user labels and format
/// descriptors have been applied, and platform/project symbols have been merged
/// into the symbol table.
/// </summary>
public void AnalyzeDataTargets() {
    mDebugLog.LogI("Analyzing data targets...");

    int offset = 0;
    while (offset < mAnattribs.Length) {
        Anattrib attr = mAnattribs[offset];
        FormatDescriptor desc = attr.DataDescriptor;

        // Instructions always advance by one byte, rather than by their length, in
        // case another instruction is embedded inside this one.
        int advance = 1;

        if (attr.IsInstructionStart) {
            if (desc == null) {
                // An operand offset >= 0 means a branch or data access whose target
                // lies inside the file.
                int target = attr.OperandOffset;
                if (target >= 0) {
                    SetDataTarget(offset, attr.Length, target);
                }
            } else {
                // Already shown as numeric or as a reference to some other symbol;
                // nothing to do.  Numeric/Address is not allowed on instructions.
                Debug.Assert(desc.FormatSubType !=
                    FormatDescriptor.SubType.Address);
            }
        } else if (desc != null) {
            // IsDataStart / IsInlineDataStart can't be tested here because the bytes
            // might still be uncategorized, so we go by the descriptor alone.
            bool isBigEndian = (desc.FormatType == FormatDescriptor.Type.NumericBE);
            bool isNumeric = isBigEndian ||
                desc.FormatType == FormatDescriptor.Type.NumericLE;

            // Numeric/Address is the only data format that is an offset reference.
            // Numeric/Symbol is a name reference; everything else is plain data.
            if (isNumeric && desc.FormatSubType == FormatDescriptor.SubType.Address) {
                int address = RawData.GetWord(mFileData, offset, desc.Length, isBigEndian);
                if (desc.Length < 3) {
                    // The data doesn't specify a bank, so borrow the program bank.
                    // Not always right, but we need some bank to find a file offset
                    // and the data bank register isn't tracked.
                    address |= attr.Address & 0x7fff0000;
                }
                int target = mProject.AddrMap.AddressToOffset(offset, address);
                if (target >= 0) {
                    SetDataTarget(offset, desc.Length, target);
                }
            }

            // Data items don't nest, so skip the whole item.  The length is re-read
            // from the array because SetDataTarget() may have replaced the descriptor.
            advance = mAnattribs[offset].DataDescriptor.Length;
        }

        offset += advance;
    }
}
/// <summary>
/// Finds contiguous runs of offsets that hold neither code nor formatted data and
/// passes each run to AnalyzeRange().
///
/// This is re-run after most changes to the project, so it should stay cheap.
/// </summary>
public void AnalyzeUncategorized() {
    // TODO(someday): this could be faster.  The file data doesn't change, so a full
    // scan is only needed once, when the file is first loaded.  One TypedRangeSet
    // could hold runs of identical bytes (byte value as the type) and a second could
    // hold runs of ASCII chars (separate types for high/low ASCII, and PETSCII?).
    // AnalyzeRange() would then intersect with those sets.  The scan would need to
    // be redone whenever parameters such as the minimum match length change.

    FormatDescriptor singleByte = FormatDescriptor.Create(1,
        FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);
    FormatDescriptor.DebugPrefabBump(-1);

    // Pass 1: anything not identified as an instruction or inline data gets the
    // "data" flag.  (This should be the only place outside of CodeAnalysis that sets
    // the flag.)  Not strictly necessary, but it makes assertions possible later.
    int idx = 0;
    while (idx < mAnattribs.Length) {
        Anattrib cur = mAnattribs[idx];
        if (!cur.IsInlineData) {
            if (!cur.IsInstruction) {
                mAnattribs[idx].IsData = true;
            }
        } else if (cur.DataDescriptor == null) {
            // Inline data without a descriptor gets the one-byte default.  It is
            // not analyzed any further.
            mAnattribs[idx].DataDescriptor = singleByte;
            FormatDescriptor.DebugPrefabBump();
        }
        idx++;
    }

    mDebugLog.LogI("Analyzing uncategorized data...");

    // Pass 2: collect runs of uncategorized offsets.  pendingStart is the first
    // offset of the run being gathered, or -1 when no run is open.
    int pendingStart = -1;
    int pos = 0;
    while (pos < mAnattribs.Length) {
        Anattrib cur = mAnattribs[pos];

        if (cur.IsInstruction || cur.IsInlineData || cur.IsDataStart) {
            // Known instruction, inline data, or formatted data.  Flush the open
            // run, then step past this item.
            if (pendingStart >= 0) {
                AnalyzeRange(pendingStart, pos - 1);
                pendingStart = -1;
            }
            if (cur.IsInstruction) {
                // Embedded instructions mean we can't jump by the full length.
                pos++;
            } else {
                Debug.Assert(cur.Length > 0);
                pos += cur.Length;
            }
            continue;
        }

        if (cur.Symbol != null || mProject.HasCommentOrNote(pos)) {
            // Uncategorized, but break the run here so the label, comment, or note
            // isn't buried in the middle of a large chunk.
            //
            // This is similar to, but independent of, GroupedOffsetSetFromSelected()
            // in ProjectView.  This one is for auto-detection, that one is for user
            // selection; ideally the two behave alike.
            if (pendingStart >= 0) {
                AnalyzeRange(pendingStart, pos - 1);
            }
            pendingStart = pos;
            pos++;
            continue;
        }

        // Plain uncategorized byte: open a run if needed and keep going.
        if (pendingStart < 0) {
            pendingStart = pos;
        }
        pos++;

        // A discontinuity in the address sequence means there's an ORG here, so the
        // run must end at the previous byte.
        if (pos < mAnattribs.Length &&
                mAnattribs[pos - 1].Address + 1 != mAnattribs[pos].Address) {
            AnalyzeRange(pendingStart, pos - 1);
            pendingStart = -1;
        }
    }

    // Flush whatever is left at end of file.
    if (pendingStart >= 0) {
        AnalyzeRange(pendingStart, mAnattribs.Length - 1);
    }
}
/// <summary>
/// Loads platform symbols.
///
/// Each line of the file is either blank, a comment (leading ';'), a command
/// (leading '*'), or a symbol definition matched by sNameValueRegex.  Parsed
/// symbols are stored in mSymbols, keyed by label; a later definition of the same
/// label replaces the earlier one.
/// </summary>
/// <param name="fileIdent">External file identifier of symbol file.</param>
/// <param name="projectDir">Full path to project directory.</param>
/// <param name="loadOrdinal">Platform file load order.</param>
/// <param name="report">Report of warnings and errors.</param>
/// <returns>True on success (no errors), false on failure.</returns>
public bool LoadFromFile(string fileIdent, string projectDir, int loadOrdinal,
        out FileLoadReport report) {
    report = new FileLoadReport(fileIdent);

    ExternalFile ef = ExternalFile.CreateFromIdent(fileIdent);
    if (ef == null) {
        report.Add(FileLoadItem.Type.Error,
            CommonUtil.Properties.Resources.ERR_FILE_NOT_FOUND + ": " + fileIdent);
        return false;
    }
    string pathName = ef.GetPathName(projectDir);
    if (pathName == null) {
        report.Add(FileLoadItem.Type.Error,
            Res.Strings.ERR_BAD_IDENT + ": " + fileIdent);
        return false;
    }

    // These files shouldn't be enormous.  Just read the entire thing into a
    // string array.
    string[] lines;
    try {
        lines = File.ReadAllLines(pathName);
    } catch (Exception ex) when (ex is IOException ||
            ex is UnauthorizedAccessException ||
            ex is System.Security.SecurityException) {
        // ReadAllLines reports "access denied" and "path is a directory" with
        // UnauthorizedAccessException, not IOException.  Catch those too so that an
        // unreadable file becomes a report entry instead of an unhandled exception.
        Debug.WriteLine("Platform symbol load failed: " + ex);
        report.Add(FileLoadItem.Type.Error,
            CommonUtil.Properties.Resources.ERR_FILE_NOT_FOUND + ": " + pathName);
        return false;
    }

    string tag = string.Empty;
    DefSymbol.MultiAddressMask multiMask = null;

    int lineNum = 0;
    foreach (string line in lines) {
        lineNum++;      // first line is line 1, says Vim and VisualStudio
        if (string.IsNullOrEmpty(line) || line[0] == ';') {
            // blank line or comment; ignore
            continue;
        }

        if (line[0] == '*') {
            // Command tokens are fixed strings, so compare ordinally rather than
            // with the current culture's rules.
            if (line.StartsWith(TAG_CMD, StringComparison.Ordinal)) {
                tag = ParseTag(line);
            } else if (line.StartsWith(MULTI_MASK_CMD, StringComparison.Ordinal)) {
                if (!ParseMask(line, out multiMask, out string badMaskMsg)) {
                    report.Add(lineNum, FileLoadItem.NO_COLUMN,
                        FileLoadItem.Type.Warning, badMaskMsg);
                }
            } else {
                // Do something clever with *SYNOPSIS?
                Debug.WriteLine("Ignoring CMD: " + line);
            }
            continue;
        }

        MatchCollection matches = sNameValueRegex.Matches(line);
        if (matches.Count != 1) {
            report.Add(lineNum, FileLoadItem.NO_COLUMN, FileLoadItem.Type.Warning,
                CommonUtil.Properties.Resources.ERR_SYNTAX);
            continue;
        }
        Match match = matches[0];

        string label = match.Groups[GROUP_NAME].Value;
        char typeAndDir = match.Groups[GROUP_TYPE].Value[0];
        bool isConst = (typeAndDir == '=');
        DefSymbol.DirectionFlags direction = DefSymbol.DirectionFlags.ReadWrite;
        if (typeAndDir == '<') {
            direction = DefSymbol.DirectionFlags.Read;
        } else if (typeAndDir == '>') {
            direction = DefSymbol.DirectionFlags.Write;
        }

        string badParseMsg;
        int value, numBase;
        bool parseOk;
        string valueStr = match.Groups[GROUP_VALUE].Value;
        if (isConst) {
            // Allow various numeric options, and preserve the value.  We
            // don't limit the value range.
            parseOk = Asm65.Number.TryParseInt(valueStr, out value, out numBase);
            badParseMsg = CommonUtil.Properties.Resources.ERR_INVALID_NUMERIC_CONSTANT;
        } else if (valueStr.ToUpperInvariant().Equals(ERASE_VALUE_STR)) {
            parseOk = true;
            value = ERASE_VALUE;
            numBase = 10;
            badParseMsg = CommonUtil.Properties.Resources.ERR_INVALID_ADDRESS;
        } else {
            // Allow things like "05/1000".  Always hex.
            numBase = 16;
            parseOk = Asm65.Address.ParseAddress(valueStr, (1 << 24) - 1, out value);
            // limit to positive 24-bit values
            parseOk &= (value >= 0 && value < 0x01000000);
            badParseMsg = CommonUtil.Properties.Resources.ERR_INVALID_ADDRESS;
        }

        // Optional width; -1 means "not specified".
        int width = -1;
        string widthStr = match.Groups[GROUP_WIDTH].Value;
        if (parseOk && !string.IsNullOrEmpty(widthStr)) {
            parseOk = Asm65.Number.TryParseInt(widthStr, out width, out int ignoredBase);
            if (parseOk) {
                if (width < DefSymbol.MIN_WIDTH || width > DefSymbol.MAX_WIDTH) {
                    parseOk = false;
                    badParseMsg = Res.Strings.ERR_INVALID_WIDTH;
                }
            } else {
                badParseMsg = CommonUtil.Properties.Resources.ERR_INVALID_NUMERIC_CONSTANT;
            }
        }

        if (parseOk && multiMask != null && !isConst) {
            // We need to ensure that all possible values fit within the mask.
            // We don't test AddressValue here, because it's okay for the
            // canonical value to be outside the masked range.
            int testWidth = (width > 0) ? width : 1;
            for (int testValue = value; testValue < value + testWidth; testValue++) {
                if ((testValue & multiMask.CompareMask) != multiMask.CompareValue) {
                    parseOk = false;
                    badParseMsg = Res.Strings.ERR_VALUE_INCOMPATIBLE_WITH_MASK;
                    Debug.WriteLine("Mask FAIL: value=" + value.ToString("x6") +
                        " width=" + width +
                        " testValue=" + testValue.ToString("x6") +
                        " mask=" + multiMask);
                    break;
                }
            }
        }

        if (!parseOk) {
            report.Add(lineNum, FileLoadItem.NO_COLUMN, FileLoadItem.Type.Warning,
                badParseMsg);
            continue;
        }

        string comment = match.Groups[GROUP_COMMENT].Value;
        if (comment.Length > 0) {
            // remove ';'
            comment = comment.Substring(1);
        }
        FormatDescriptor.SubType subType =
            FormatDescriptor.GetSubTypeForBase(numBase);
        DefSymbol symDef = new DefSymbol(label, value, Symbol.Source.Platform,
            isConst ? Symbol.Type.Constant : Symbol.Type.ExternalAddr,
            subType, width, width > 0, comment, direction, multiMask,
            tag, loadOrdinal, fileIdent);
        if (mSymbols.ContainsKey(label)) {
            // This is very easy to do -- just define the same symbol twice
            // in the same file.  We don't really need to do anything about
            // it though.
            Debug.WriteLine("NOTE: stomping previous definition of " + label);
        }
        mSymbols[label] = symDef;
    }

    return !report.HasErrors;
}
/// <summary>
/// Appends the symbol reference, plus whatever adjustment makes it equal the operand
/// value, to the string builder using cc65 expression syntax.
/// </summary>
private static void FormatNumericSymbolCc65(Formatter formatter, Symbol sym,
        Dictionary<string, string> labelMap, FormatDescriptor dfd,
        int operandValue, int operandLen, FormatNumericOpFlags flags, StringBuilder sb) {
    // What sets cc65 apart from other assemblers with general expressions is that
    // bitwise shift and AND bind tighter than add and subtract (they rank with
    // multiply and divide).  Masking the low 16 bits and adding one can therefore be
    // written "start & $ffff + 1" instead of "(start & $ffff) + 1".
    //
    // That is handy for PEA, where "PEA (start & $ffff)" would look like a
    // (non-existent) indirect form of PEA.

    string label = sym.Label;
    if (labelMap != null && labelMap.TryGetValue(label, out string mapped)) {
        label = mapped;
    }

    WeakSymbolRef.Part part = dfd.SymbolRef.ValuePart;
    int symbolValue;

    if (operandLen == 1) {
        // Single byte: pick the piece with a byte-selection operator.
        string selector;
        switch (part) {
            case WeakSymbolRef.Part.Bank:
                symbolValue = (sym.Value >> 16) & 0xff;
                selector = "^";
                break;
            case WeakSymbolRef.Part.High:
                symbolValue = (sym.Value >> 8) & 0xff;
                selector = ">";
                break;
            default:
                symbolValue = sym.Value & 0xff;
                // The low-byte selector is only needed if it changes the value.
                selector = (symbolValue == sym.Value) ? string.Empty : "<";
                break;
        }
        sb.Append(selector).Append(label);
        operandValue &= 0xff;
    } else if (operandLen <= 4) {
        uint mask = 0xffffffff >> ((4 - operandLen) * 8);

        string shiftText;
        switch (part) {
            case WeakSymbolRef.Part.Bank:
                symbolValue = (sym.Value >> 16);
                shiftText = " >> 16";
                break;
            case WeakSymbolRef.Part.High:
                symbolValue = (sym.Value >> 8);
                shiftText = " >> 8";
                break;
            default:
                symbolValue = sym.Value;
                shiftText = "";
                break;
        }

        if (flags == FormatNumericOpFlags.IsPcRel) {
            // PC-relative operands arrive expanded to 24 bits.  We emit a 16-bit
            // value and let the assembler turn it back into 8 or 16 bits; the bank
            // byte never matters here.
            operandValue &= 0xffff;
            symbolValue &= 0xffff;
        }

        sb.Append(label).Append(shiftText);

        if (symbolValue > mask) {
            // The shifted value is too wide for the operand, so mask it down.
            symbolValue = (int)(symbolValue & mask);
            sb.Append(" & ").Append(formatter.FormatHexValue((int)mask, 2));
        }
        operandValue = (int)(operandValue & mask);

        // If anything was appended after the label, separate it from the adjustment.
        if (sb.Length != label.Length) {
            sb.Append(' ');
        }
    } else {
        Debug.Assert(false, "bad numeric len");
        sb.Append("?????");
        symbolValue = 0;
    }

    sb.Append(formatter.FormatAdjustment(operandValue - symbolValue));
}
/// <summary>
/// Appends the symbol reference, plus whatever adjustment makes it equal the operand
/// value, to the string builder using common expression syntax.
/// </summary>
private static void FormatNumericSymbolCommon(Formatter formatter, Symbol sym,
        Dictionary<string, string> labelMap, FormatDescriptor dfd,
        int operandValue, int operandLen, FormatNumericOpFlags flags, StringBuilder sb) {
    // Shifting and masking unconditionally would be correct but ugly.  Single-byte
    // operands use the byte-select operators; wider operands only get as much
    // decoration as they actually need.

    string label = sym.Label;
    if (labelMap != null && labelMap.TryGetValue(label, out string mapped)) {
        label = mapped;
    }

    WeakSymbolRef.Part part = dfd.SymbolRef.ValuePart;
    int symbolValue;

    if (operandLen == 1) {
        // Pick the piece with a byte-selection operator.  In 64tass the selection
        // operator has a very low precedence, similar to Merlin 32.
        string selector;
        switch (part) {
            case WeakSymbolRef.Part.Bank:
                symbolValue = (sym.Value >> 16) & 0xff;
                selector = formatter.Config.mBankSelectBackQuote ? "`" : "^";
                break;
            case WeakSymbolRef.Part.High:
                symbolValue = (sym.Value >> 8) & 0xff;
                selector = ">";
                break;
            default:
                symbolValue = sym.Value & 0xff;
                selector = (symbolValue == sym.Value) ? string.Empty : "<";
                break;
        }
        operandValue &= 0xff;

        // When an adjustment will follow a non-low part, parenthesize the selection
        // so the adjustment applies to the selected byte.
        bool wrap = (operandValue != symbolValue) && (part != WeakSymbolRef.Part.Low);
        if (wrap) {
            sb.Append('(');
        }
        sb.Append(selector).Append(label);
        if (wrap) {
            sb.Append(')');
        }
    } else if (operandLen <= 4) {
        // Operands and values should be 8/16/24 bit unsigned quantities.  32-bit
        // support exists so a 24-bit pointer can sit in a 32-bit hole.  This might
        // need adjusting if 32-bit signed quantities become interesting.
        uint mask = 0xffffffff >> ((4 - operandLen) * 8);

        int rightShift;
        switch (part) {
            case WeakSymbolRef.Part.Bank:
                rightShift = 16;
                break;
            case WeakSymbolRef.Part.High:
                rightShift = 8;
                break;
            default:
                rightShift = 0;
                break;
        }
        symbolValue = sym.Value >> rightShift;

        if (flags == FormatNumericOpFlags.IsPcRel) {
            // PC-relative operands arrive expanded to 24 bits.  We emit a 16-bit
            // value and let the assembler turn it back into 8 or 16 bits; the bank
            // byte never matters here.
            operandValue &= 0xffff;
            symbolValue &= 0xffff;
        }

        bool needMask = symbolValue > mask;
        if (needMask) {
            // The shifted value is too wide for the operand, so mask it down.
            symbolValue = (int)(symbolValue & mask);
        }
        operandValue = (int)(operandValue & mask);

        // Generate one of:
        //  label [+ adj]
        //  (label >> rightShift) [+ adj]
        //  (label & mask) [+ adj]
        //  ((label >> rightShift) & mask) [+ adj]
        bool needShift = (rightShift != 0);
        if (needShift || needMask) {
            if (flags != FormatNumericOpFlags.HasHashPrefix) {
                sb.Append("0+");
            }
            sb.Append((needShift && needMask) ? "((" : "(");
        }
        sb.Append(label);
        if (needShift) {
            sb.Append(" >> ").Append(rightShift.ToString()).Append(')');
        }
        if (needMask) {
            sb.Append(" & ").Append(formatter.FormatHexValue((int)mask, 2)).Append(')');
        }
    } else {
        Debug.Assert(false, "bad numeric len");
        sb.Append("?????");
        symbolValue = 0;
    }

    sb.Append(formatter.FormatAdjustment(operandValue - symbolValue));
}
/// <summary>
/// Converts a numeric operand value to a string, using the sub-format specified by
/// the format descriptor.
/// </summary>
/// <param name="formatter">Text formatter.</param>
/// <param name="symbolTable">Full table of project symbols.</param>
/// <param name="labelMap">Symbol label remap, for local label conversion.  May be
///   null.</param>
/// <param name="dfd">Operand format descriptor.</param>
/// <param name="operandValue">Operand's value.  For most things this comes directly
///   out of the code, for relative branches it's a 24-bit absolute address.</param>
/// <param name="operandLen">Length of operand, in bytes.  For an instruction, this
///   does not include the opcode byte.  For a relative branch, this will be 2.</param>
/// <param name="flags">Special handling.</param>
/// <returns>Formatted operand.  A symbol reference whose label isn't in the symbol
///   table is formatted as hex.</returns>
public static string FormatNumericOperand(Formatter formatter, SymbolTable symbolTable,
        Dictionary<string, string> labelMap, FormatDescriptor dfd,
        int operandValue, int operandLen, FormatNumericOpFlags flags) {
    Debug.Assert(operandLen > 0);
    int hexDigits = operandLen * 2;
    FormatDescriptor.SubType subType = dfd.FormatSubType;

    if (subType == FormatDescriptor.SubType.Symbol) {
        if (!symbolTable.TryGetValue(dfd.SymbolRef.Label, out Symbol sym)) {
            // Symbol not found; fall back to plain hex.
            return formatter.FormatHexValue(operandValue, hexDigits);
        }

        StringBuilder sb = new StringBuilder();
        switch (formatter.ExpressionMode) {
            case Formatter.FormatConfig.ExpressionMode.Common:
                FormatNumericSymbolCommon(formatter, sym, labelMap, dfd,
                    operandValue, operandLen, flags, sb);
                break;
            case Formatter.FormatConfig.ExpressionMode.Cc65:
                FormatNumericSymbolCc65(formatter, sym, labelMap, dfd,
                    operandValue, operandLen, flags, sb);
                break;
            case Formatter.FormatConfig.ExpressionMode.Merlin:
                FormatNumericSymbolMerlin(formatter, sym, labelMap, dfd,
                    operandValue, operandLen, flags, sb);
                break;
            default:
                Debug.Assert(false, "Unknown expression mode " +
                    formatter.ExpressionMode);
                return "???";
        }
        return sb.ToString();
    }

    switch (subType) {
        case FormatDescriptor.SubType.None:
        case FormatDescriptor.SubType.Hex:
        case FormatDescriptor.SubType.Address:
            return formatter.FormatHexValue(operandValue, hexDigits);
        case FormatDescriptor.SubType.Decimal:
            return formatter.FormatDecimalValue(operandValue);
        case FormatDescriptor.SubType.Binary:
            // Four binary digits per hex digit.
            return formatter.FormatBinaryValue(operandValue, hexDigits * 4);
        case FormatDescriptor.SubType.Ascii:
            return formatter.FormatAsciiOrHex(operandValue);
        default:
            Debug.Assert(false);
            return "???";
    }
}
/// <summary>
/// Builds the pseudo-op opcode and operand strings for one output line of a data
/// item.
///
/// Most items produce a single line.  Dense hex and string items can span several
/// lines; the sub-index selects which of those lines to format.
/// </summary>
/// <param name="formatter">Format definition.</param>
/// <param name="opNames">Table of pseudo-op names.</param>
/// <param name="symbolTable">Project symbol table.</param>
/// <param name="labelMap">Symbol label map.  May be null.</param>
/// <param name="dfd">Data format descriptor.</param>
/// <param name="data">File data array.</param>
/// <param name="offset">Start offset.</param>
/// <param name="subIndex">For multi-line items, which line.</param>
/// <returns>Opcode/operand pair for the requested line.  If the descriptor is null,
///   both strings hold a "!FAILED!" marker.</returns>
public static PseudoOut FormatDataOp(Formatter formatter, PseudoOpNames opNames,
        SymbolTable symbolTable, Dictionary<string, string> labelMap,
        FormatDescriptor dfd, byte[] data, int offset, int subIndex) {
    if (dfd == null) {
        // should never happen
        string failText = "!FAILED!+" + offset.ToString("x6");
        PseudoOut failed = new PseudoOut();
        failed.Operand = failText;
        failed.Opcode = failText;
        return failed;
    }

    int length = dfd.Length;
    Debug.Assert(length > 0);

    // All outputs for a given offset show the same offset and length, even for
    // multi-line items.
    PseudoOut po = new PseudoOut();

    switch (dfd.FormatType) {
        case FormatDescriptor.Type.Default: {
                if (length != 1) {
                    // This shouldn't happen.
                    Debug.Assert(false);
                    length = 1;
                }
                po.Opcode = opNames.GetDefineData(length);
                int byteVal = RawData.GetWord(data, offset, length, false);
                po.Operand = formatter.FormatHexValue(byteVal, length * 2);
            }
            break;
        case FormatDescriptor.Type.NumericLE:
        case FormatDescriptor.Type.NumericBE: {
                bool isBigEndian =
                    (dfd.FormatType == FormatDescriptor.Type.NumericBE);
                po.Opcode = isBigEndian ?
                    opNames.GetDefineBigData(length) : opNames.GetDefineData(length);
                int numVal = RawData.GetWord(data, offset, length, isBigEndian);
                po.Operand = FormatNumericOperand(formatter, symbolTable, labelMap,
                    dfd, numVal, length, FormatNumericOpFlags.None);
            }
            break;
        case FormatDescriptor.Type.Fill:
            po.Opcode = opNames.Fill;
            po.Operand = length + "," + formatter.FormatHexValue(data[offset], 2);
            break;
        case FormatDescriptor.Type.Dense: {
                // Two output characters per input byte.
                int bytesPerLine = MAX_OPERAND_LEN / 2;
                int skip = subIndex * bytesPerLine;
                int remaining = length - skip;
                int chunkLen = (remaining > bytesPerLine) ? bytesPerLine : remaining;
                po.Opcode = opNames.Dense;
                po.Operand = formatter.FormatDenseHex(data, offset + skip, chunkLen);
            }
            break;
        case FormatDescriptor.Type.String:
            // Strings are hard to emit one line at a time because of prefix lengths,
            // terminating nulls, DCI polarity, and reverse-order strings, so the
            // general approach is to convert the whole thing to chars and pull out
            // a chunk.  Generic strings (subtype=None) can be sliced directly, which
            // avoids the cost for massive strings created by auto-analysis.
            if (dfd.FormatSubType == FormatDescriptor.SubType.None) {
                // Leave room for the two delimiters.
                int charsPerLine = MAX_OPERAND_LEN - 2;
                int skip = subIndex * charsPerLine;
                int remaining = length - skip;
                int chunkLen = (remaining > charsPerLine) ? charsPerLine : remaining;
                char[] chunk = BytesToChars(formatter, opNames, dfd.FormatSubType,
                    data, offset + skip, chunkLen, out string chunkOpcode,
                    out int ignored);
                po.Opcode = chunkOpcode;
                po.Operand = "\u201c" + new string(chunk) + "\u201d";
            } else {
                char[] text = BytesToChars(formatter, opNames, dfd.FormatSubType,
                    data, offset, length, out string popcode, out int showHexZeroes);

                switch (showHexZeroes) {
                    case 1:
                        po.Opcode = opNames.DefineData1;
                        po.Operand = formatter.FormatHexValue(0, 2);
                        break;
                    case 2:
                        po.Opcode = opNames.DefineData2;
                        po.Operand = formatter.FormatHexValue(0, 4);
                        break;
                    default:
                        Debug.Assert(showHexZeroes == 0);
                        po.Opcode = popcode;
                        // Split into lines, then pick the one we were asked for.
                        List<PseudoOut> outList = new List<PseudoOut>();
                        GenerateTextLines(text, "\u201c", "\u201d", po, outList);
                        po = outList[subIndex];
                        break;
                }
            }
            break;
        default:
            Debug.Assert(false);
            po.Opcode = ".???";
            po.Operand = "$" + data[offset].ToString("x2");
            break;
    }

    return po;
}
/// <summary>
/// Assigns a format descriptor to every byte in a range of uncategorized data,
/// promoting runs of identical bytes to fills and ASCII sequences to strings where
/// possible.  Everything else becomes single-byte data.
/// </summary>
/// <param name="start">Offset of first byte in range.</param>
/// <param name="end">Offset of last byte in range.</param>
private void AnalyzeRange(int start, int end) {
    // TODO(someday): consider copying the buffer into a string and using Regex.  This
    //   can be done fairly quickly with "unsafe" code, e.g.:
    //   https://stackoverflow.com/questions/3028768/net-regular-expressions-on-bytes-instead-of-chars
    //   Could be useful for ASCII stuff and the repeated-byte detector, e.g.:
    //   https://stackoverflow.com/questions/1660694/regular-expression-to-match-any-character-being-repeated-more-than-10-times

    mDebugLog.LogI("Analyzing +" + start.ToString("x6") + " - +" + end.ToString("x6"));

    int minStringChars = mAnalysisParams.MinCharsForString;
    bool doAnalysis = mAnalysisParams.AnalyzeUncategorizedData;

    FormatDescriptor singleByte = FormatDescriptor.Create(1,
        FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);
    FormatDescriptor.DebugPrefabBump(-1);

    if (!doAnalysis) {
        // Analysis is disabled, so just mark everything as single-byte data.
        for (int i = start; i <= end; i++) {
            mAnattribs[i].DataDescriptor = singleByte;
            FormatDescriptor.DebugPrefabBump();
        }
        return;
    }

    int pos = start;
    while (pos <= end) {
        // Check for block of repeated values.
        int runLen = RecognizeRun(mFileData, pos, end);
        bool isAscii = TextUtil.IsPrintableAscii((char)(mFileData[pos] & 0x7f));

        // A run of a printable character that is short enough to fit on one line
        // and long enough to count as a string is left for the string recognizer
        // below.  Creating a string descriptor right here would save a little time,
        // but would split strings like "*****hello" into two separate strings.
        bool leaveForString = isAscii && runLen <= MIN_RUN_LENGTH_ASCII &&
            runLen >= minStringChars;

        if (runLen >= MIN_RUN_LENGTH && !leaveForString) {
            LogV(pos, "Run of 0x" + mFileData[pos].ToString("x2") + ": " +
                runLen + " bytes");
            mAnattribs[pos].DataDescriptor = FormatDescriptor.Create(
                runLen, FormatDescriptor.Type.Fill,
                FormatDescriptor.SubType.None);
            pos += runLen;
            continue;
        }

        int textLen = RecognizeAscii(mFileData, pos, end);
        if (textLen >= minStringChars) {
            LogV(pos, "ASCII string, len=" + textLen + " bytes");
            mAnattribs[pos].DataDescriptor = FormatDescriptor.Create(textLen,
                FormatDescriptor.Type.String, FormatDescriptor.SubType.None);
            pos += textLen;
            continue;
        }

        // Nothing found, output as single byte.  This is the easiest form for users
        // to edit.
        mAnattribs[pos].DataDescriptor = singleByte;
        FormatDescriptor.DebugPrefabBump();

        // Advance by one rather than by the run length.  Skipping the whole run
        // would lose short ASCII strings that begin with repeated bytes, e.g. "---%".
        pos++;
    }
}
/// <summary>
/// Loads platform symbols.
///
/// Each line of the file is either blank, a comment (leading ';'), a command
/// (leading '*'), or a symbol definition matched by sNameValueRegex.  Parsed
/// symbols are stored in mSymbols, keyed by label; a later definition of the same
/// label replaces the earlier one.
/// </summary>
/// <param name="fileIdent">External file identifier of symbol file.</param>
/// <param name="projectDir">Project directory, used to resolve the identifier to
///   a pathname.</param>
/// <param name="report">Report of warnings and errors.</param>
/// <returns>True on success (no errors), false on failure.</returns>
public bool LoadFromFile(string fileIdent, string projectDir, out FileLoadReport report) {
    // These files shouldn't be enormous.  Do it the easy way.
    report = new FileLoadReport(fileIdent);

    ExternalFile ef = ExternalFile.CreateFromIdent(fileIdent);
    if (ef == null) {
        report.Add(FileLoadItem.Type.Error,
            CommonUtil.Properties.Resources.ERR_FILE_NOT_FOUND + ": " + fileIdent);
        return false;
    }
    string pathName = ef.GetPathName(projectDir);
    if (pathName == null) {
        report.Add(FileLoadItem.Type.Error,
            Properties.Resources.ERR_BAD_IDENT + ": " + fileIdent);
        return false;
    }

    string[] lines;
    try {
        lines = File.ReadAllLines(pathName);
    } catch (Exception ex) when (ex is IOException ||
            ex is UnauthorizedAccessException ||
            ex is System.Security.SecurityException) {
        // ReadAllLines reports "access denied" and "path is a directory" with
        // UnauthorizedAccessException, not IOException.  Catch those too so that an
        // unreadable file becomes a report entry instead of an unhandled exception.
        Debug.WriteLine("Platform symbol load failed: " + ex);
        report.Add(FileLoadItem.Type.Error,
            CommonUtil.Properties.Resources.ERR_FILE_NOT_FOUND + ": " + pathName);
        return false;
    }

    string tag = string.Empty;

    int lineNum = 0;
    foreach (string line in lines) {
        lineNum++;      // first line is line 1, says Vim and VisualStudio
        if (string.IsNullOrEmpty(line) || line[0] == ';') {
            // blank line or comment; ignore
            continue;
        }

        if (line[0] == '*') {
            // The command token is a fixed string, so compare ordinally rather than
            // with the current culture's rules.
            if (line.StartsWith(TAG_CMD, StringComparison.Ordinal)) {
                tag = ParseTag(line);
            } else {
                // Do something clever with *SYNOPSIS?
                Debug.WriteLine("CMD: " + line);
            }
            continue;
        }

        MatchCollection matches = sNameValueRegex.Matches(line);
        if (matches.Count != 1) {
            report.Add(lineNum, FileLoadItem.NO_COLUMN, FileLoadItem.Type.Warning,
                CommonUtil.Properties.Resources.ERR_SYNTAX);
            continue;
        }

        // Groups: 1=label, 2=type ('=' marks a constant), 3=value, 4=comment.
        Match match = matches[0];
        string label = match.Groups[1].Value;
        bool isConst = (match.Groups[2].Value[0] == '=');
        string valueStr = match.Groups[3].Value;

        string badParseMsg;
        int value, numBase;
        bool parseOk;
        if (isConst) {
            // Allow various numeric options, and preserve the value.
            parseOk = Asm65.Number.TryParseInt(valueStr, out value, out numBase);
            badParseMsg = CommonUtil.Properties.Resources.ERR_INVALID_NUMERIC_CONSTANT;
        } else {
            // Allow things like "05/1000".  Always hex.
            numBase = 16;
            parseOk = Asm65.Address.ParseAddress(valueStr, (1 << 24) - 1, out value);
            badParseMsg = CommonUtil.Properties.Resources.ERR_INVALID_ADDRESS;
        }

        if (!parseOk) {
            report.Add(lineNum, FileLoadItem.NO_COLUMN, FileLoadItem.Type.Warning,
                badParseMsg);
            continue;
        }

        string comment = match.Groups[4].Value;
        if (comment.Length > 0) {
            // remove ';'
            comment = comment.Substring(1);
        }
        FormatDescriptor.SubType subType =
            FormatDescriptor.GetSubTypeForBase(numBase);
        DefSymbol symDef = new DefSymbol(label, value, Symbol.Source.Platform,
            isConst ? Symbol.Type.Constant : Symbol.Type.ExternalAddr,
            subType, comment, tag);
        if (mSymbols.ContainsKey(label)) {
            // This is very easy to do -- just define the same symbol twice
            // in the same file.  We don't really need to do anything about
            // it though.
            Debug.WriteLine("NOTE: stomping previous definition of " + label);
        }
        mSymbols[label] = symDef;
    }

    return !report.HasErrors;
}
/// <summary>
/// Appends the symbol reference, plus whatever adjustment makes it equal the operand
/// value, to the string builder using Merlin expression syntax.
/// </summary>
private static void FormatNumericSymbolMerlin(Formatter formatter, Symbol sym,
        Dictionary<string, string> labelMap, FormatDescriptor dfd,
        int operandValue, int operandLen, FormatNumericOpFlags flags, StringBuilder sb) {
    // Merlin expressions are compatible with the original 8-bit Merlin.  They're
    // evaluated from left to right, with (almost) no regard for operator precedence.
    //
    // The part-selection operators differ from "simple" in two ways:
    //  (1) They always happen last.  If FOO=$10f0, "#>FOO+$18" == $11.  One of the
    //      few cases where left-to-right evaluation is overridden.
    //  (2) They select words, not bytes.  If FOO=$123456, "#>FOO" is $1234.  This is
    //      best thought of as a shift operator, rather than byte-selection.  For
    //      8-bit code this doesn't matter.
    //
    // This behavior leads to simpler expressions for simple symbol adjustments.

    string label = sym.Label;
    if (labelMap != null && labelMap.TryGetValue(label, out string mapped)) {
        label = mapped;
    }

    WeakSymbolRef.Part part = dfd.SymbolRef.ValuePart;

    // The adjustment is applied to the full value, which is then shifted down to
    // the selected part.  So the operand is shifted up to match, and its low bytes
    // are filled in from the symbol to keep them out of the adjustment.  (Example:
    // FOO=$10f0 with operand "#>FOO" ($10).  Shift to get $1000, OR in the low byte
    // to get $10f0, and the subtraction yields adjustment==0.)
    int shiftedOperand, partLen;
    switch (part) {
        case WeakSymbolRef.Part.Bank:
            shiftedOperand = operandValue << 16 | (int)(sym.Value & 0xff00ffff);
            partLen = 3;
            break;
        case WeakSymbolRef.Part.High:
            shiftedOperand = (operandValue << 8) | (sym.Value & 0xff);
            partLen = 2;
            break;
        default:
            shiftedOperand = operandValue;
            partLen = 1;
            break;
    }
    int keepLen = Math.Max(partLen, operandLen);
    int adjustment = shiftedOperand - sym.Value;

    // For 8- and 16-bit results the assembler implicitly applies a modulo, so for
    // aesthetics pick the equivalent adjustment that is closest to zero.
    int modulus = 0;
    if (keepLen == 1) {
        modulus = 256;
    } else if (keepLen == 2) {
        modulus = 65536;
    }
    if (modulus != 0) {
        int half = modulus / 2;
        adjustment %= modulus;
        if (adjustment >= half) {
            adjustment -= modulus;
        } else if (adjustment < -half) {
            adjustment += modulus;
        }
    }

    // Use the label from sym, not dfd's weak ref; might be different if label
    // comparisons are case-insensitive.
    switch (part) {
        case WeakSymbolRef.Part.Unknown:
        case WeakSymbolRef.Part.Low:
            // For Merlin, "<" is effectively a no-op.  We can put it in for
            // aesthetics when grabbing the low byte of a 16-bit value.
            if (operandLen == 1 && sym.Value > 0xff) {
                sb.Append('<');
            }
            sb.Append(label);
            break;
        case WeakSymbolRef.Part.High:
            sb.Append('>').Append(label);
            break;
        case WeakSymbolRef.Part.Bank:
            sb.Append('^').Append(label);
            break;
        default:
            Debug.Assert(false, "bad part");
            sb.Append("???");
            break;
    }

    sb.Append(formatter.FormatAdjustment(adjustment));
}