Ejemplo n.º 1
0
        /// <summary>
        /// Walks a range of uncategorized bytes and attaches a format descriptor to the
        /// first byte of each item it identifies: a fill run, an ASCII string, or a
        /// plain single byte.
        /// </summary>
        /// <remarks>
        /// When <c>mAnalysisParams.AnalyzeUncategorizedData</c> is false, no pattern
        /// matching is attempted and every byte in the range becomes a single-byte item.
        /// </remarks>
        /// <param name="start">Offset of first byte in range.</param>
        /// <param name="end">Offset of last byte in range (inclusive).</param>
        private void AnalyzeRange(int start, int end)
        {
            // TODO(someday): consider copying the buffer into a string and using Regex.  This
            //   can be done fairly quickly with "unsafe" code, e.g.:
            //   https://stackoverflow.com/questions/3028768/net-regular-expressions-on-bytes-instead-of-chars
            //   Could be useful for ASCII stuff and the repeated-byte detector, e.g.:
            //   https://stackoverflow.com/questions/1660694/regular-expression-to-match-any-character-being-repeated-more-than-10-times

            mDebugLog.LogI("Analyzing  +" + start.ToString("x6") + " - +" + end.ToString("x6"));

            int minStringChars = mAnalysisParams.MinCharsForString;
            bool doAnalysis = mAnalysisParams.AnalyzeUncategorizedData;

            // One shared descriptor instance is reused for every single-byte item.
            FormatDescriptor oneByteDefault = FormatDescriptor.Create(
                1, FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);

            // NOTE(review): presumably this cancels the debug counter bump made by the
            // Create() call above, so only actual uses are counted -- confirm.
            FormatDescriptor.DebugPrefabBump(-1);

            int offset = start;
            while (offset <= end)
            {
                if (doAnalysis)
                {
                    // Look for a block of repeated values first.
                    int runLength = RecognizeRun(mFileData, offset, end);
                    bool isAscii = TextUtil.IsPrintableAscii((char)(mFileData[offset] & 0x7f));

                    // A long-enough run is output as fill, unless it is printable ASCII,
                    // no longer than MIN_RUN_LENGTH_ASCII, and meets the minimum string
                    // length.  That case is deliberately left for the string recognizer
                    // below: emitting a string descriptor right here would save a little
                    // time, but text like "*****hello" would be split into two strings.
                    if (runLength >= MIN_RUN_LENGTH &&
                        (!isAscii || runLength > MIN_RUN_LENGTH_ASCII ||
                         runLength < minStringChars))
                    {
                        LogV(offset, "Run of 0x" + mFileData[offset].ToString("x2") + ": " +
                             runLength + " bytes");
                        mAnattribs[offset].DataDescriptor = FormatDescriptor.Create(
                            runLength, FormatDescriptor.Type.Fill,
                            FormatDescriptor.SubType.None);
                        offset += runLength;
                        continue;
                    }

                    int stringLength = RecognizeAscii(mFileData, offset, end);
                    if (stringLength >= minStringChars)
                    {
                        LogV(offset, "ASCII string, len=" + stringLength + " bytes");
                        mAnattribs[offset].DataDescriptor = FormatDescriptor.Create(
                            stringLength, FormatDescriptor.Type.String,
                            FormatDescriptor.SubType.None);
                        offset += stringLength;
                        continue;
                    }
                }

                // Either analysis is disabled or nothing matched: output a single byte,
                // which is the easiest form for users to edit.
                mAnattribs[offset].DataDescriptor = oneByteDefault;
                FormatDescriptor.DebugPrefabBump();

                // Advance by exactly one byte.  Skipping ahead by the run length is
                // tempting, but would lose short ASCII strings that begin with repeated
                // bytes, e.g. "---%".
                offset++;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Analyzes uncategorized regions of the file to see if they fit common patterns.
        ///
        /// Every offset that is neither an instruction nor inline data is flagged as data,
        /// then each contiguous stretch of uncategorized bytes is handed to
        /// <see cref="AnalyzeRange"/>.
        ///
        /// This is re-run after most changes to the project, so we don't want to do anything
        /// crazily expensive.
        /// </summary>
        public void AnalyzeUncategorized()
        {
            // TODO(someday): we can make this faster.  The data doesn't change, so we
            // only need to do a full scan once, when the file is first loaded.  We can
            // create a TypedRangeSet for runs of identical bytes, using the byte value
            // as the type.  A second TypedRangeSet would identify runs of ASCII chars,
            // with different types for high/low ASCII (and PETSCII?).  AnalyzeRange() would
            // then just need to find the intersection with the sets, which should be
            // significantly faster.  We would need to re-do the scan if the parameters
            // for things like min match length change.

            // One shared descriptor instance is reused for all undescribed inline data.
            FormatDescriptor oneByteDefault = FormatDescriptor.Create(
                1, FormatDescriptor.Type.Default, FormatDescriptor.SubType.None);

            // NOTE(review): presumably this cancels the debug counter bump made by the
            // Create() call above, so only actual uses are counted -- confirm.
            FormatDescriptor.DebugPrefabBump(-1);

            // Pass 1: anything not identified as code or inline data gets the "data" flag,
            // giving it a positive identification as data.  (This should be the only
            // place outside of CodeAnalysis that sets this flag.)  Not strictly
            // necessary, but it helps us assert things when pieces start moving around.
            for (int i = 0; i < mAnattribs.Length; i++)
            {
                Anattrib cur = mAnattribs[i];
                if (!cur.IsInlineData)
                {
                    if (!cur.IsInstruction)
                    {
                        mAnattribs[i].IsData = true;
                    }
                    continue;
                }

                // Inline data is not analyzed, but make sure it has at least a default
                // format descriptor.
                if (cur.DataDescriptor == null)
                {
                    mAnattribs[i].DataDescriptor = oneByteDefault;
                    FormatDescriptor.DebugPrefabBump();
                }
            }

            mDebugLog.LogI("Analyzing uncategorized data...");

            // Pass 2: gather contiguous series of offsets that are not known to hold code
            // or formatted data, and analyze each one.  "pendingStart" is the first offset
            // of the series currently being gathered, or -1 when none is open.
            int pendingStart = -1;
            int pos = 0;

            while (pos < mAnattribs.Length)
            {
                Anattrib cur = mAnattribs[pos];

                if (cur.IsInstruction || cur.IsInlineData || cur.IsDataStart)
                {
                    // Instruction, inline data, or formatted data known to be here.
                    // Analyze whatever was gathered so far, then step past this item.
                    if (pendingStart >= 0)
                    {
                        AnalyzeRange(pendingStart, pos - 1);
                        pendingStart = -1;
                    }

                    if (!cur.IsInstruction)
                    {
                        Debug.Assert(cur.Length > 0);
                        pos += cur.Length;
                    }
                    else
                    {
                        // Because of embedded instructions, we can't simply leap forward
                        // by the instruction length.
                        pos++;
                    }
                    continue;
                }

                if (cur.Symbol != null || mProject.HasCommentOrNote(pos))
                {
                    // In an uncategorized area, but we want to break at this byte so
                    // the user or auto label doesn't get buried in the middle of a
                    // large chunk.  The gathered series ends just before this offset,
                    // and a new one starts on it.
                    //
                    // This is similar to, but independent of, GroupedOffsetSetFromSelected()
                    // in ProjectView.  This is for auto-detection, the other is for user
                    // selection.  It's best if the two behave similarly though.
                    //
                    // NOTE(review): unlike the plain uncategorized case below, no address
                    // discontinuity check is made after advancing here -- confirm that
                    // is intended.
                    if (pendingStart >= 0)
                    {
                        AnalyzeRange(pendingStart, pos - 1);
                    }
                    pendingStart = pos;
                    pos++;
                    continue;
                }

                // Plain uncategorized byte: open a series if needed and keep gathering.
                if (pendingStart < 0)
                {
                    pendingStart = pos;
                }
                pos++;

                // If the next entry's address does not directly follow the one just
                // consumed, there must be an ORG here; close out the series at the
                // boundary.
                if (pos < mAnattribs.Length &&
                    mAnattribs[pos - 1].Address + 1 != mAnattribs[pos].Address)
                {
                    AnalyzeRange(pendingStart, pos - 1);
                    pendingStart = -1;
                }
            }

            // Flush a series that runs to the end of the file.
            if (pendingStart >= 0)
            {
                AnalyzeRange(pendingStart, mAnattribs.Length - 1);
            }
        }