Exemple #1
0
        /// <summary>
        /// Adds a call edge between two procedures to the graph. The edge is
        /// stored as a new XRef that runs from the caller's entry point to
        /// the callee's entry point, keeps the type of the supplied xref, and
        /// records the supplied xref's Source as its data location.
        /// </summary>
        /// <param name="caller">Procedure the edge starts from.</param>
        /// <param name="callee">Procedure the edge points to.</param>
        /// <param name="xref">Cross reference that gave rise to this edge.</param>
        /// <exception cref="ArgumentNullException">If any argument is null.</exception>
        public void AddEdge(Procedure caller, Procedure callee, XRef xref)
        {
            // Validate the arguments in declaration order.
            if (caller == null)
            {
                throw new ArgumentNullException("caller");
            }
            if (callee == null)
            {
                throw new ArgumentNullException("callee");
            }
            if (xref == null)
            {
                throw new ArgumentNullException("xref");
            }

            // Both endpoints are expected to be known procedures already.
            System.Diagnostics.Debug.Assert(procedures.Contains(caller));
            System.Diagnostics.Debug.Assert(procedures.Contains(callee));

            // TBD: verify that the xref really connects these two procedures.
            graph.Add(new XRef(
                          type: xref.Type,
                          source: caller.EntryPoint,
                          target: callee.EntryPoint,
                          dataLocation: xref.Source));
        }
        /// <summary>
        /// Decodes the instruction at the given address and registers it
        /// with the image. The bytes covered by the instruction must all be
        /// of type ByteType.Unknown; on success they are re-typed as
        /// ByteType.Code and the instruction is added to image.Instructions.
        /// </summary>
        /// <param name="address">Address of the first byte to decode.</param>
        /// <param name="entry">XRef being processed; its Target and Source
        /// are only used to build the error message.</param>
        /// <returns>
        /// The decoded instruction, or null if DecodeInstruction returned
        /// null or the covered bytes are not all of type Unknown (in which
        /// case an OverlappingInstruction error is also recorded).
        /// </returns>
        protected virtual Instruction CreateInstruction(Address address, XRef entry)
        {
            Instruction decoded = DecodeInstruction(address);
            if (decoded == null)
            {
                return null;
            }

            // End of the byte range covered by the decoded instruction.
            Address end = address + decoded.EncodedLength;

            // Refuse to overlap bytes that already have a type assigned.
            bool allUnknown = image.CheckByteType(address, end, ByteType.Unknown);
            if (!allUnknown)
            {
                AddError(address, ErrorCode.OverlappingInstruction,
                         "Ran into the middle of code when processing block {0} referred from {1}",
                         entry.Target, entry.Source);
                return null;
            }

            // Claim the bytes as code and remember the instruction.
            image.UpdateByteType(address, end, ByteType.Code);
            image.Instructions.Add(address, decoded);
            return decoded;
        }
Exemple #3
0
        /// <summary>
        /// Adds a control flow edge between two basic blocks to the graph.
        /// The edge is stored as a new XRef that runs from the source block's
        /// Location to the target block's Location, keeps the type of the
        /// supplied xref, and records the supplied xref's Source as its data
        /// location.
        /// </summary>
        /// <param name="source">Block the edge starts from.</param>
        /// <param name="target">Block the edge points to.</param>
        /// <param name="xref">Cross reference that gave rise to this edge.</param>
        /// <exception cref="ArgumentNullException">If any argument is null.</exception>
        public void AddEdge(BasicBlock source, BasicBlock target, XRef xref)
        {
            // Validate the arguments in declaration order.
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }
            if (xref == null)
            {
                throw new ArgumentNullException("xref");
            }

            // Both endpoints are expected to be known blocks already.
            System.Diagnostics.Debug.Assert(blocks.Contains(source));
            System.Diagnostics.Debug.Assert(blocks.Contains(target));

            graph.Add(new XRef(
                          type: xref.Type,
                          source: source.Location,
                          target: target.Location,
                          dataLocation: xref.Source));
        }
        /// <summary>
        /// Builds a cross reference of type XRefType.FallThrough between two
        /// addresses.
        /// </summary>
        /// <param name="source">Address the fall-through starts from.</param>
        /// <param name="target">Address the fall-through leads to.</param>
        /// <returns>The newly created fall-through XRef.</returns>
        protected virtual XRef CreateFallThroughXRef(Address source, Address target)
        {
            return new XRef(type: XRefType.FallThrough, source: source, target: target);
        }
Exemple #5
0
        /// <summary>
        /// Orders two XRef objects by location. Source is the most
        /// significant key, then Target, then DataLocation; a later key is
        /// only consulted when all earlier keys compare equal.
        /// </summary>
        /// <param name="x">First XRef to compare.</param>
        /// <param name="y">Second XRef to compare.</param>
        /// <returns>
        /// The CompareTo result of the first key on which the two differ,
        /// or the DataLocation comparison if Source and Target are equal.
        /// </returns>
        public static int CompareByLocation(XRef x, XRef y)
        {
            int bySource = x.Source.CompareTo(y.Source);
            if (bySource != 0)
            {
                return bySource;
            }

            int byTarget = x.Target.CompareTo(y.Target);
            if (byTarget != 0)
            {
                return byTarget;
            }

            return x.DataLocation.CompareTo(y.DataLocation);
        }
        /// <summary>
        /// Feeds a procedure into the hash by walking its control flow graph
        /// breadth-first from the entry point. For every dequeued block
        /// address the visit index is hashed; for a block seen for the first
        /// time, the block contents and the type of its non-fall-through
        /// outgoing edge are hashed as well, following fall-through edges so
        /// that block splits do not influence the result.
        /// </summary>
        /// <param name="hasher">Hash algorithm that receives the data.</param>
        /// <param name="procedure">Procedure whose EntryPoint starts the walk.</param>
        /// <param name="image">Image that supplies the basic blocks and the
        /// control flow graph.</param>
        /// <exception cref="InvalidOperationException">If a block has more
        /// than one fall-through edge or more than one non-fall-through
        /// edge.</exception>
        private static void ComputeMore(
            HashAlgorithm hasher, Procedure procedure, BinaryImage image)
        {
            // TODO: add the traversal logic into Graph class.
            // or maybe GraphAlgorithms.Traversal(...).

            // Work list for the breadth-first walk. DFS would do equally
            // well as long as the choice never changes; BFS is simply the
            // easier one to reason about.
            Queue<Address> pending = new Queue<Address>();
            pending.Enqueue(procedure.EntryPoint);

            // Visit index of every address taken from the work list, i.e.
            // of every block reached through a non-fall-through edge (plus
            // the entry point). The index is assigned on first encounter and
            // is hashed as a hint of the graph's structure.
            Dictionary<Address, int> visitIndex = new Dictionary<Address, int>();

            XRefCollection flowGraph = image.BasicBlocks.ControlFlowGraph.Graph;

            while (pending.Count > 0)
            {
                Address blockStart = pending.Dequeue();

                // Look up the visit index, assigning the next free one if
                // this address is new. The index is hashed in both cases.
                int order;
                bool seenBefore = visitIndex.TryGetValue(blockStart, out order);
                if (!seenBefore)
                {
                    order = visitIndex.Count;
                    visitIndex.Add(blockStart, order);
                }
                ComputeMore(hasher, order);

                // An already visited block contributes nothing more.
                if (seenBefore)
                {
                    continue;
                }

                // Hash the instructions of this block and of every block
                // reached from it via fall-through edges, so that artificial
                // block boundaries do not change the hash. Example:
                //
                // MySub:                LibSub1:
                //       mov ax, bx            mov ax, bx
                //                       LibSub2:
                //       mov bx, cx            mov bx, cx
                //       ret                   ret
                //
                // Both sides contain the same three instructions, but a call
                // into the middle of LibSub1 splits it in two, connected by
                // a fall-through edge from LibSub1 to LibSub2. Without
                // following that edge the right side would hash differently
                // from the left side.
                Address current = blockStart;
                bool fellThrough;
                do
                {
                    BasicBlock block = image.BasicBlocks.Find(current);
                    System.Diagnostics.Debug.Assert(block != null);

                    // Hash the block's instructions. Only the opcode part is
                    // included; displacements and immediates may be subject
                    // to fix-up and are left out.
                    ComputeMore(hasher, block, image);

                    // Classify the outgoing edges. The non-fall-through
                    // edges would have to be ordered in a way that depends
                    // only on the graph's structure, which matters once a
                    // block has several of them (e.g. an indexed jump).
                    //
                    // TBD: handle multiple outgoing edges.
                    XRef fallThrough = null;
                    XRef jump = null;
                    foreach (XRef edge in flowGraph.GetReferencesFrom(current))
                    {
                        if (edge.Type != XRefType.FallThrough)
                        {
                            if (jump != null)
                            {
                                throw new InvalidOperationException("Cannot have more than one non-fall-through edge.");
                            }
                            jump = edge;
                            continue;
                        }

                        if (fallThrough != null)
                        {
                            throw new InvalidOperationException("Cannot have more than one fall-through edge.");
                        }
                        fallThrough = edge;
                    }

                    // Hash the kind of jump and schedule its target.
                    if (jump != null)
                    {
                        ComputeMore(hasher, (int)jump.Type);
                        pending.Enqueue(jump.Target);
                    }

                    // Continue with the fall-through successor, if any.
                    fellThrough = fallThrough != null;
                    if (fellThrough)
                    {
                        current = fallThrough.Target;
                    }
                }
                while (fellThrough);
            }
        }
        /// <summary>
        /// Analyzes code starting from the given location and creates basic
        /// blocks iteratively. Every xref that is processed, including the
        /// dummy xref that represents the entry point itself, is added to
        /// CrossReferences.
        /// </summary>
        /// <param name="entryPoint">Address at which the analysis starts.</param>
        /// <param name="entryType">XRef type assigned to the dummy xref that
        /// represents the entry point.</param>
        public void GenerateBasicBlocks(Address entryPoint, XRefType entryType)
        {
            // Maintain a queue of basic block entry points to analyze. At
            // the beginning, only the user-specified entry point is in the
            // queue. As we encounter b/c/j instructions during the course
            // of analysis, we push the target addresses to the queue of
            // entry points to be analyzed later.
            PriorityQueue <XRef> xrefQueue =
                new PriorityQueue <XRef>(XRef.CompareByPriority);

            // Create a dummy xref entry using the user-supplied starting
            // address.
            xrefQueue.Enqueue(new XRef(
                                  type: entryType,
                                  source: Address.Invalid,
                                  target: entryPoint
                                  ));

            // Analyze each cross reference in order of their priority.
            // In particular, if the xref is an indexed jump, we delay its
            // processing until we have processed all other types of xrefs.
            // This reduces the chance that we process past the end of a
            // jump table.
            while (!xrefQueue.IsEmpty)
            {
                XRef entry = xrefQueue.Dequeue();

                // Handle jump table entry, whose Target == Invalid.
                if (entry.Type == XRefType.NearIndexedJump)
                {
                    System.Diagnostics.Debug.Assert(entry.Target == Address.Invalid);

                    // Fill the Target field to make it a static xref.
                    entry = ProcessJumpTableEntry(entry, xrefQueue);
                    if (entry == null) // end of jump table
                    {
                        continue;
                    }
                }

                // Skip other dynamic xrefs.
                if (entry.Target == Address.Invalid)
                {
                    CrossReferences.Add(entry);
                    continue;
                }

                // Process the basic block starting at the target address.
                // The call is made for its side effects (new blocks, new
                // xrefs pushed onto the queue, recorded errors); the
                // returned block is not needed here because associating
                // blocks with their owning procedure is currently disabled.
                AnalyzeBasicBlock(entry, xrefQueue);

                CrossReferences.Add(entry);
            }
        }
        /// <summary>
        /// Gets the image associated with the segment specified by its id.
        /// </summary>
        /// <param name="segmentId">Id of the segment to resolve.</param>
        /// <returns>The image associated with the given segment, or null if
        /// the segment id is invalid.</returns>
        //protected abstract ImageChunk ResolveSegment(int segmentId);

        #region Flow Analysis Methods

        /// <summary>
        /// Analyzes a contiguous sequence of instructions that form a basic
        /// block, starting at the target of the given xref. If the target is
        /// already analyzed as code inside an existing block, that block is
        /// split at the target instead. Otherwise instructions are decoded
        /// one after another until any of the following conditions is true:
        /// - An instruction cannot be created (the block is marked Broken).
        /// - A flow instruction is reached, i.e. one for which
        ///   GetFlowInstructionType returns something other than None
        ///   (block terminating instructions such as RET, RETF, IRET, HLT,
        ///   or b/c/j instructions such as Jcc, JMP, JMPF, LOOPcc).
        /// - The next address is outside the image (the block is marked
        ///   Broken).
        /// - The next address is already analyzed as code (the block falls
        ///   through into the existing code).
        /// </summary>
        /// <param name="start">XRef whose Target is the address to begin
        /// analysis at; its Source is used in error messages.</param>
        /// <param name="xrefs">Collection to add xrefs to.</param>
        /// <returns>
        /// The new BasicBlock if one was created during the analysis.
        /// If no new BasicBlock is created (including when an error occurs
        /// on the first instruction or a block already starts at the
        /// target), or if an existing block was split into two, returns
        /// null.
        /// </returns>
        // TODO: should we roll back the entire basic block if we
        // encounter an error on our way? maybe not.
        protected virtual BasicBlock AnalyzeBasicBlock(XRef start, ICollection <XRef> xrefs)
        {
            Address ip = start.Target; // instruction pointer

            if (!image.IsAddressValid(ip))
            {
                AddError(ip, ErrorCode.OutOfImage,
                         "XRef target is outside of the image (referred from {0})",
                         start.Source);
                return(null);
            }

            // Check if the entry address is already analyzed.
            ByteAttribute b = image[ip];

            if (b.Type != ByteType.Unknown)
            {
                // Fail if we ran into data or padding while expecting code.
                if (b.Type != ByteType.Code)
                {
                    AddError(ip, ErrorCode.RanIntoData,
                             "XRef target is in the middle of data (referred from {0})",
                             start.Source);
                    return(null);
                }

                // Now the byte was previously analyzed as code. We must have
                // already created a basic block that contains this byte.
                BasicBlock block = BasicBlocks.Find(ip);
                System.Diagnostics.Debug.Assert(block != null);

                // If the existing block starts at this address, we're done.
                if (block.Location == ip)
                {
                    return(null);
                }

                // TBD: recover the following in some way...
#if false
                if (image[b.BasicBlock.StartAddress].Address.Segment != pos.Segment)
                {
                    AddError(pos, ErrorCategory.Error,
                             "Ran into the middle of a block [{0},{1}) from another segment " +
                             "when processing block {2} referred from {3}",
                             b.BasicBlock.StartAddress, b.BasicBlock.EndAddress,
                             start.Target, start.Source);
                    return(null);
                }
#endif

                // Now split the existing basic block into two. This requires
                // that the cut-off point is at instruction boundary.
                if (!b.IsLeadByte)
                {
                    AddError(ip, ErrorCode.RanIntoCode,
                             "XRef target is in the middle of an instruction (referred from {0})",
                             start.Source);
                    return(null);
                }
                BasicBlock[] subBlocks = BasicBlocks.SplitBasicBlock(block, ip, image);

                // Create a xref from the previous block to this block.
                XRef xref = CreateFallThroughXRef(GetLastInstructionInBasicBlock(subBlocks[0]), ip);
                xrefs.Add(xref);

                // A split does not count as a newly created block.
                return(null);
            }
            // TODO: Move the above into a separate procedure.

            // Analyze each instruction in sequence until we encounter
            // analyzed code, flow instruction, or an error condition.
            BasicBlockType blockType = BasicBlockType.Unknown;
            while (true)
            {
                // Decode an instruction at this location.
                Address     instructionStart = ip;
                Instruction insn             = CreateInstruction(ip, start);
                if (insn == null)
                {
                    AddError(ip, ErrorCode.BrokenBasicBlock,
                             "Basic block ended prematurally because of invalid instruction.");
                    blockType = BasicBlockType.Broken;
                    break;
                }
                Address instructionEnd = ip + insn.EncodedLength;

                // Advance the instruction pointer.
                ip = instructionEnd;

                // Determine whether this instruction affects control flow.
                XRefType flowType = GetFlowInstructionType(insn.Operation);

                if (flowType != XRefType.None)
                {
                    // Creates an active cross reference if necessary.
                    if (NeedsActiveXRef(flowType))
                    {
                        XRef xref = CreateFlowXRef(flowType, instructionStart, insn);
                        if (xref != null)
                        {
                            xrefs.Add(xref);
                        }
                    }

                    // Creates a fall-through cross reference if necessary.
                    if (CanFallThrough(flowType))
                    {
                        XRef xref = CreateFallThroughXRef(instructionStart, instructionEnd);
                        xrefs.Add(xref);
                    }

                    // Terminate the block.
                    blockType = GetBasicBlockType(flowType);
                    break;
                }

                // Stop if we ran off the end of the image.
                if (!image.IsAddressValid(ip))
                {
                    blockType = BasicBlockType.Broken;
                    break;
                }

                // If the new location is already analyzed as code, create a
                // control-flow edge from the previous block to the existing
                // block, and we are done.
                if (image[ip].Type == ByteType.Code)
                {
                    System.Diagnostics.Debug.Assert(image[ip].IsLeadByte);

                    XRef xref = CreateFallThroughXRef(instructionStart, instructionEnd);
                    xrefs.Add(xref);
                    blockType = BasicBlockType.FallThrough;
                    break;
                }
            }

            // Create a basic block unless we failed on the first instruction,
            // and hand it back to the caller as documented.
            if (ip.Offset > start.Target.Offset)
            {
                BasicBlock block = new BasicBlock(start.Target, ip, blockType, image);
                BasicBlocks.Add(block);
                return(block);
            }
            return(null);
        }
        /// <summary>
        /// Fills the Target of an IndexedJump xref heuristically by plugging
        /// in the jump target stored in DataLocation and performing various
        /// sanity checks.
        /// </summary>
        /// <remarks>
        /// NOTE: the heuristic is currently compiled out. Because of the
        /// "#if true" at the top of the body, the method returns null
        /// unconditionally, which callers interpret as the end of the jump
        /// table. The implementation described above lives in the "#else"
        /// branch and is not compiled.
        /// </remarks>
        /// <param name="entry">A xref of type IndexedJump whose Target field
        /// is Invalid.</param>
        /// <param name="xrefs">Collection to add a new dynamic IndexedJump
        /// xref to, if any.</param>
        /// <returns>The updated xref, or null if the jump table ends.
        /// Always null while the implementation is disabled.</returns>
        private XRef ProcessJumpTableEntry(XRef entry, ICollection <XRef> xrefs)
        {
#if true
            // Jump table processing is disabled: report "end of table".
            return(null);
#else
            // Disabled implementation, kept for reference.
            System.Diagnostics.Debug.Assert(
                entry.Type == XRefType.NearIndexedJump &&
                entry.Target == LogicalAddress.Invalid,
                "Entry must be NearIndexedJump with unknown target");

            // Verify that the location that supposedly stores the jump table
            // entry is not analyzed as anything else. If it is, it indicates
            // that the jump table ends here.
            LinearPointer b = entry.DataLocation.LinearAddress;
            if (image[b].Type != ByteType.Unknown ||
                image[b + 1].Type != ByteType.Unknown)
            {
                return(null);
            }

            // If the data location looks like in another segment, stop.
            if (image.LargestSegmentThatStartsBefore(b)
                > entry.Source.Segment)
            {
                return(null);
            }

            // TBD: it's always a problem if CS:IP wraps. We need a more
            // general way to detect and fix it. For this particular case,
            // we need to check that the jump target is within the space
            // of this segment.
            if (entry.DataLocation.Offset >= 0xFFFE)
            {
                AddError(entry.DataLocation, ErrorCategory.Error,
                         "Jump table is too big (jumped from {0}).",
                         entry.Source);
                return(null);
            }

            // Find the target address of the jump table entry.
            ushort  jumpOffset = image.GetUInt16(b);
            Pointer jumpTarget = new Pointer(entry.Source.Segment, jumpOffset);

            // Check that the target address looks valid. If it doesn't, it
            // probably indicates that the jump table ends here.
            if (!image.IsAddressValid(jumpTarget.LinearAddress))
            {
                return(null);
            }

            // If the jump target is outside the range of the current segment
            // but inside the range of a later segment, it likely indicates
            // that the jump table ends here.
            // TBD: this heuristic is kind of a hack... we should do better.
#if true
            if (image.LargestSegmentThatStartsBefore(jumpTarget.LinearAddress)
                > entry.Source.Segment)
            {
                return(null);
            }
#endif

            // BUG: We really do need to check that the destination
            // is valid. If not, we should stop immediately.
            if (!(image[jumpTarget].Type == ByteType.Unknown ||
                  image[jumpTarget].Type == ByteType.Code &&
                  image[jumpTarget].IsLeadByte))
            {
                return(null);
            }

            // ...

            // Mark DataLocation as data and add it to the owning procedure's
            // byte range.
            Piece piece = image.CreatePiece(
                entry.DataLocation, entry.DataLocation + 2, ByteType.Data);
            Procedure proc = image[entry.Source].Procedure;
            proc.AddDataBlock(piece.StartAddress, piece.EndAddress);

            // Add a dynamic xref from the JMP instruction to the next jump
            // table entry.
            xrefs.Add(new XRef(
                          type: XRefType.NearIndexedJump,
                          source: entry.Source,
                          target: Pointer.Invalid,
                          dataLocation: entry.DataLocation + 2
                          ));

            // Return the updated xref with Target field filled.
            return(new XRef(
                       type: XRefType.NearIndexedJump,
                       source: entry.Source,
                       target: jumpTarget,
                       dataLocation: entry.DataLocation
                       ));
#endif
        }
        /// <summary>
        /// Analyzes a contiguous sequence of instructions starting at the
        /// target of the given xref and creates a basic block for them. If
        /// the target is already analyzed as code inside an existing block,
        /// that block is split at the target instead.
        /// </summary>
        /// <param name="start">XRef whose Target is the address to begin
        /// analysis at; its Source is used in error messages.</param>
        /// <param name="xrefs">Collection that receives the xrefs created
        /// for flow instructions found in the block.</param>
        /// <returns>
        /// The basic block created by image.CreateBasicBlock, or null if an
        /// error occurred before any instruction was consumed, if a block
        /// already starts at the target, or if an existing block was split.
        /// </returns>
        private BasicBlock AnalyzeBasicBlock(XRef start, ICollection <XRef> xrefs)
        {
            Pointer pos = start.Target;

            // Check if we are running into the middle of code or data. This
            // can only happen when we process the first instruction in the
            // block.
            if (image[pos].Type != ByteType.Unknown && !image[pos].IsLeadByte)
            {
                AddError(pos, ErrorCategory.Error,
                         "XRef target is in the middle of code/data (referred from {0})",
                         start.Source);
                return(null);
            }

            // Check if this location is already analyzed as code.
            if (image[pos].Type == ByteType.Code)
            {
                ByteProperties b = image[pos];

                // Now we are already covered by a basic block. If the
                // basic block *starts* from this address, do nothing.
                // Otherwise, split the basic block into two.
                if (b.BasicBlock.StartAddress == pos.LinearAddress)
                {
                    return(null);
                }
                else
                {
                    if (image[b.BasicBlock.StartAddress].Address.Segment != pos.Segment)
                    {
                        AddError(pos, ErrorCategory.Error,
                                 "Ran into the middle of a block [{0},{1}) from another segment " +
                                 "when processing block {2} referred from {3}",
                                 b.BasicBlock.StartAddress, b.BasicBlock.EndAddress,
                                 start.Target, start.Source);
                        return(null);
                    }

                    // The block produced by the split is not reported to
                    // the caller.
                    b.BasicBlock.Split(pos.LinearAddress);
                    return(null);
                }
            }

            // Analyze each instruction in sequence until we encounter
            // analyzed code, flow instruction, or an error condition.
            while (true)
            {
                // Decode an instruction at this location.
                Pointer     insnPos = pos;
                Instruction insn;
                try
                {
                    insn = image.DecodeInstruction(pos);
                }
                catch (Exception ex)
                {
                    AddError(pos, ErrorCategory.Error, "Bad instruction: {0}", ex.Message);
                    break;
                }

                // Make sure the instruction only covers unanalyzed bytes.
                if (!image.CheckByteType(pos, pos + insn.EncodedLength, ByteType.Unknown))
                {
                    AddError(pos, ErrorCategory.Error,
                             "Ran into the middle of code when processing block {0} referred from {1}",
                             start.Target, start.Source);
                    break;
                }

                // Create a code piece for this instruction and advance the
                // byte pointer. Note: the IP may wrap around 0xFFFF if
                // pos.off + count > 0xFFFF. This is probably not intended.
                try
                {
                    image.CreatePiece(pos, pos + insn.EncodedLength, ByteType.Code);
                    pos += insn.EncodedLength;
                }
                catch (AddressWrappedException)
                {
                    // The placeholders must start at {0}: only two
                    // arguments are supplied.
                    AddError(pos, ErrorCategory.Error,
                             "CS:IP wrapped when processing block {0} referred from {1}",
                             start.Target, start.Source);
                    break;
                }

                // Check if this instruction terminates the block.
                if (insn.Operation == Operation.RET ||
                    insn.Operation == Operation.RETF ||
                    insn.Operation == Operation.HLT)
                {
                    break;
                }

                // Analyze BCJ (branch, jump, call) instructions. Such an
                // instruction will create a cross reference.
                XRef xref = AnalyzeFlowInstruction(insnPos, insn);
                if (xref != null)
                {
                    xrefs.Add(xref);

                    // If the instruction is a conditional jump, add xref to
                    // the 'no-jump' branch.
                    // TODO: adding a no-jump xref causes confusion when we
                    // browse xrefs in the disassembly listing window. Is it
                    // truly necessary to add these xrefs?
                    if (xref.Type == XRefType.ConditionalJump)
                    {
                        xrefs.Add(new XRef(
                                      type: XRefType.ConditionalJump,
                                      source: insnPos,
                                      target: pos
                                      ));
                    }

                    // Finish basic block unless this is a CALL instruction.
                    if (xref.Type == XRefType.ConditionalJump ||
                        xref.Type == XRefType.NearJump ||
                        xref.Type == XRefType.FarJump ||
                        xref.Type == XRefType.NearIndexedJump)
                    {
                        break;
                    }
                }

                // If the new location is already analyzed as code, the
                // block ends here.
                if (image[pos].Type == ByteType.Code)
                {
                    System.Diagnostics.Debug.Assert(image[pos].IsLeadByte);
                    break;
                }
            }

            // Create a basic block unless we failed on the first instruction.
            if (pos.LinearAddress > start.Target.LinearAddress)
            {
                return(image.CreateBasicBlock(start.Target.LinearAddress, pos.LinearAddress));
            }
            else
            {
                return(null);
            }
        }
Exemple #11
0
 /// <summary>
 /// Creates the event arguments for the given cross reference.
 /// </summary>
 /// <param name="xref">Cross reference stored in the XRef member.</param>
 public LogicalXRefAddedEventArgs(XRef xref)
 {
     XRef = xref;
 }
Exemple #12
0
        /// <summary>
        /// Orders two XRef objects by priority (precedence). An XRef whose
        /// Type has a smaller numeric value has higher precedence and
        /// compares smaller, as expected by a min-priority queue.
        /// </summary>
        /// <param name="x">First XRef to compare.</param>
        /// <param name="y">Second XRef to compare.</param>
        /// <returns>
        /// The numeric value of x.Type minus the numeric value of y.Type:
        /// negative if x has higher precedence, zero if both have the same
        /// type, positive otherwise.
        /// </returns>
        public static int CompareByPriority(XRef x, XRef y)
        {
            return (int)x.Type - (int)y.Type;
        }