/// <summary>
/// Records a call from one procedure to another as an edge in the graph.
/// The edge runs from the caller's entry point to the callee's entry
/// point, carries the type of <paramref name="xref"/>, and stores the
/// source of <paramref name="xref"/> as its data location.
/// </summary>
/// <param name="caller">Procedure that makes the call.</param>
/// <param name="callee">Procedure that is being called.</param>
/// <param name="xref">Cross reference that gave rise to the call.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="caller"/>, <paramref name="callee"/> or
/// <paramref name="xref"/> is null.
/// </exception>
public void AddEdge(Procedure caller, Procedure callee, XRef xref)
{
    if (caller == null)
    {
        throw new ArgumentNullException("caller");
    }

    if (callee == null)
    {
        throw new ArgumentNullException("callee");
    }

    if (xref == null)
    {
        throw new ArgumentNullException("xref");
    }

    // Both end points are expected to be registered already.
    System.Diagnostics.Debug.Assert(procedures.Contains(caller));
    System.Diagnostics.Debug.Assert(procedures.Contains(callee));

    // TBD: check that the xref indeed refers to these two procedures.
    graph.Add(new XRef(
        type: xref.Type,
        source: caller.EntryPoint,
        target: callee.EntryPoint,
        dataLocation: xref.Source));
}
/// <summary>
/// Decodes the instruction at the given address and registers it with
/// the image. The bytes it covers are marked as code and the
/// instruction is added to the image's instruction collection.
/// </summary>
/// <param name="address">Address of the first byte of the instruction.</param>
/// <param name="entry">
/// Cross reference of the block being processed; its target and source
/// are only used to compose the error message.
/// </param>
/// <returns>
/// The decoded instruction, or null if nothing could be decoded or if
/// any byte covered by the instruction is not of type Unknown.
/// </returns>
protected virtual Instruction CreateInstruction(Address address, XRef entry)
{
    Instruction decoded = DecodeInstruction(address);
    if (decoded == null)
    {
        return null;
    }

    // One past the last byte occupied by the instruction.
    Address end = address + decoded.EncodedLength;

    // Every byte of the instruction must still be unanalyzed.
    bool isUnanalyzed = image.CheckByteType(address, end, ByteType.Unknown);
    if (!isUnanalyzed)
    {
        AddError(
            address,
            ErrorCode.OverlappingInstruction,
            "Ran into the middle of code when processing block {0} referred from {1}",
            entry.Target,
            entry.Source);
        return null;
    }

    // Claim the bytes as code and remember the instruction.
    image.UpdateByteType(address, end, ByteType.Code);
    image.Instructions.Add(address, decoded);

    return decoded;
}
/// <summary>
/// Records a control flow transfer between two basic blocks as an edge
/// in the graph. The edge runs from the location of
/// <paramref name="source"/> to the location of
/// <paramref name="target"/>, carries the type of
/// <paramref name="xref"/>, and stores the source of
/// <paramref name="xref"/> as its data location.
/// </summary>
/// <param name="source">Block that control flows out of.</param>
/// <param name="target">Block that control flows into.</param>
/// <param name="xref">Cross reference that gave rise to the edge.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/>, <paramref name="target"/> or
/// <paramref name="xref"/> is null.
/// </exception>
public void AddEdge(BasicBlock source, BasicBlock target, XRef xref)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    if (target == null)
    {
        throw new ArgumentNullException("target");
    }

    if (xref == null)
    {
        throw new ArgumentNullException("xref");
    }

    // Both end points are expected to be registered already.
    System.Diagnostics.Debug.Assert(blocks.Contains(source));
    System.Diagnostics.Debug.Assert(blocks.Contains(target));

    graph.Add(new XRef(
        type: xref.Type,
        source: source.Location,
        target: target.Location,
        dataLocation: xref.Source));
}
/// <summary>
/// Creates a cross reference of type FallThrough between two addresses.
/// </summary>
/// <param name="source">Address that execution falls through from.</param>
/// <param name="target">Address that execution falls through to.</param>
/// <returns>The newly created fall-through cross reference.</returns>
protected virtual XRef CreateFallThroughXRef(Address source, Address target)
{
    return new XRef(
        type: XRefType.FallThrough,
        source: source,
        target: target);
}
/// <summary>
/// Orders two XRef objects by location. The source is compared first;
/// ties are broken by the target, and remaining ties by the data
/// location.
/// </summary>
/// <param name="x">First cross reference.</param>
/// <param name="y">Second cross reference.</param>
/// <returns>
/// The result of the first comparison that is non-zero, or the result
/// of comparing the data locations if source and target are both equal.
/// </returns>
public static int CompareByLocation(XRef x, XRef y)
{
    int bySource = x.Source.CompareTo(y.Source);
    if (bySource != 0)
    {
        return bySource;
    }

    int byTarget = x.Target.CompareTo(y.Target);
    if (byTarget != 0)
    {
        return byTarget;
    }

    return x.DataLocation.CompareTo(y.DataLocation);
}
/// <summary>
/// Feeds a procedure into the hasher by walking its control flow graph
/// breadth-first from the entry point. For every block taken from the
/// queue its visit order is hashed; blocks seen for the first time
/// additionally contribute their instructions and the type of their
/// non-fall-through outgoing edge. Fall-through edges are followed
/// immediately so that a procedure hashes the same whether or not one
/// of its blocks has been split.
/// </summary>
/// <param name="hasher">Hash algorithm that receives the data.</param>
/// <param name="procedure">Procedure whose entry point starts the walk.</param>
/// <param name="image">Image that supplies the basic blocks and their
/// control flow graph.</param>
/// <exception cref="InvalidOperationException">
/// A block has more than one fall-through edge or more than one
/// non-fall-through edge.
/// </exception>
private static void ComputeMore(
    HashAlgorithm hasher, Procedure procedure, BinaryImage image)
{
    // TODO: add the traversal logic into Graph class.
    // or maybe GraphAlgorithms.Traversal(...).

    // Work list for the breadth-first walk. DFS would do equally well
    // as long as the choice never changes; BFS is simply the easier
    // one to follow.
    Queue<Address> pending = new Queue<Address>();
    pending.Enqueue(procedure.EntryPoint);

    // Maps the entry address of a block to the position at which it
    // was first reached. Only targets of non-fall-through edges (and
    // the entry point) get a position. Hashing the position gives a
    // hint of the shape of the graph.
    Dictionary<Address, int> visitOrder = new Dictionary<Address, int>();

    XRefCollection flowGraph = image.BasicBlocks.ControlFlowGraph.Graph;

    while (pending.Count > 0)
    {
        Address blockStart = pending.Dequeue();

        // Look up the position of this block, assigning the next free
        // one if it is new. The position is hashed in either case.
        int order;
        bool alreadyVisited = visitOrder.TryGetValue(blockStart, out order);
        if (!alreadyVisited)
        {
            order = visitOrder.Count;
            visitOrder.Add(blockStart, order);
        }
        ComputeMore(hasher, order);

        if (alreadyVisited)
        {
            continue;
        }

        // Hash the instructions of the block and of every block
        // reachable from it through fall-through edges, so that
        // artificial block boundaries do not affect the result.
        // Consider:
        //
        //   MySub:              LibSub1:
        //     mov ax, bx          mov ax, bx
        //                       LibSub2:
        //     mov bx, cx          mov bx, cx
        //     ret                 ret
        //
        // Both sides consist of the same three instructions, but a
        // call into the middle of LibSub1 splits its block in two and
        // links the halves with a fall-through edge. Unless that edge
        // is followed here, the right side hashes differently from
        // the left side.
        Address current = blockStart;
        bool fellThrough;
        do
        {
            BasicBlock block = image.BasicBlocks.Find(current);
            System.Diagnostics.Debug.Assert(block != null);

            // Only the opcode part of each instruction is hashed;
            // displacements and immediates may be subject to fix-up
            // and are therefore left out.
            ComputeMore(hasher, block, image);

            // Classify the outgoing edges. Non-fall-through edges
            // must be ordered in a way that depends only on the
            // structure of the graph, not on where the target blocks
            // happen to be placed. With a single such edge this is
            // trivially true; with several (e.g. an indexed jump) it
            // is not.
            //
            // TBD: handle multiple outgoing edges.
            XRef fallThrough = null;
            XRef branch = null;
            foreach (XRef edge in flowGraph.GetReferencesFrom(current))
            {
                if (edge.Type == XRefType.FallThrough)
                {
                    if (fallThrough != null)
                    {
                        throw new InvalidOperationException("Cannot have more than one fall-through edge.");
                    }
                    fallThrough = edge;
                }
                else
                {
                    if (branch != null)
                    {
                        throw new InvalidOperationException("Cannot have more than one non-fall-through edge.");
                    }
                    branch = edge;
                }
            }

            // Hash the kind of the branch and visit its target later.
            if (branch != null)
            {
                ComputeMore(hasher, (int)branch.Type);
                pending.Enqueue(branch.Target);
            }

            // Continue with the next block in the chain, if any.
            fellThrough = fallThrough != null;
            if (fellThrough)
            {
                current = fallThrough.Target;
            }
        }
        while (fellThrough);
    }
}
/// <summary>
/// Analyzes code starting from the given location and creates basic
/// blocks iteratively. Every cross reference taken from the work queue
/// is added to CrossReferences, except an indexed jump entry for which
/// ProcessJumpTableEntry returns null.
/// </summary>
/// <param name="entryPoint">Address at which the analysis starts.</param>
/// <param name="entryType">Type given to the dummy cross reference
/// that seeds the work queue.</param>
public void GenerateBasicBlocks(Address entryPoint, XRefType entryType)
{
    // Maintain a queue of basic block entry points to analyze. At
    // the beginning, only the user-specified entry point is in the
    // queue. As we encounter b/c/j instructions during the course
    // of analysis, the target addresses are pushed to the queue to
    // be analyzed later.
    PriorityQueue<XRef> xrefQueue =
        new PriorityQueue<XRef>(XRef.CompareByPriority);

    // Create a dummy xref entry using the user-supplied starting
    // address.
    xrefQueue.Enqueue(new XRef(
        type: entryType,
        source: Address.Invalid,
        target: entryPoint
    ));

    // Analyze each cross reference in order of priority. In
    // particular, an indexed jump is delayed until all other types of
    // xrefs have been processed. This reduces the chance that we
    // process past the end of a jump table.
    while (!xrefQueue.IsEmpty)
    {
        XRef entry = xrefQueue.Dequeue();

        // Handle jump table entry, whose Target == Invalid.
        if (entry.Type == XRefType.NearIndexedJump)
        {
            System.Diagnostics.Debug.Assert(entry.Target == Address.Invalid);

            // Fill the Target field to make it a static xref.
            entry = ProcessJumpTableEntry(entry, xrefQueue);
            if (entry == null) // end of jump table
            {
                continue;
            }
        }

        // Skip other dynamic xrefs; they are recorded but there is no
        // known address to analyze.
        if (entry.Target == Address.Invalid)
        {
            CrossReferences.Add(entry);
            continue;
        }

        // Process the basic block starting at the target address. The
        // return value is deliberately ignored: nothing in this method
        // consumes it (the code that used to attach the block to a
        // procedure was disabled).
        AnalyzeBasicBlock(entry, xrefQueue);
        CrossReferences.Add(entry);
    }
}
/// <summary>
/// Gets the image associated with the segment specified by its id.
/// </summary>
/// <param name="segmentId">Id of the segment to resolve.</param>
/// <returns>The image associated with the given segment, or null if
/// the segment id is invalid.</returns>
//protected abstract ImageChunk ResolveSegment(int segmentId);

#region Flow Analysis Methods

/// <summary>
/// Analyzes a contiguous sequence of instructions that form a basic
/// block. A basic block terminates as soon as any of the following
/// conditions is true:
/// - An analysis error occurs.
/// - A block terminating instruction is found: RET, RETF, IRET, HLT.
/// - A b/c/j instruction is found: Jcc, JMP, JMPF, LOOPcc.
/// - The next address is outside the image or is already analyzed as
///   code.
/// </summary>
/// <param name="start">Cross reference whose target is the address to
/// begin analysis at; its source is quoted in error messages.</param>
/// <param name="xrefs">Collection to add xrefs to.</param>
/// <returns>
/// A new BasicBlock if one was created during the analysis.
/// If no new BasicBlocks are created, or if an existing block was
/// split into two, returns null.
/// </returns>
protected virtual BasicBlock AnalyzeBasicBlock(XRef start, ICollection<XRef> xrefs)
{
    // TODO: should we roll back the entire basic block if we
    // encounter an error on our way? Maybe not.

    Address ip = start.Target; // instruction pointer
    if (!image.IsAddressValid(ip))
    {
        AddError(ip, ErrorCode.OutOfImage,
            "XRef target is outside of the image (referred from {0})",
            start.Source);
        return null;
    }

    // Check if the entry address is already analyzed.
    ByteAttribute b = image[ip];
    if (b.Type != ByteType.Unknown)
    {
        // Fail if we ran into data or padding while expecting code.
        if (b.Type != ByteType.Code)
        {
            AddError(ip, ErrorCode.RanIntoData,
                "XRef target is in the middle of data (referred from {0})",
                start.Source);
            return null;
        }

        // Now the byte was previously analyzed as code. We must have
        // already created a basic block that contains this byte.
        BasicBlock block = BasicBlocks.Find(ip);
        System.Diagnostics.Debug.Assert(block != null);

        // If the existing block starts at this address, we're done.
        if (block.Location == ip)
        {
            return null;
        }

        // TBD: recover the following in some way...
#if false
        if (image[b.BasicBlock.StartAddress].Address.Segment != pos.Segment)
        {
            AddError(pos, ErrorCategory.Error,
                "Ran into the middle of a block [{0},{1}) from another segment " +
                "when processing block {2} referred from {3}",
                b.BasicBlock.StartAddress, b.BasicBlock.EndAddress,
                start.Target, start.Source);
            return(null);
        }
#endif

        // Now split the existing basic block into two. This requires
        // that the cut-off point is at instruction boundary.
        if (!b.IsLeadByte)
        {
            AddError(ip, ErrorCode.RanIntoCode,
                "XRef target is in the middle of an instruction (referred from {0})",
                start.Source);
            return null;
        }
        BasicBlock[] subBlocks = BasicBlocks.SplitBasicBlock(block, ip, image);

        // Create a xref from the previous block to this block.
        XRef xref = CreateFallThroughXRef(
            GetLastInstructionInBasicBlock(subBlocks[0]), ip);
        xrefs.Add(xref);
        return null;
    }
    // TODO: Move the above into a separate procedure.

    // Analyze each instruction in sequence until we encounter
    // analyzed code, flow instruction, or an error condition.
    BasicBlockType blockType = BasicBlockType.Unknown;
    while (true)
    {
        // Decode an instruction at this location.
        Address instructionStart = ip;
        Instruction insn = CreateInstruction(ip, start);
        if (insn == null)
        {
            AddError(ip, ErrorCode.BrokenBasicBlock,
                "Basic block ended prematurally because of invalid instruction.");
            blockType = BasicBlockType.Broken;
            break;
        }
        Address instructionEnd = ip + insn.EncodedLength;

        // Advance the instruction pointer.
        ip = instructionEnd;

        // Determine whether this instruction affects control flow.
        XRefType flowType = GetFlowInstructionType(insn.Operation);

        if (flowType != XRefType.None)
        {
            // Creates an active cross reference if necessary.
            if (NeedsActiveXRef(flowType))
            {
                XRef xref = CreateFlowXRef(flowType, instructionStart, insn);
                if (xref != null)
                {
                    xrefs.Add(xref);
                }
            }

            // Creates a fall-through cross reference if necessary.
            if (CanFallThrough(flowType))
            {
                XRef xref = CreateFallThroughXRef(instructionStart, instructionEnd);
                xrefs.Add(xref);
            }

            // Terminate the block.
            blockType = GetBasicBlockType(flowType);
            break;
        }

        // Stop with a broken block if the next address left the image.
        if (!image.IsAddressValid(ip))
        {
            blockType = BasicBlockType.Broken;
            break;
        }

        // If the new location is already analyzed as code, create a
        // control-flow edge from the previous block to the existing
        // block, and we are done.
        if (image[ip].Type == ByteType.Code)
        {
            System.Diagnostics.Debug.Assert(image[ip].IsLeadByte);

            XRef xref = CreateFallThroughXRef(instructionStart, instructionEnd);
            xrefs.Add(xref);
            blockType = BasicBlockType.FallThrough;
            break;
        }
    }

    // Create a basic block unless we failed on the first instruction,
    // and hand it back to the caller as promised by the contract above
    // (previously null was returned even when a block was created).
    if (ip.Offset > start.Target.Offset)
    {
        BasicBlock block = new BasicBlock(start.Target, ip, blockType, image);
        BasicBlocks.Add(block);
        return block;
    }
    return null;
}
/// <summary>
/// Fills the Target of an IndexedJump xref heuristically by plugging
/// in the jump target stored in DataLocation and performing various
/// sanity checks.
/// NOTE: the heuristic is currently compiled out (see the <c>#if true</c>
/// below), so this method unconditionally returns null and every jump
/// table is treated as ending immediately.
/// </summary>
/// <param name="entry">A xref of type IndexedJump whose Target field
/// is Invalid.</param>
/// <param name="xrefs">Collection to add a new dynamic IndexedJump
/// xref to, if any.</param>
/// <returns>The updated xref, or null if the jump table ends.</returns>
private XRef ProcessJumpTableEntry(XRef entry, ICollection<XRef> xrefs)
{
#if true
    return null;
#else
    System.Diagnostics.Debug.Assert(
        entry.Type == XRefType.NearIndexedJump &&
        entry.Target == LogicalAddress.Invalid,
        "Entry must be NearIndexedJump with unknown target");

    // Verify that the location that supposedly stores the jump table
    // entry is not analyzed as anything else. If it is, it indicates
    // that the jump table ends here.
    LinearPointer b = entry.DataLocation.LinearAddress;
    if (image[b].Type != ByteType.Unknown ||
        image[b + 1].Type != ByteType.Unknown)
    {
        return(null);
    }

    // If the data location looks like in another segment, stop.
    if (image.LargestSegmentThatStartsBefore(b)
        > entry.Source.Segment)
    {
        return(null);
    }

    // TBD: it's always a problem if CS:IP wraps. We need a more
    // general way to detect and fix it. For this particular case,
    // we need to check that the jump target is within the space
    // of this segment.
    if (entry.DataLocation.Offset >= 0xFFFE)
    {
        AddError(entry.DataLocation, ErrorCategory.Error,
            "Jump table is too big (jumped from {0}).",
            entry.Source);
        return(null);
    }

    // Find the target address of the jump table entry.
    ushort jumpOffset = image.GetUInt16(b);
    Pointer jumpTarget = new Pointer(entry.Source.Segment, jumpOffset);

    // Check that the target address looks valid. If it doesn't, it
    // probably indicates that the jump table ends here.
    if (!image.IsAddressValid(jumpTarget.LinearAddress))
    {
        return(null);
    }

    // If the jump target is outside the range of the current segment
    // but inside the range of a later segment, it likely indicates
    // that the jump table ends here.
    // TBD: this heuristic is kind of a hack... we should do better.
#if true
    if (image.LargestSegmentThatStartsBefore(jumpTarget.LinearAddress)
        > entry.Source.Segment)
    {
        return(null);
    }
#endif

    // BUG: We really do need to check that the destination
    // is valid. If not, we should stop immediately.
    if (!(image[jumpTarget].Type == ByteType.Unknown ||
          image[jumpTarget].Type == ByteType.Code &&
          image[jumpTarget].IsLeadByte))
    {
        return(null);
    }

    // ...

    // Mark DataLocation as data and add it to the owning procedure's
    // byte range.
    Piece piece = image.CreatePiece(
        entry.DataLocation, entry.DataLocation + 2, ByteType.Data);
    Procedure proc = image[entry.Source].Procedure;
    proc.AddDataBlock(piece.StartAddress, piece.EndAddress);

    // Add a dynamic xref from the JMP instruction to the next jump
    // table entry.
    xrefs.Add(new XRef(
        type: XRefType.NearIndexedJump,
        source: entry.Source,
        target: Pointer.Invalid,
        dataLocation: entry.DataLocation + 2
    ));

    // Return the updated xref with Target field filled.
    return(new XRef(
        type: XRefType.NearIndexedJump,
        source: entry.Source,
        target: jumpTarget,
        dataLocation: entry.DataLocation
    ));
#endif
}
/// <summary>
/// Analyzes a contiguous sequence of instructions starting at the
/// target of <paramref name="start"/> and turns it into a basic block.
/// Analysis stops at a RET, RETF or HLT instruction, at a jump
/// (conditional, near, far or indexed), when already-analyzed code is
/// reached, or when an error is reported.
/// </summary>
/// <param name="start">Cross reference whose target is the address to
/// begin analysis at; its source is quoted in error messages.</param>
/// <param name="xrefs">Collection that receives the cross references
/// produced by flow instructions in the block.</param>
/// <returns>
/// The basic block created for the analyzed range, or null if the
/// position did not advance past the start, if the target was already
/// the start of a block, or if an existing block was split.
/// </returns>
private BasicBlock AnalyzeBasicBlock(XRef start, ICollection<XRef> xrefs)
{
    Pointer pos = start.Target;

    // Check if we are running into the middle of code or data. This
    // can only happen when we process the first instruction in the
    // block.
    if (image[pos].Type != ByteType.Unknown && !image[pos].IsLeadByte)
    {
        AddError(pos, ErrorCategory.Error,
            "XRef target is in the middle of code/data (referred from {0})",
            start.Source);
        return null;
    }

    // Check if this location is already analyzed as code.
    if (image[pos].Type == ByteType.Code)
    {
        ByteProperties b = image[pos];

        // Now we are already covered by a basic block. If the
        // basic block *starts* from this address, do nothing.
        // Otherwise, split the basic block into two.
        if (b.BasicBlock.StartAddress == pos.LinearAddress)
        {
            return null;
        }

        if (image[b.BasicBlock.StartAddress].Address.Segment != pos.Segment)
        {
            AddError(pos, ErrorCategory.Error,
                "Ran into the middle of a block [{0},{1}) from another segment " +
                "when processing block {2} referred from {3}",
                b.BasicBlock.StartAddress, b.BasicBlock.EndAddress,
                start.Target, start.Source);
            return null;
        }

        // The block produced by the split is not reported to the
        // caller.
        b.BasicBlock.Split(pos.LinearAddress);
        return null;
    }

    // Analyze each instruction in sequence until we encounter
    // analyzed code, flow instruction, or an error condition.
    while (true)
    {
        // Decode an instruction at this location.
        Pointer insnPos = pos;
        Instruction insn;
        try
        {
            insn = image.DecodeInstruction(pos);
        }
        catch (Exception ex)
        {
            AddError(pos, ErrorCategory.Error, "Bad instruction: {0}", ex.Message);
            break;
        }

        // All bytes of the instruction must still be unanalyzed.
        if (!image.CheckByteType(pos, pos + insn.EncodedLength, ByteType.Unknown))
        {
            AddError(pos,
                "Ran into the middle of code when processing block {0} referred from {1}",
                start.Target, start.Source);
            break;
        }

        // Create a code piece for this instruction and advance the
        // byte pointer. Note: the IP may wrap around 0xFFFF if
        // pos.off + count > 0xFFFF. This is probably not intended.
        try
        {
            image.CreatePiece(pos, pos + insn.EncodedLength, ByteType.Code);
            pos += insn.EncodedLength;
        }
        catch (AddressWrappedException)
        {
            // Placeholders are numbered from zero; the message used to
            // reference {1} and {2} with only two arguments supplied.
            AddError(pos, ErrorCategory.Error,
                "CS:IP wrapped when processing block {0} referred from {1}",
                start.Target, start.Source);
            break;
        }

        // Check if this instruction terminates the block.
        if (insn.Operation == Operation.RET ||
            insn.Operation == Operation.RETF ||
            insn.Operation == Operation.HLT)
        {
            break;
        }

        // Analyze BCJ (branch, jump, call) instructions. Such an
        // instruction will create a cross reference.
        XRef xref = AnalyzeFlowInstruction(insnPos, insn);
        if (xref != null)
        {
            xrefs.Add(xref);

            // If the instruction is a conditional jump, add xref to
            // the 'no-jump' branch.
            // TODO: adding a no-jump xref causes confusion when we
            // browse xrefs in the disassembly listing window. Is it
            // truly necessary to add these xrefs?
            if (xref.Type == XRefType.ConditionalJump)
            {
                xrefs.Add(new XRef(
                    type: XRefType.ConditionalJump,
                    source: insnPos,
                    target: pos
                ));
            }

            // Finish basic block unless this is a CALL instruction.
            if (xref.Type == XRefType.ConditionalJump ||
                xref.Type == XRefType.NearJump ||
                xref.Type == XRefType.FarJump ||
                xref.Type == XRefType.NearIndexedJump)
            {
                break;
            }
        }

        // If the new location is already analyzed as code, the block
        // ends here.
        if (image[pos].Type == ByteType.Code)
        {
            System.Diagnostics.Debug.Assert(image[pos].IsLeadByte);
            break;
        }
    }

    // Create a basic block unless we failed on the first instruction.
    if (pos.LinearAddress > start.Target.LinearAddress)
    {
        return image.CreateBasicBlock(start.Target.LinearAddress, pos.LinearAddress);
    }
    return null;
}
/// <summary>
/// Creates event data that carries the given cross reference.
/// </summary>
/// <param name="xref">Cross reference stored in the XRef member.</param>
public LogicalXRefAddedEventArgs(XRef xref)
{
    this.XRef = xref;
}
/// <summary>
/// Orders two XRef objects by priority (precedence). The numeric value
/// of Type is the rank: the smaller the value, the higher the
/// precedence, so such an XRef compares smaller (as in a min-priority
/// queue).
/// </summary>
/// <param name="x">First cross reference.</param>
/// <param name="y">Second cross reference.</param>
/// <returns>
/// The numeric Type value of <paramref name="x"/> minus that of
/// <paramref name="y"/>.
/// </returns>
public static int CompareByPriority(XRef x, XRef y)
{
    int rankOfX = (int)x.Type;
    int rankOfY = (int)y.Type;
    return rankOfX - rankOfY;
}