/// <summary>
/// Initializes the disassembler with the binary image it works on.
/// </summary>
/// <param name="image">The binary image to keep; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="image"/> is null.
/// </exception>
protected DisassemblerBase(BinaryImage image)
{
    // Reject a missing image up front rather than failing on first use.
    if (image == null)
    {
        throw new ArgumentNullException("image");
    }

    this.image = image;
}
/// <summary>
/// Creates an instruction collection bound to the given binary image.
/// </summary>
/// <param name="image">The binary image to keep; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="image"/> is null.
/// </exception>
public InstructionCollection(BinaryImage image)
{
    // Reject a missing image up front rather than failing on first use.
    if (image == null)
    {
        throw new ArgumentNullException("image");
    }

    this.image = image;
}
/// <summary>
/// Computes the checksum of a procedure using an MD5 hash.
/// </summary>
/// <param name="procedure">The procedure to hash.</param>
/// <param name="image">The binary image the procedure belongs to.</param>
/// <returns>A checksum wrapping the final hash value.</returns>
public static CodeChecksum Compute(Procedure procedure, BinaryImage image)
{
    HashAlgorithm md5 = MD5.Create();
    try
    {
        // Feed the procedure's contents into the hash.
        ComputeMore(md5, procedure, image);

        // Finalize with an empty block so that Hash becomes available.
        byte[] nothing = new byte[0];
        md5.TransformFinalBlock(nothing, 0, 0);

        byte[] digest = md5.Hash;
        return new CodeChecksum(digest);
    }
    finally
    {
        if (md5 != null)
        {
            ((IDisposable)md5).Dispose();
        }
    }
}
/// <summary>
/// Creates an instruction collection bound to the given binary image.
/// </summary>
/// <param name="image">The binary image to keep; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="image"/> is null.
/// </exception>
public InstructionCollection(BinaryImage image)
{
    if (image == null)
    {
        // A collection without an image is not usable.
        throw new ArgumentNullException("image");
    }
    else
    {
        this.image = image;
    }
}
/// <summary>
/// Computes the checksum of a procedure using an MD5 hash.
/// </summary>
/// <param name="procedure">The procedure to hash.</param>
/// <param name="image">The binary image the procedure belongs to.</param>
/// <returns>A checksum wrapping the final hash value.</returns>
public static CodeChecksum Compute(Procedure procedure, BinaryImage image)
{
    using (HashAlgorithm algorithm = MD5.Create())
    {
        // Feed the procedure's contents into the hash.
        ComputeMore(algorithm, procedure, image);

        // Finalize with an empty block so that Hash becomes available.
        byte[] emptyBlock = new byte[0];
        algorithm.TransformFinalBlock(emptyBlock, 0, 0);

        CodeChecksum checksum = new CodeChecksum(algorithm.Hash);
        return checksum;
    }
}
/// <summary>
/// Initializes the disassembler with the binary image it works on.
/// </summary>
/// <param name="image">The binary image to keep; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="image"/> is null.
/// </exception>
protected DisassemblerBase(BinaryImage image)
{
    if (image == null)
    {
        // A disassembler without an image is not usable.
        throw new ArgumentNullException("image");
    }
    else
    {
        this.image = image;
    }
}
/// <summary>
/// Lazily enumerates the instructions covered by this block, starting at
/// the block's location and advancing by each instruction's encoded
/// length until the end of the block is reached.
/// </summary>
/// <param name="image">The image whose instruction table is searched.</param>
/// <returns>The instructions of the block, in address order.</returns>
public IEnumerable<Instruction> GetInstructions(BinaryImage image)
{
    Address cursor = this.location;
    while (cursor != this.location + length)
    {
        // Look up the instruction that starts at the current address.
        Instruction current = image.Instructions.Find(cursor);
        yield return current;

        // Step over the instruction just produced.
        cursor += current.EncodedLength;
    }
}
/// <summary>
/// Creates a basic block covering the range from <paramref name="begin"/>
/// to <paramref name="end"/>.
/// </summary>
/// <param name="begin">Address where the block starts.</param>
/// <param name="end">Address where the block ends; must be on the same
/// segment as <paramref name="begin"/>.</param>
/// <param name="type">Type of the block.</param>
/// <param name="image">Image used to enumerate the block's instructions
/// when computing its features.</param>
/// <exception cref="ArgumentException">
/// <paramref name="begin"/> and <paramref name="end"/> are on different
/// segments.
/// </exception>
public BasicBlock(Address begin, Address end, BasicBlockType type, BinaryImage image)
{
    bool sameSegment = (begin.Segment == end.Segment);
    if (!sameSegment)
    {
        throw new ArgumentException("Basic block must be on the same segment.");
    }

    this.type = type;
    this.location = begin;
    this.length = end.Offset - begin.Offset;

    // The features are derived from the block's instructions, so the
    // location and length must already be set at this point.
    IEnumerable<Instruction> instructions = GetInstructions(image);
    this.features = CodeFeaturesHelper.GetFeatures(instructions);
}
/// <summary>
/// Feeds the instructions of a basic block into the hash.
/// </summary>
/// <param name="hasher">Hash algorithm that accumulates the data.</param>
/// <param name="basicBlock">The block whose instructions are hashed.</param>
/// <param name="image">The image that supplies the block's bytes and
/// instructions.</param>
private static void ComputeMore(
    HashAlgorithm hasher, BasicBlock basicBlock, BinaryImage image)
{
    ArraySegment<byte> bytes = image.GetBytes(basicBlock.Location, basicBlock.Length);
    byte[] buffer = bytes.Array;
    int position = bytes.Offset;

    // TODO: maybe we should subclass X86Codec.Instruction to provide
    // rich functionalities???
    foreach (Instruction instruction in basicBlock.GetInstructions(image))
    {
        // Hash this instruction, then move to where the next one starts.
        ComputeMore(hasher, buffer, position, instruction);
        position += instruction.EncodedLength;
    }
}
/// <summary>
/// Splits an existing basic block into two at the given address. The
/// block must already be in this collection.
/// </summary>
/// <param name="block">The block to split.</param>
/// <param name="cutoff">Address at which the second block begins; must
/// lie within the bounds of <paramref name="block"/>.</param>
/// <param name="image">Image passed on to the constructors of the two new
/// blocks.</param>
/// <returns>
/// A two-element array holding the first and second block, or null if
/// <paramref name="cutoff"/> equals the block's location (nothing to
/// split).
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="block"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="cutoff"/> is outside the block's bounds.
/// </exception>
/// <exception cref="ArgumentException">
/// The block is not found in this collection.
/// </exception>
public BasicBlock[] SplitBasicBlock(BasicBlock block, Address cutoff, BinaryImage image)
{
    // Every call is counted, including those rejected below.
    ++TimesSplit;

    if (block == null)
    {
        throw new ArgumentNullException("block");
    }
    if (!block.Bounds.Contains(cutoff))
    {
        throw new ArgumentOutOfRangeException("cutoff");
    }
    if (cutoff == block.Location)
    {
        return null;
    }

    // Locate the block's slot through the per-segment lookup map.
    int segment = block.Location.Segment;
    bool segmentKnown = (segment >= 0) && (segment < map.Count);
    if (!segmentKnown)
    {
        throw new ArgumentException("Block must be within the collection.");
    }

    int slot;
    bool found = map[segment].TryGetValue(block.Location.Offset, out slot);
    if (!found)
    {
        throw new ArgumentException("Block must be within the collection.");
    }

    // Build the two halves. The first half falls through into the second;
    // the second half inherits the type of the original block.
    var bounds = block.Bounds;
    BasicBlock head = new BasicBlock(bounds.Begin, cutoff, BasicBlockType.FallThrough, image);
    BasicBlock tail = new BasicBlock(cutoff, bounds.End, block.Type, image);

    // The first half takes over the original block's slot; the second
    // half is appended to the list and registered in the lookup map.
    blocks[slot] = head;
    blocks.Add(tail);
    map[segment].Add(tail.Location.Offset, blocks.Count - 1);

    return new BasicBlock[] { head, tail };
}
/// <summary>
/// Feeds the instructions and the control-flow structure of a procedure
/// into the hash, walking its basic blocks from the entry point.
/// </summary>
/// <param name="hasher">Hash algorithm that accumulates the data.</param>
/// <param name="procedure">Procedure whose entry point starts the walk.</param>
/// <param name="image">Image that supplies the basic blocks and the
/// control flow graph.</param>
/// <exception cref="InvalidOperationException">
/// A block has more than one fall-through edge or more than one
/// non-fall-through edge.
/// </exception>
private static void ComputeMore(
    HashAlgorithm hasher, Procedure procedure, BinaryImage image)
{
    // TODO: add the traversal logic into Graph class.
    // or maybe GraphAlgorithms.Traversal(...).

    // The walk is breadth-first, driven by a queue. DFS would work just
    // as well provided it were used consistently; BFS is simply easier
    // to follow.
    Queue<Address> pending = new Queue<Address>();
    pending.Enqueue(procedure.EntryPoint);

    // Maps the entry address of a block to its position in the visiting
    // sequence. A block that is the target of a non-fall-through edge
    // gets its position the first time it is dequeued; the position is
    // hashed to give a hint of the graph's structure.
    Dictionary<Address, int> visited = new Dictionary<Address, int>();

    XRefCollection cfg = image.BasicBlocks.ControlFlowGraph.Graph;

    while (pending.Count > 0)
    {
        Address source = pending.Dequeue();

        // A block seen before contributes only its existing order; a
        // new block is assigned the next free order first. Either way
        // the order is hashed.
        int order;
        bool alreadyVisited = visited.TryGetValue(source, out order);
        if (!alreadyVisited)
        {
            order = visited.Count;
            visited.Add(source, order);
        }
        ComputeMore(hasher, order);
        if (alreadyVisited)
        {
            continue;
        }

        // Hash the instructions of the block, following fall-through
        // edges so that artificial block boundaries do not affect the
        // result. Example:
        //
        //   MySub:                LibSub1:
        //     mov ax, bx            mov ax, bx
        //                         LibSub2:
        //     mov bx, cx            mov bx, cx
        //     ret                   ret
        //
        // MySub and LibSub1 are identical three-instruction procedures.
        // If someone calls into the middle of LibSub1, the block is
        // split in two with a fall-through edge from LibSub1 to LibSub2.
        // Without following that edge, the right-hand procedure would
        // hash differently from the left-hand one.
        bool fellThrough;
        do
        {
            BasicBlock block = image.BasicBlocks.Find(source);
            System.Diagnostics.Debug.Assert(block != null);

            // Hash the instructions in the block. Only the opcode part
            // of each instruction is hashed; displacement and immediate
            // parts may be subject to fix-up and are therefore ignored.
            ComputeMore(hasher, block, image);

            // Classify the outgoing edges. The non-fall-through edges
            // must be ordered in a way that depends only on the graph's
            // structure, not on how the target blocks happen to be
            // arranged. With a single such edge this is trivially true;
            // multiple edges (e.g. an indexed jump) are not supported.
            //
            // TBD: handle multiple outgoing edges.
            XRef fallThroughEdge = null;
            XRef nonFallThroughEdge = null;
            foreach (XRef flow in cfg.GetReferencesFrom(source))
            {
                if (flow.Type == XRefType.FallThrough)
                {
                    if (fallThroughEdge != null)
                    {
                        throw new InvalidOperationException("Cannot have more than one fall-through edge.");
                    }
                    fallThroughEdge = flow;
                }
                else
                {
                    if (nonFallThroughEdge != null)
                    {
                        throw new InvalidOperationException("Cannot have more than one non-fall-through edge.");
                    }
                    nonFallThroughEdge = flow;
                }
            }

            // Hash the type of the special edge and schedule its target.
            if (nonFallThroughEdge != null)
            {
                ComputeMore(hasher, (int)nonFallThroughEdge.Type);
                pending.Enqueue(nonFallThroughEdge.Target);
            }

            // Continue with the fall-through successor, if there is one.
            fellThrough = (fallThroughEdge != null);
            if (fellThrough)
            {
                source = fallThroughEdge.Target;
            }
        }
        while (fellThrough);
    }
}
/// <summary>
/// Feeds the instructions and the control-flow structure of a procedure
/// into the hash, walking its basic blocks from the entry point.
/// </summary>
/// <param name="hasher">Hash algorithm that accumulates the data.</param>
/// <param name="procedure">Procedure whose entry point starts the walk.</param>
/// <param name="image">Image that supplies the basic blocks and the
/// control flow graph.</param>
/// <exception cref="InvalidOperationException">
/// A block has more than one fall-through edge or more than one
/// non-fall-through edge.
/// </exception>
private static void ComputeMore(
    HashAlgorithm hasher, Procedure procedure, BinaryImage image)
{
    // TODO: add the traversal logic into Graph class.
    // or maybe GraphAlgorithms.Traversal(...).

    // Work list for a breadth-first walk. The choice between BFS and DFS
    // does not matter as long as it never changes; BFS is used because
    // it is the easier one to reason about.
    Queue<Address> workList = new Queue<Address>();
    workList.Enqueue(procedure.EntryPoint);

    // Entry address of a block -> index in the sequence of visited
    // blocks. The index is assigned the first time a block (the target
    // of a non-fall-through edge) is taken off the work list, and is
    // hashed as a hint of the graph's structure.
    Dictionary<Address, int> visitOrder = new Dictionary<Address, int>();

    XRefCollection graph = image.BasicBlocks.ControlFlowGraph.Graph;

    while (workList.Count > 0)
    {
        Address current = workList.Dequeue();

        // Revisited block: hash its known index and move on.
        int index;
        if (visitOrder.TryGetValue(current, out index))
        {
            ComputeMore(hasher, index);
            continue;
        }

        // First visit: assign the next index and hash it.
        index = visitOrder.Count;
        visitOrder.Add(current, index);
        ComputeMore(hasher, index);

        // Hash the block's instructions and keep going along
        // fall-through edges, so that artificially split blocks hash
        // the same as unsplit ones. Example:
        //
        //   MySub:                LibSub1:
        //     mov ax, bx            mov ax, bx
        //                         LibSub2:
        //     mov bx, cx            mov bx, cx
        //     ret                   ret
        //
        // Both procedures consist of the same three instructions, but a
        // call into the middle of LibSub1 forces a split with a
        // fall-through edge from LibSub1 to LibSub2. Ignoring that edge
        // would make the two procedures hash differently.
        while (true)
        {
            BasicBlock block = image.BasicBlocks.Find(current);
            System.Diagnostics.Debug.Assert(block != null);

            // Only the opcode part of each instruction is hashed; the
            // displacement and immediate parts are potentially subject
            // to fix-up and are left out.
            ComputeMore(hasher, block, image);

            // Pick out the (at most one) fall-through edge and the (at
            // most one) other edge. The order in which non-fall-through
            // edges are processed must depend only on the graph's
            // structure; that only becomes an issue with several such
            // edges, e.g. an indexed jump.
            //
            // TBD: handle multiple outgoing edges.
            XRef next = null;
            XRef branch = null;
            foreach (XRef edge in graph.GetReferencesFrom(current))
            {
                if (edge.Type != XRefType.FallThrough)
                {
                    if (branch != null)
                    {
                        throw new InvalidOperationException("Cannot have more than one non-fall-through edge.");
                    }
                    branch = edge;
                }
                else
                {
                    if (next != null)
                    {
                        throw new InvalidOperationException("Cannot have more than one fall-through edge.");
                    }
                    next = edge;
                }
            }

            // Hash the branch's flow type and queue its target.
            if (branch != null)
            {
                ComputeMore(hasher, (int)branch.Type);
                workList.Enqueue(branch.Target);
            }

            // Stop at the end of the fall-through chain.
            if (next == null)
            {
                break;
            }
            current = next.Target;
        }
    }
}
/// <summary>
/// Feeds the instructions of a basic block into the hash.
/// </summary>
/// <param name="hasher">Hash algorithm that accumulates the data.</param>
/// <param name="basicBlock">The block whose instructions are hashed.</param>
/// <param name="image">The image that supplies the block's bytes and
/// instructions.</param>
private static void ComputeMore(
    HashAlgorithm hasher, BasicBlock basicBlock, BinaryImage image)
{
    ArraySegment<byte> code = image.GetBytes(basicBlock.Location, basicBlock.Length);

    // Number of bytes of the block consumed by the instructions so far.
    int consumed = 0;

    // TODO: maybe we should subclass X86Codec.Instruction to provide
    // rich functionalities???
    foreach (Instruction instruction in basicBlock.GetInstructions(image))
    {
        int start = code.Offset + consumed;
        ComputeMore(hasher, code.Array, start, instruction);
        consumed += instruction.EncodedLength;
    }
}
/// <summary>
/// Creates a byte attribute collection bound to the given binary image.
/// </summary>
/// <param name="image">The binary image to keep; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="image"/> is null.
/// </exception>
public ByteAttributeCollection(BinaryImage image)
{
    // Validate like the sibling constructors (InstructionCollection,
    // DisassemblerBase) do, so that a missing image is reported here
    // instead of as a NullReferenceException on first use.
    if (image == null)
    {
        throw new ArgumentNullException("image");
    }

    this.image = image;
}
/// <summary>
/// Currently a no-op: the entire body is compiled out with
/// <c>#if false</c>. The disabled code sketches building a
/// <c>BinaryImage</c> from the first "CODE" segment of the module,
/// decoding its instructions sequentially, and showing the result in the
/// listing window.
/// </summary>
/// <param name="module">The object module to display. Not used while the
/// body is disabled.</param>
private void UpdateImage(ObjectModule module)
{
#if false
    // For each segment, construct a list of LEDATA/LIDATA records.
    // These records fill data into the segment.
    // It is required that the data do not overlap, and do not
    // exceed segment boundary (here we only support 16-bit segments,
    // whose maximum size is 64KB).

    // Find the first CODE segment.
    LogicalSegment codeSegment = null;
    foreach (var seg in module.Segments)
    {
        if (seg.Class == "CODE")
        {
            codeSegment = seg;
            break;
        }
    }
    if (codeSegment == null)
        return;

    // Create a BinaryImage with the code.
    BinaryImage image = new BinaryImage(codeSegment.Image.Data, new Pointer(0, 0));

    // Disassemble the instructions literally. Note that this should
    // be improved, but we don't do that yet.
    var addr = image.BaseAddress;
    for (var i = image.StartAddress; i < image.EndAddress; )
    {
        var instruction = image.DecodeInstruction(addr);

        // An operand may have zero or one component that may be
        // fixed up. Check this.
#if false
        for (int k = 0; k < instruction.Operands.Length; k++)
        {
            var operand = instruction.Operands[k];
            if (operand is RelativeOperand)
            {
                var opr = (RelativeOperand)operand;
                var loc = opr.Offset.Location;
                int j = i - image.StartAddress + loc.StartOffset;
                int fixupIndex = codeSegment.DataFixups[j];
                if (fixupIndex != 0)
                {
                    FixupDefinition fixup = codeSegment.Fixups[fixupIndex - 1];
                    if (fixup.DataOffset != j)
                        continue;

                    var target = new SymbolicTarget(fixup, module);
                    instruction.Operands[k] = new SymbolicRelativeOperand(target);
                    System.Diagnostics.Debug.WriteLine(instruction.ToString());
                }
            }
        }
#endif

        image.CreatePiece(addr, addr + instruction.EncodedLength, ByteType.Code);
        image[addr].Instruction = instruction;
        addr = addr.Increment(instruction.EncodedLength);

        // TODO: we need to check more accurately.

#if false
        // Check if any bytes covered by this instruction has a fixup
        // record associated with it. Note that an instruction might
        // have multiple fixup records associated with it, such as
        // in a far call.
        for (int j = 0; j < instruction.EncodedLength; j++)
        {
            int fixupIndex = codeSegment.DataFixups[i - image.StartAddress + j];
            if (fixupIndex != 0)
            {
                FixupDefinition fixup = codeSegment.Fixups[fixupIndex - 1];
                if (fixup.DataOffset != i - image.StartAddress + j)
                    continue;

                if (fixup.Target.Method == FixupTargetSpecFormat.ExternalPlusDisplacement ||
                    fixup.Target.Method == FixupTargetSpecFormat.ExternalWithoutDisplacement)
                {
                    var extIndex = fixup.Target.IndexOrFrame;
                    var extName = module.ExternalNames[extIndex - 1];
                    var disp = fixup.Target.Displacement;

                    System.Diagnostics.Debug.WriteLine(string.Format(
                        "{0} refers to {1}+{2} : {3}",
                        instruction, extName, disp, fixup.Location));
                }
            }
        }
#endif

        i += instruction.EncodedLength;
    }
    // ...

    // Display the code in our disassembly window.
    if (this.ListingWindow != null)
    {
        Document doc = new Document();
        doc.Image = image;
        this.ListingWindow.Document = doc;
    }
#endif
}