/// <summary>
/// Recursively disassembles instructions starting at <paramref name="addr"/>,
/// stopping before the end address of the guessed procedure
/// <paramref name="proc"/>, and records the discovered blocks and edges in
/// the procedure's control flow graph.
/// </summary>
/// <param name="addr">Address at which to start disassembling.</param>
/// <param name="proc">
/// Guessed procedure; its end address bounds the disassembly and its
/// control flow graph receives the blocks and edges that are found.
/// </param>
/// <returns>
/// The block starting at <paramref name="addr"/>: either a block that was
/// already known (split first if <paramref name="addr"/> fell inside it), or
/// a newly created one. If no instruction could be taken at
/// <paramref name="addr"/>, the returned block is empty and has not been
/// added to the graph's nodes.
/// </returns>
public HeuristicBlock HeuristicDisassemble(Address addr, HeuristicProcedure proc)
{
    var current = new HeuristicBlock(addr, string.Format("l{0:X}", addr));
    var rAddr = prog.Architecture.CreateRewriter(
        prog.CreateImageReader(addr),
        prog.Architecture.CreateProcessorState(),
        proc.Frame,
        host);
    foreach (var rtl in rAddr.TakeWhile(r => r.Address < proc.EndAddress))
    {
        HeuristicBlock block;
        if (blockMap.TryGetValue(rtl.Address, out block))
        {
            // This instruction was already disassembled before.
            if (rtl.Address.ToLinear() != block.Address.ToLinear())
            {
                // We landed in the interior of 'block'; split it so that
                // a block starts exactly at this instruction.
                block = SplitBlock(block, rtl.Address, proc);
            }
            if (current.Statements.Count == 0)
            {
                // Coincides exactly, return the old block.
                return block;
            }
            else
            {
                // Fell into 'block' while disassembling
                // 'current'. Create a fall-through edge.
                if (!proc.Cfg.Nodes.Contains(current))
                {
                    proc.Cfg.Nodes.Add(current);
                }
                proc.Cfg.AddEdge(current, block);
                return current;
            }
        }
        else
        {
            // Fresh instruction
            if (!proc.Cfg.Nodes.Contains(current))
            {
                proc.Cfg.Nodes.Add(current);
            }
            current.Statements.Add(rtl);
            blockMap.Add(rtl.Address, current);
            var rtlLast = rtl.Instructions.Last();
            if (rtlLast is RtlCall || rtlLast is RtlReturn)
            {
                // Since calls cannot be depended on to return,
                // we stop disassembling.
                return current;
            }
            var rtlJump = rtlLast as RtlGoto;
            if (rtlJump != null)
            {
                var target = rtlJump.Target as Address;
                if (target == null ||
                    target < proc.BeginAddress ||
                    target >= proc.EndAddress)
                {
                    // Stop disassembling if you get outside
                    // the procedure or a computed goto.
                    return current;
                }
                block = HeuristicDisassemble(target, proc);
                // The recursive call may have split 'current' (e.g. a jump
                // back into the middle of this very block). SplitBlock
                // re-homes the moved instructions in blockMap, so look up
                // the block that now owns the jump instruction and use it
                // as the source of the edge.
                proc.Cfg.AddEdge(blockMap[rtl.Address], block);
                return current;
            }
            var rtlBranch = rtlLast as RtlBranch;
            if (rtlBranch != null)
            {
                // Taken edge. As above, the edge must leave the block that
                // owns the branch instruction after any splitting.
                block = HeuristicDisassemble(rtlBranch.Target, proc);
                proc.Cfg.AddEdge(blockMap[rtl.Address], block);
                // Fall-through edge. Look the owner up again, since the
                // second recursion may also have caused a split.
                block = HeuristicDisassemble(rtl.Address + rtl.Length, proc);
                proc.Cfg.AddEdge(blockMap[rtl.Address], block);
                return current;
            }
        }
    }
    return current;
}
// Partition memory into chunks between each candidate.
// Decode each possible instruction at each possible address, yielding a
// list of potential instructions.
// Identify intra-procedural transfers:
//  - target is in this chunk.
//  - conditional jmp.
// HeuristicFunction will have
//  - start address
//  - end address
// To find all of these, scan all the potential_instructions, checking
// whether any of them are a GOTO or a RtlBranch.
//  if found, add to <Set>jump_candidates
// Now use scanner to build initial CFG
//  feed scanner with fn start and all jump_candidates
//  this may yield dupes and broken blocks.
// SpuriousNodes: how to get rid of.
//
// It is possible to have instructions in the initial call graph that
// overlap. In this case, two different basic blocks in the call graph
// can contain overlapping instructions starting at slightly different
// addresses. When following a sequence of instructions, the disassembler
// can arrive at an instruction that is already part of a previously found
// basic block. In the regular case, this instruction is the first
// instruction of the existing block. The disassembler can complete the
// instruction sequence of the current block and create a link to the
// existing basic block in the control flow graph.

/// <summary>
/// Splits <paramref name="block"/> at <paramref name="addr"/>: the
/// statements at or above that address move into a new block, which also
/// takes over the original block's successor edges and is linked to the
/// original by a single edge.
/// </summary>
/// <param name="block">Existing block to split; it keeps the statements below <paramref name="addr"/>.</param>
/// <param name="addr">Address at which the new block starts.</param>
/// <param name="proc">Procedure whose control flow graph is updated.</param>
/// <returns>The newly created block starting at <paramref name="addr"/>.</returns>
private HeuristicBlock SplitBlock(HeuristicBlock block, Address addr, HeuristicProcedure proc)
{
    var tail = new HeuristicBlock(addr, string.Format("l{0:X}", addr));
    proc.Cfg.Nodes.Add(tail);

    // Move the statements at or beyond the split point, in address order.
    var movedStatements = block.Statements
        .Where(stm => stm.Address >= addr)
        .OrderBy(stm => stm.Address)
        .ToList();
    tail.Statements.AddRange(movedStatements);

    // Re-home the moved addresses in the address-to-block map. The keys are
    // collected first so the map is not modified while it is enumerated.
    var movedKeys = blockMap
        .Where(entry => entry.Key >= addr && entry.Value == block)
        .Select(entry => entry.Key)
        .ToList();
    foreach (var key in movedKeys)
    {
        blockMap[key] = tail;
    }
    block.Statements.RemoveAll(stm => stm.Address >= addr);

    // The tail inherits every outgoing edge of the original block; the
    // successor list is snapshotted because the edges change in the loop.
    var oldSuccessors = proc.Cfg.Successors(block).ToArray();
    foreach (var successor in oldSuccessors)
    {
        proc.Cfg.AddEdge(tail, successor);
        proc.Cfg.RemoveEdge(block, successor);
    }

    // The head now reaches the tail only by falling through.
    proc.Cfg.AddEdge(block, tail);
    return tail;
}