/// <summary>
/// Writes a human-readable listing of one cluster to the debug output:
/// every block in ascending address order, with its predecessors, its
/// instructions and its successors as recorded in the ICFG of
/// <paramref name="sr"/>.
/// </summary>
/// <param name="cc">The cluster whose blocks are dumped.</param>
/// <param name="sr">Scan results supplying the ICFG, the known procedure
/// addresses and the directly called addresses.</param>
private void DumpCluster(Cluster cc, ScanResults sr)
{
    Debug.Print("-- Cluster -----------------------------");
    Debug.Print("{0} nodes", cc.Blocks.Count);
    foreach (var block in cc.Blocks.OrderBy(n => n.Address))
    {
        // Emit a banner when the block starts a known procedure or is the
        // target of a direct call.
        if (sr.KnownProcedures.Contains(block.Address))
        {
            Debug.WriteLine("");
            Debug.Print("-- {0}: known procedure ----------", block.Address);
        }
        else if (sr.DirectlyCalledAddresses.TryGetValue(block.Address, out var callCount))
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            Debug.WriteLine("");
            Debug.Print("-- {0}: possible procedure, called {1} time(s) ----------",
                block.Address,
                callCount);
        }

        Debug.Print("{0}: // pred: {1}",
            block.Name,
            string.Join(" ", sr.ICFG.Predecessors(block)
                .OrderBy(n => n.Address)
                .Select(n => n.Address)));

        foreach (var instr in block.Instructions.SelectMany(c => c.Instructions))
        {
            Debug.Print(" {0}", instr);
        }

        Debug.Print(" // succ: {0}",
            string.Join(" ", sr.ICFG.Successors(block)
                .OrderBy(n => n.Address)
                .Select(n => n.Address)));
    }
    Debug.Print("");
}
/// <summary>
/// Performs a shingle scan to recover procedures that weren't found by
/// a recursive search. Assumes that Program.ImageMap may contain
/// blocks of "known code" and only scans in the gaps between the
/// blocks.
/// </summary>
/// <remarks>
/// If the heuristic scanner returns <c>null</c>, the scan results already
/// held by this scanner are left untouched and nothing is enqueued.
/// </remarks>
public void ShingleScanProcedures()
{
    var hsc = new ScannerInLinq(Services, Program, this, eventListener);
    var scanned = hsc.ScanImage(sr);
    if (scanned == null)
    {
        // Don't overwrite the accumulated scan results with null; later
        // users of the field would otherwise dereference a null reference.
        return;
    }
    sr = scanned;

    // The heuristic scanner will have detected any remaining
    // procedures.
    var procs = sr.Procedures;

    // At this point, we have RtlProcedures and RtlBlocks.
    //$TODO: However, Reko hasn't had a chance to reconstitute constants yet,
    // because that requires SSA, so we may be missing
    // opportunities to build and detect pointers. This typically happens in
    // the type inference phase, when we both have constants and their types.
    //
    // When this gets merged into the analysis-development phase, fold
    // Procedure construction into SSA construction.
    foreach (var rtlProc in procs.Where(FilterRtlProcedure))
    {
        var addrProc = rtlProc.Entry.Address;
        TerminateAnyBlockAt(addrProc);
        EnqueueProcedure(Program.Architecture, addrProc);
    }
    ProcessQueue();
}
/// <summary>
/// Creates a scanner for <paramref name="program"/>, resolving the event
/// listener and the optional cancellation source from
/// <paramref name="services"/> and starting with empty work queues.
/// </summary>
/// <param name="program">Program to scan; its segment map must not be null.</param>
/// <param name="dynamicLinker">Dynamic linker stored for later use.</param>
/// <param name="services">Service provider used to look up collaborators.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="program"/> has no segment map.
/// </exception>
public Scanner(
    Program program,
    IDynamicLinker dynamicLinker,
    IServiceProvider services)
    : base(program, services.RequireService<DecompilerEventListener>())
{
    // Collaborators handed in by the caller or looked up as services.
    this.segmentMap = program.SegmentMap;
    this.dynamicLinker = dynamicLinker;
    this.Services = services;
    this.eventListener = services.RequireService<DecompilerEventListener>();
    this.cancelSvc = services.GetService<CancellationTokenSource>();

    if (segmentMap == null)
    {
        throw new InvalidOperationException("Program must have an segment map.");
    }

    // Create the image map on demand from the segment map.
    if (program.ImageMap == null)
    {
        program.ImageMap = segmentMap.CreateImageMap();
    }
    this.imageMap = program.ImageMap;

    // Bookkeeping structures all start out empty.
    this.procQueue = new PriorityQueue<WorkItem>();
    this.blocks = new BTreeDictionary<Address, BlockRange>();
    this.blockStarts = new Dictionary<Block, Address>();
    this.importReferences = program.ImportReferences;
    this.visitedProcs = new HashSet<Procedure>();
    this.cinj = new CommentInjector(program.User.Annotations);

    // Seed the scan results with user-specified procedures and image symbols.
    this.sr = new ScanResults
    {
        KnownProcedures = program.User.Procedures.Keys.ToHashSet(),
        KnownAddresses = program.ImageSymbols.ToDictionary(de => de.Key, de => de.Value),
        ICFG = new DiGraph<RtlBlock>(),
    };
}
/// <summary>
/// Runs the shingled scanner over every unscanned range.
/// </summary>
/// <param name="sr">Scan results handed to the shingled scanner.</param>
/// <returns>
/// <paramref name="sr"/> if at least one unscanned range was found;
/// otherwise <c>null</c>.
/// </returns>
public ScanResults ScanInstructions(ScanResults sr)
{
    var unscannedRanges = FindUnscannedRanges().ToList();
    DumpRanges(unscannedRanges);

    var storageBinder = new StorageBinder();
    var shingler = new ShingledScanner(this.program, this.host, storageBinder, sr, this.eventListener);

    foreach (var r in unscannedRanges)
    {
        try
        {
            shingler.ScanRange(
                r.Item1,
                r.Item2,
                r.Item3,
                r.Item3.ToLinear() - r.Item2.ToLinear());
        }
        catch (AddressCorrelatedException aex)
        {
            // Report the failure and carry on with the next range.
            host.Error(aex.Address, aex.Message);
        }
    }

    if (unscannedRanges.Count == 0)
    {
        // No unscanned blocks were found.
        return null;
    }
    shingler.Dump("After shingle scan graph built");
    return sr;
}
/// <summary>
/// Heuristically disassembles a range of addresses between
/// <paramref name="addrStart"/> and <paramref name="addrEnd"/>.
/// </summary>
/// <param name="addrStart">First address at which disassembly is attempted.</param>
/// <param name="addrEnd">Exclusive upper bound; disassembly is attempted only
/// at addresses strictly below it.</param>
/// <returns>
/// A <see cref="HeuristicProcedure"/> whose control flow graph was handed to
/// the disassembler as the ICFG of a fresh <see cref="ScanResults"/>.
/// </returns>
public HeuristicProcedure DisassembleProcedure(Address addrStart, Address addrEnd)
{
    var proc = new HeuristicProcedure(
        addrStart,
        addrEnd,
        program.Architecture.CreateFrame());
    var sr = new ScanResults
    {
        ICFG = proc.Cfg,
        DirectlyCalledAddresses = new Dictionary<Address, int>(),
    };
    var dasm = new HeuristicDisassembler(
        program,
        binder,
        sr,
        proc.IsValidAddress,
        true,
        host);

    int instrByteGranularity = program.Architecture.InstructionBitSize / 8;
    if (instrByteGranularity < 1)
    {
        // An instruction size below 8 bits would give a step of 0 and the
        // loop below would never advance; fall back to a step of 1.
        instrByteGranularity = 1;
    }

    // Try every address at which an instruction could start.
    for (Address addr = addrStart; addr < addrEnd; addr = addr + instrByteGranularity)
    {
        dasm.Disassemble(addr);
    }
    DumpBlocks(proc.Cfg.Nodes);
    return proc;
}
/// <summary>
/// Removes from the ICFG every block that is invalid or from which an
/// invalid block can be reached, together with the matching entries in
/// <c>DirectlyCalledAddresses</c>.
/// </summary>
/// <param name="sr">Scan results whose ICFG is pruned in place.</param>
private void RemoveInvalidBlocks(ScanResults sr)
{
    // Artificial root that points at every invalid block.
    var sentinel = new RtlBlock(null!, "<invalid>");

    // Build the reversed graph: an edge s -> b for every ICFG edge b -> s.
    var reversed = new DiGraph<RtlBlock>();
    reversed.AddNode(sentinel);
    foreach (var node in sr.ICFG.Nodes)
    {
        reversed.AddNode(node);
    }
    foreach (var node in sr.ICFG.Nodes)
    {
        foreach (var succ in sr.ICFG.Successors(node))
        {
            reversed.AddEdge(succ, node);
        }
        if (!node.IsValid)
        {
            reversed.AddEdge(sentinel, node);
        }
    }

    // Everything reachable from the sentinel in the reversed graph is
    // the transitive closure of the invalid nodes.
    var doomed = new DfsIterator<RtlBlock>(reversed)
        .PreOrder(sentinel)
        .ToList();
    foreach (var node in doomed)
    {
        if (node != sentinel)
        {
            sr.ICFG.RemoveNode(node);
            sr.DirectlyCalledAddresses.Remove(node.Address);
            // No per-node trace here: it becomes very verbose.
        }
    }
}
/// <summary>
/// Scans the unscanned parts of the image, builds basic blocks and an ICFG
/// from the result, resolves block conflicts, strips padding blocks and
/// finally detects procedures.
/// </summary>
/// <param name="sr">Scan results updated in place.</param>
/// <returns>
/// <paramref name="sr"/>, unchanged by the later stages when
/// <c>ScanInstructions</c> returns <c>null</c>.
/// </returns>
public ScanResults ScanImage(ScanResults sr)
{
    this.sr = sr;
    // sr.WatchedAddresses.Add(Address.Ptr32(0x00404F5C)); //$DEBUG

    // At this point, we have some entries in the image map
    // that are data, and unscanned ranges in between. We
    // have hopefully a bunch of procedure addresses to
    // break up the unscanned ranges.
    if (ScanInstructions(sr) == null)
    {
        return sr;
    }

    var basicBlocks = BuildBasicBlocks(sr);
    sr.BreakOnWatchedAddress(basicBlocks.Select(q => q.Key));

    // Remove blocks that fall off the end of the segment
    // or into data.
    basicBlocks = RemoveInvalidBlocks(sr, basicBlocks);
    Probe(sr);
    sr.ICFG = BuildIcfg(sr, program.NamingPolicy, basicBlocks);
    Probe(sr);
    sr.Dump("After shingle scan");

    // On processors with variable length instructions,
    // there may be many blocks that partially overlap the
    // "real" blocks that would actually have been executed
    // by the processor. Starting with known "roots", try to
    // remove as many invalid blocks as possible.
    var conflictResolver = new BlockConflictResolver(
        program,
        sr,
        program.SegmentMap.IsValidAddress,
        host);
    Probe(sr);
    var roots = sr.KnownProcedures.Concat(sr.DirectlyCalledAddresses.Keys);
    conflictResolver.ResolveBlockConflicts(roots);
    Probe(sr);
    sr.Dump("After block conflict resolution");

    // If we detect padding bytes between blocks,
    // we remove them now.
    var paddingFinder = new ProcedurePaddingFinder(sr);
    var paddingBlocks = paddingFinder.FindPaddingBlocks();
    paddingFinder.Remove(paddingBlocks);

    var detector = new ProcedureDetector(program, sr, this.eventListener);
    sr.Procedures = detector.DetectProcedures();
    sr.RemovedPadding = paddingBlocks;
    return sr;
}
/// <summary>
/// Drops "bad" blocks: blocks containing an instruction of type
/// <c>RtlClass.Invalid</c>, plus every block that has an edge to a bad block
/// unless it ends with a call. Edges leading to bad blocks are removed from
/// <c>sr.FlatEdges</c>.
/// </summary>
/// <param name="sr">Scan results; <c>FlatEdges</c> is replaced.</param>
/// <param name="blocks">All blocks, keyed by block id.</param>
/// <returns>A new dictionary holding only the surviving blocks.</returns>
public Dictionary<Address, block> RemoveInvalidBlocks(ScanResults sr, Dictionary<Address, block> blocks)
{
    // Seed: blocks that contain an invalid instruction.
    var badIds = sr.FlatInstructions.Values
        .Where(i => i.type == (ushort)RtlClass.Invalid)
        .Select(i => i.block_id)
        .ToHashSet();
    var incoming = sr.FlatEdges.ToLookup(e => e.second);

    // Walk backwards along the edges until no new bad blocks turn up.
    // A block ending in a call is not tainted by a bad successor.
    //$TODO: delay slots. @#$#@
    var frontier = badIds;
    do
    {
        frontier = frontier
            .SelectMany(id => incoming[id])
            .Where(edge => !badIds.Contains(edge.first) &&
                           !BlockEndsWithCall(blocks[edge.first]))
            .Select(edge => edge.first)
            .ToHashSet();
        // Merging an empty frontier is a no-op, so the loop exits with
        // badIds unchanged on its last round.
        badIds.UnionWith(frontier);
    } while (frontier.Count != 0);

    Debug.Print("Bad blocks: {0} of {1}", badIds.Count, blocks.Count);
    //DumpBadBlocks(sr, blocks, sr.FlatEdges, badIds);

    // Remove edges to bad blocks and bad blocks.
    sr.FlatEdges = sr.FlatEdges
        .Where(e => !badIds.Contains(e.second))
        .ToList();
    return blocks.Values
        .Where(b => !badIds.Contains(b.id))
        .ToDictionary(k => k.id);
}
/// <summary>
/// Creates a procedure detector over the ICFG in <paramref name="sr"/>.
/// The initial procedure addresses are the known procedures plus all
/// directly called addresses.
/// </summary>
/// <param name="program">Not used by this constructor.</param>
/// <param name="sr">Scan results supplying the ICFG and entry addresses.</param>
/// <param name="listener">Event listener stored for later use.</param>
public ProcedureDetector(Program program, ScanResults sr, DecompilerEventListener listener)
{
    this.sr = sr;
    this.listener = listener;

    var calledAddresses = sr.DirectlyCalledAddresses.Keys;
    this.procedures = sr.KnownProcedures.Concat(calledAddresses).ToHashSet();

    // Dump duplicates first: ToDictionary below throws on a repeated address.
    var nodes = sr.ICFG.Nodes;
    DumpDuplicates(nodes);
    this.mpAddrToBlock = sr.ICFG.Nodes.ToDictionary(de => de.Address);
}
/// <summary>
/// Creates a data scanner with an empty work queue and an empty
/// procedure table.
/// </summary>
/// <param name="program">Program passed on to the base class.</param>
/// <param name="sr">Scan results stored for later use.</param>
/// <param name="listener">Event listener, also passed to the base class.</param>
public DataScanner(Program program, ScanResults sr, DecompilerEventListener listener)
    : base(program, listener)
{
    // Fresh, empty working state.
    this.queue = new Queue<WorkItem>();
    this.procedures = new Dictionary<Address, ImageSymbol>();

    // Collaborators supplied by the caller.
    this.sr = sr;
    this.listener = listener;
}
/// <summary>
/// Creates a shingled scanner and stores its collaborators.
/// </summary>
/// <param name="program">Program being scanned.</param>
/// <param name="host">Rewriter host.</param>
/// <param name="storageBinder">Storage binder.</param>
/// <param name="sr">Scan results.</param>
/// <param name="eventListener">Decompiler event listener.</param>
public ShingledScanner(
    Program program,
    IRewriterHost host,
    IStorageBinder storageBinder,
    ScanResults sr,
    DecompilerEventListener eventListener)
{
    this.program = program;
    this.host = host;
    this.storageBinder = storageBinder;
    this.sr = sr;
    this.eventListener = eventListener;

    // "Bad" is the address whose linear value has every bit set.
    this.Bad = program.Platform.MakeAddressFromLinear(~0ul);
}
/// <summary>
/// Creates a scanner whose scan results start out with the user-specified
/// procedures, the program's image symbols and an empty ICFG.
/// </summary>
/// <param name="program">Program to scan.</param>
/// <param name="importResolver">Import resolver passed to the base class.</param>
/// <param name="services">Service provider passed to the base class.</param>
public Scanner(Program program, IImportResolver importResolver, IServiceProvider services)
    : base(program, importResolver, services)
{
    var initialResults = new ScanResults
    {
        KnownProcedures = program.User.Procedures.Keys.ToHashSet(),
        KnownAddresses = program.ImageSymbols.ToDictionary(de => de.Key, de => de.Value),
        ICFG = new DiGraph<RtlBlock>(),
    };
    this.sr = initialResults;
}
/// <summary>
/// Creates a heuristic procedure scanner over the ICFG of
/// <paramref name="sr"/> and builds the conflict graph of its nodes.
/// </summary>
/// <param name="program">Program being scanned.</param>
/// <param name="sr">Scan results; its ICFG becomes the block graph.</param>
/// <param name="isAddressValid">Predicate stored for later use.</param>
/// <param name="host">Rewriter host.</param>
public HeuristicProcedureScanner(
    Program program,
    ScanResults sr,
    Func<Address, bool> isAddressValid,
    IRewriterHost host)
{
    this.program = program;
    this.host = host;
    this.isAddressValid = isAddressValid;
    this.sr = sr;
    this.blocks = sr.ICFG;

    // Built last, after every other field has been assigned.
    this.conflicts = BuildConflictGraph(this.blocks.Nodes);
}
/// <summary>
/// Dumps every cluster via <c>DumpCluster</c>, ordered by the lowest
/// block address each cluster contains.
/// </summary>
/// <param name="clusters">Clusters to dump.</param>
/// <param name="sr">Scan results passed through to <c>DumpCluster</c>.</param>
private void DumpClusters(List<Cluster> clusters, ScanResults sr)
{
    // Sort clusters by their earliest address.
    foreach (var cc in clusters.OrderBy(c => c.Blocks.Min(b => b.Address)))
    {
        DumpCluster(cc, sr);
    }
}
// Writes the start and end addresses, size, type tag of the last
// instruction, and successor edges of each block, one block per line,
// as a single string passed to writeLine.
public void DumpBlocks(ScanResults sr, Dictionary<Address, block> blocks, Action<string> writeLine)
{
    writeLine(
        string.Join(Environment.NewLine,
            from b in blocks.Values
            // End of each block: highest (address + size) of its instructions.
            join i in (
                from ii in sr.FlatInstructions.Values
                group ii by ii.block_id into g
                select new
                {
                    block_id = g.Key,
                    max = g.Max(iii => iii.addr.ToLinear() + (uint)iii.size)
                }) on b.id equals i.block_id
            // NOTE(review): 'l' is never read; this join only drops blocks
            // whose id is not the address of an instruction. Confirm
            // whether it is still needed.
            join l in sr.FlatInstructions.Values on b.id equals l.addr
            join e in sr.FlatEdges on b.id equals e.first into es
            from e in new[] { string.Join(", ", es.Select(ee => string.Format("{0:X8}", ee.second))) }
            orderby b.id
            select string.Format(
                "{0:X8}-{1:X8} ({2}): {3}{4}",
                b.id,
                b.id + (i.max - b.id.ToLinear()),
                i.max - b.id.ToLinear(),
                RenderType(b.instrs.Last().type),
                e)));

    // Maps an instruction class to a tag that always ends in a space, so
    // the edge list that follows never runs into the tag.
    string RenderType(ushort type)
    {
        var t = (InstrClass)type;
        if ((t & InstrClass.Zero) != 0)
        {
            return "Zer ";
        }
        if ((t & InstrClass.Padding) != 0)
        {
            return "Pad ";
        }
        if ((t & InstrClass.Call) != 0)
        {
            return "Cal ";
        }
        if ((t & InstrClass.ConditionalTransfer) == InstrClass.ConditionalTransfer)
        {
            return "Bra ";
        }
        if ((t & InstrClass.Transfer) != 0)
        {
            // Was "End" without the separator the other tags carry.
            return "End ";
        }
        return "Lin ";
    }
}
// Writes one line per flat instruction to the debug output, in address
// order: address, size, the type rendered as a letter ('A' + type), and
// the targets of the edges leaving the instruction.
void DumpInstructions(ScanResults sr)
{
    var lines =
        from instr in sr.FlatInstructions.Values
        join edge in sr.FlatEdges on instr.addr equals edge.first into outgoing
        let targets = string.Join(", ", outgoing.Select(o => string.Format("{0:X8}", o.second)))
        orderby instr.addr
        select string.Format(
            "{0:X8} {1} {2} {3}",
            instr.addr,
            instr.size,
            (char)(instr.type + 'A'),
            targets);
    Debug.WriteLine(string.Join("\r\n", lines));
}
/// <summary>
/// Creates a shingled scanner. The collections of <paramref name="sr"/>
/// that this constructor assigns are replaced with new, empty instances,
/// and the address graph is created with the "bad" address as its only node.
/// </summary>
/// <param name="program">Program being scanned.</param>
/// <param name="host">Rewriter host.</param>
/// <param name="storageBinder">Storage binder.</param>
/// <param name="sr">Scan results whose collections are reset here.</param>
/// <param name="eventListener">Decompiler event listener.</param>
public ShingledScanner(
    Program program,
    IRewriterHost host,
    IStorageBinder storageBinder,
    ScanResults sr,
    DecompilerEventListener eventListener)
{
    this.program = program;
    this.host = host;
    this.storageBinder = storageBinder;
    this.sr = sr;
    this.eventListener = eventListener;

    // Start from empty result collections.
    sr.TransferTargets = new HashSet<Address>();
    sr.DirectlyCalledAddresses = new Dictionary<Address, int>();
    sr.Instructions = new SortedList<Address, RtlInstructionCluster>();
    sr.FlatInstructions = new SortedList<Address, ScanResults.instr>();
    sr.FlatEdges = new List<ScanResults.link>();

    // The graph starts with the "bad" address (all bits set) as a node.
    this.G = new DiGraph<Address>();
    this.Bad = program.Platform.MakeAddressFromLinear(~0ul);
    this.G.AddNode(this.Bad);
}
/// <summary>
/// Creates a heuristic disassembler with an empty block map.
/// </summary>
/// <param name="program">Program being disassembled.</param>
/// <param name="binder">Storage binder.</param>
/// <param name="sr">Scan results.</param>
/// <param name="isAddrValid">Predicate stored for later use.</param>
/// <param name="assumeCallsDiverge">Flag stored for later use.</param>
/// <param name="host">Rewriter host.</param>
public HeuristicDisassembler(
    Program program,
    IStorageBinder binder,
    ScanResults sr,
    Func<Address, bool> isAddrValid,
    bool assumeCallsDiverge,
    IRewriterHost host)
{
    // Working state.
    this.blockMap = new Dictionary<Address, RtlBlock>();

    // Collaborators and settings supplied by the caller.
    this.program = program;
    this.binder = binder;
    this.sr = sr;
    this.host = host;
    this.isAddrValid = isAddrValid;
    this.assumeCallsDiverge = assumeCallsDiverge;
}
// Writes the start and end addresses, size, and successor edges of each
// block, one block per line, as a single string passed to writeLine.
public void DumpBlocks(ScanResults sr, Dictionary<Address, block> blocks, Action<string> writeLine)
{
    // For each block id: the highest (address + size) of its instructions.
    var blockEnds = sr.FlatInstructions.Values
        .GroupBy(ins => ins.block_id)
        .Select(g => new
        {
            BlockId = g.Key,
            End = g.Max(ins => ins.addr.ToLinear() + (uint)ins.size)
        });

    var lines =
        from b in blocks.Values
        join end in blockEnds on b.id equals end.BlockId
        join edge in sr.FlatEdges on b.id equals edge.first into outgoing
        let targets = string.Join(", ", outgoing.Select(o => string.Format("{0:X8}", o.second)))
        orderby b.id
        select string.Format(
            "{0:X8}-{1:X8} ({2}): {3}",
            b.id,
            b.id + (end.End - b.id.ToLinear()),
            end.End - b.id.ToLinear(),
            targets);

    writeLine(string.Join(Environment.NewLine, lines));
}
// Prints one line per block to the debug output: start and end address,
// a "*" marker when the block id is in bad_blocks, the size, and the
// targets of the edges leaving the block.
private void DumpBadBlocks(ScanResults sr, Dictionary<long, block> blocks, IEnumerable<link> edges, HashSet<Address> bad_blocks)
{
    // For each block id: the highest (address + size) of its instructions.
    var blockEnds = sr.FlatInstructions.Values
        .GroupBy(ins => ins.block_id)
        .Select(g => new
        {
            BlockId = g.Key,
            End = g.Max(ins => ins.addr.ToLinear() + (uint)ins.size)
        });

    var lines =
        from b in blocks.Values
        join end in blockEnds on b.id equals end.BlockId
        join edge in edges on b.id equals edge.first into outgoing
        let targets = string.Join(", ", outgoing.Select(o => string.Format("{0:X8}", o.second)))
        orderby b.id
        select string.Format(
            "{0:X8}-{1:X8} {2} ({3}): {4}",
            b.id,
            b.id + (end.End - b.id.ToLinear()),
            bad_blocks.Contains(b.id) ? "*" : " ",
            end.End - b.id.ToLinear(),
            targets);

    Debug.Print("{0}", string.Join(Environment.NewLine, lines));
}
/// <summary>
/// Shingle-scans every unscanned range, prunes bad instructions and
/// invalid blocks, resolves block conflicts and detects procedures.
/// </summary>
/// <param name="sr">Scan results updated in place.</param>
/// <returns>
/// <paramref name="sr"/> with <c>Procedures</c> set, or <c>null</c> when
/// there were no unscanned ranges.
/// </returns>
public ScanResults? ScanImage(ScanResults sr)
{
    // At this point, we have some entries in the image map
    // that are data, and unscanned ranges in between. We
    // have hopefully a bunch of procedure addresses to
    // break up the unscanned ranges.
    var ranges = FindUnscannedRanges();
    var shsc = new ShingledScanner(program, host, storageBinder, sr, eventListener);
    bool unscanned = false;
    foreach (var range in ranges)
    {
        unscanned = true;
        try
        {
            // NOTE(review): Item3 is passed for both trailing arguments;
            // confirm this is intended.
            shsc.ScanRange(
                program.Architecture,
                range.Item1,
                range.Item2,
                range.Item3,
                range.Item3);
        }
        catch (AddressCorrelatedException aex)
        {
            // Report the failure and carry on with the next range.
            host.Error(aex.Address, aex.Message);
        }
    }
    if (!unscanned)
    {
        // No unscanned blocks were found.
        return null;
    }

    // Remove blocks that fall off the end of the segment
    // or into data.
    Probe(sr);
    shsc.Dump("After shingle scan graph built");
    var deadNodes = shsc.RemoveBadInstructionsFromGraph();
    shsc.BuildIcfg(deadNodes);
    Probe(sr);
    sr.Dump("After shingle scan");

    // On processors with variable length instructions,
    // there may be many blocks that partially overlap the
    // "real" blocks that would actually have been executed
    // by the processor. Starting with known "roots", try to
    // remove as many invalid blocks as possible.
    // The resolver is deliberately still created before RemoveInvalidBlocks
    // runs, as in the original statement order.
    var hsc = new BlockConflictResolver(
        program,
        sr,
        program.SegmentMap.IsValidAddress,
        host);
    RemoveInvalidBlocks(sr);
    Probe(sr);
    hsc.ResolveBlockConflicts(sr.KnownProcedures.Concat(sr.DirectlyCalledAddresses.Keys));
    Probe(sr);
    sr.Dump("After block conflict resolution");

    var pd = new ProcedureDetector(program, sr, this.eventListener);
    var procs = pd.DetectProcedures();
    sr.Procedures = procs;
    return sr;
}
// Currently a no-op: the body is empty, so calling this does nothing with sr.
private void Probe(ScanResults sr) { }
// Dumps the blocks through the three-argument DumpBlocks overload,
// sending the text to the debug output.
private void DumpBlocks(ScanResults sr, Dictionary<Address, block> blocks)
    => DumpBlocks(sr, blocks, text => Debug.WriteLine(text));
/// <summary>
/// Groups the flat instructions into basic blocks and rewrites
/// <c>sr.FlatEdges</c> so that its edges originate at block ids, dropping
/// the edges that ended up inside a block.
/// </summary>
/// <param name="sr">Scan results; instruction counters, block ids and
/// <c>FlatEdges</c> are updated.</param>
/// <returns>The blocks, keyed by block id.</returns>
public Dictionary<Address, block> BuildBasicBlocks(ScanResults sr)
{
    // Count and save the # of successors for each instruction.
    foreach (var bySource in sr.FlatEdges.GroupBy(e => e.first))
    {
        if (sr.FlatInstructions.TryGetValue(bySource.Key, out var source))
        {
            source.succ = bySource.Count();
        }
    }

    // Count and save the # of predecessors for each instruction.
    foreach (var byTarget in sr.FlatEdges.GroupBy(e => e.second))
    {
        if (sr.FlatInstructions.TryGetValue(byTarget.Key, out var target))
        {
            target.pred = byTarget.Count();
        }
    }

    // Merge each linear instruction with the instruction directly after
    // it when that edge is the only way out of the first and the only way
    // into the second, and the second is not a procedure entry.
    var interiorEdges = new HashSet<link>();
    foreach (var cur in sr.FlatInstructions.Values)
    {
        if (cur.type != (ushort)RtlClass.Linear)
        {
            continue;
        }
        if (!sr.FlatInstructions.TryGetValue(cur.addr + cur.size, out var next))
        {
            continue;
        }
        bool soleLink = cur.succ == 1 && next.pred == 1;
        if (soleLink &&
            !sr.KnownProcedures.Contains(next.addr) &&
            !sr.DirectlyCalledAddresses.ContainsKey(next.addr))
        {
            next.block_id = cur.block_id;
            interiorEdges.Add(new link { first = cur.addr, second = next.addr });
        }
    }

    // Build global block graph.
    var result = sr.FlatInstructions.Values
        .GroupBy(i => i.block_id)
        .Select(g => new block
        {
            id = g.Key,
            instrs = g.OrderBy(ii => ii.addr).ToArray()
        })
        .ToDictionary(b => b.id);

    // Re-source the remaining edges at their block id.
    sr.FlatEdges = sr.FlatEdges
        .Join(
            sr.FlatInstructions.Values,
            e => e.first,
            f => f.addr,
            (e, f) => new { Edge = e, From = f })
        .Where(x => !interiorEdges.Contains(x.Edge))
        .Select(x => new link { first = x.From.block_id, second = x.Edge.second })
        .Distinct()
        .ToList();
    return result;
}
// Passes the address of every ICFG node to sr.BreakOnWatchedAddress.
public void Probe(ScanResults sr)
{
    var nodeAddresses = sr.ICFG.Nodes.Select(n => n.Address);
    sr.BreakOnWatchedAddress(nodeAddresses);
}
/// <summary>
/// Creates a padding finder that works on <paramref name="sr"/>.
/// </summary>
/// <param name="sr">Scan results stored for later use.</param>
public ProcedurePaddingFinder(ScanResults sr) => this.sr = sr;
/// <summary>
/// Builds basic blocks out of the "soup" of instructions and links.
/// Instructions with zero or more than one predecessor or successor
/// delimit the start and end of the basic blocks.
/// </summary>
/// <param name="sr">Scan results; instruction counters, block ids and
/// <c>FlatEdges</c> are updated.</param>
/// <returns>The blocks, keyed by block id.</returns>
public static Dictionary<Address, block> BuildBasicBlocks(ScanResults sr)
{
    // Count and save the # of successors for each instruction.
    foreach (var bySource in sr.FlatEdges.GroupBy(e => e.first))
    {
        if (sr.FlatInstructions.TryGetValue(bySource.Key, out var source))
        {
            source.succ = bySource.Count();
        }
    }

    // Count and save the # of predecessors for each instruction.
    foreach (var byTarget in sr.FlatEdges.GroupBy(e => e.second))
    {
        if (sr.FlatInstructions.TryGetValue(byTarget.Key, out var target))
        {
            target.pred = byTarget.Count();
        }
    }

    var interiorEdges = new HashSet<link>();
    foreach (var cur in sr.FlatInstructions.Values)
    {
        // Only an instruction with the Linear bit set can be merged with
        // the instruction that follows it.
        if ((cur.type & (ushort)InstrClass.Linear) == 0)
        {
            continue;
        }

        // Find the instruction that is located directly after cur.
        if (!sr.FlatInstructions.TryGetValue(cur.addr + cur.size, out var next))
        {
            continue;
        }

        // A change in the Padding bit or in the Zero bit between the two
        // instructions starts a new block.
        var changedBits = cur.type ^ next.type;
        if ((changedBits & (ushort)InstrClass.Padding) != 0 ||
            (changedBits & (ushort)InstrClass.Zero) != 0)
        {
            continue;
        }

        // If next follows cur and it's not the entry of a known procedure
        // or a called address, the edge between them lies inside a basic
        // block and is not needed. next joins the block of cur.
        // FlatInstructions is walked in ascending address order, so
        // block ids propagate from the first instruction of each block
        // to the ones after it.
        bool soleLink = cur.succ == 1 && next.pred == 1;
        if (soleLink &&
            !sr.KnownProcedures.Contains(next.addr) &&
            !sr.DirectlyCalledAddresses.ContainsKey(next.addr))
        {
            next.block_id = cur.block_id;
            interiorEdges.Add(new link { first = cur.addr, second = next.addr });
        }
    }

    // Build the blocks by grouping the instructions.
    var result = sr.FlatInstructions.Values
        .GroupBy(i => i.block_id)
        .Select(g => new block
        {
            id = g.Key,
            instrs = g.OrderBy(ii => ii.addr).ToArray()
        })
        .ToDictionary(b => b.id);

    // Exclude the now useless edges and re-source the rest at block ids.
    sr.FlatEdges = sr.FlatEdges
        .Join(
            sr.FlatInstructions.Values,
            e => e.first,
            f => f.addr,
            (e, f) => new { Edge = e, From = f })
        .Where(x => !interiorEdges.Contains(x.Edge))
        .Select(x => new link { first = x.From.block_id, second = x.Edge.second })
        .Distinct()
        .ToList();
    return result;
}