/// <summary>
/// Runs the interprocedural register analysis: summarizes the net effect
/// each procedure has on registers, then removes trashed registers that
/// aren't live-out.
/// </summary>
/// <returns>
/// The SSA transforms produced by <c>RewriteProceduresToSsa</c>. If the
/// user cancels before that step has run, the list is empty.
/// </returns>
public List<SsaTransform> UntangleProcedures()
{
    eventListener.ShowProgress("Rewriting procedures.", 0, program.Procedures.Count);
    var transforms = new List<SsaTransform>();

    IntraBlockDeadRegisters.Apply(program, eventListener);
    if (eventListener.IsCanceled())
    {
        return transforms;
    }

    AdjacentBranchCollector.Transform(program, eventListener);
    if (eventListener.IsCanceled())
    {
        return transforms;
    }

    transforms = RewriteProceduresToSsa();
    if (eventListener.IsCanceled())
    {
        return transforms;
    }

    // User-defined signatures are recreated here; this should keep type
    // inference from mixing user-defined parameters with other expressions.
    new UserSignatureBuilder(program).BuildSignatures(eventListener);
    if (eventListener.IsCanceled())
    {
        return transforms;
    }

    // Find the ssaId's that are live out at each call site and delete
    // every other one.
    var ssaStates = transforms.Select(t => t.SsaState);
    var outValuesRemover = new UnusedOutValuesRemover(
        program,
        ssaStates,
        this.flow,
        dynamicLinker,
        eventListener);
    outValuesRemover.Transform();

    if (!eventListener.IsCanceled())
    {
        // The exit blocks now contain only live-out registers, so
        // signatures can be created from them.
        CallRewriter.Rewrite(program.Platform, transforms, this.flow, eventListener);
    }
    return transforms;
}
/// <summary>
/// Extracts structured program constructs out of snarled goto nests, if possible.
/// Since procedures are now independent of each other, this analysis
/// is done one procedure at a time.
/// </summary>
/// <remarks>
/// A failure while structuring one procedure is reported through the event
/// listener and does not stop the remaining procedures from being processed.
/// </remarks>
public void StructureProgram()
{
    foreach (var program in project.Programs)
    {
        int i = 0;
        foreach (Procedure proc in program.Procedures.Values)
        {
            try
            {
                eventListener.ShowProgress("Rewriting procedures to high-level language.", i, program.Procedures.Values.Count);
                ++i;
                // Progress is reported through the event listener only; no
                // trace output is written to the console from this method.
                IStructureAnalysis sa = new StructureAnalysis(proc);
                sa.Structure();
            }
            catch (Exception e)
            {
                eventListener.Error(
                    eventListener.CreateProcedureNavigator(program, proc),
                    e,
                    "An error occurred while rewriting procedure to high-level language.");
            }
        }
        // NOTE(review): this runs once per program although it takes no
        // program argument - confirm whether a single call after the loop
        // would be sufficient.
        WriteDecompilerProducts();
    }
    eventListener.ShowStatus("Rewriting complete.");
}
/// <summary>
/// Rewrites the formal parameters of every procedure signature and then
/// replaces each statement's instruction with the result of visiting it
/// with this rewriter.
/// </summary>
/// <param name="program">Program whose procedures are rewritten in place.</param>
/// <remarks>
/// Stops as soon as the event listener reports cancellation. An exception
/// raised while rewriting one statement is written to the debug output and
/// the remaining statements are still processed.
/// </remarks>
public void RewriteProgram(Program program)
{
    int total = program.Procedures.Count;
    int done = 0;
    foreach (Procedure proc in program.Procedures.Values)
    {
        eventListener.ShowProgress("Rewriting expressions.", done, total);
        ++done;
        RewriteFormals(proc.Signature);

        foreach (Statement statement in proc.Statements)
        {
            if (eventListener.IsCanceled())
            {
                return;
            }

            try
            {
                var rewritten = statement.Instruction.Accept(this);
                statement.Instruction = rewritten;
            }
            catch (Exception failure)
            {
                // The message is formatted inside the Debug.WriteLine call so
                // that it is only built when debug output is compiled in.
                Debug.WriteLine(
                    string.Format("Exception in TypedExpressionRewriter.RewriteProgram: {0} ({1})\r\n{2}", proc, failure.Message, failure.StackTrace));
                // Clear the flag so the failed statement does not affect the next one.
                dereferenced = false;
            }
        }
    }
}
/// <summary>
/// Ensures type variables exist for the program's globals, its segments and
/// each procedure signature, then visits every instruction of every
/// procedure with this object.
/// </summary>
/// <param name="program">Program whose procedures are visited.</param>
/// <remarks>
/// Cancellation is checked once per procedure, before that procedure is
/// processed.
/// </remarks>
public void Build(Program program)
{
    // The globals get special treatment: in essence they are a memory map
    // of every global variable.
    var globalsTypeVar = store.EnsureExpressionTypeVariable(factory, program.Globals, "globals_t");
    globalsTypeVar.OriginalDataType = program.Globals.DataType;

    EnsureSegmentTypeVariables(program.SegmentMap.Segments.Values);

    int total = program.Procedures.Count;
    int index = 0;
    foreach (Procedure procedure in program.Procedures.Values)
    {
        if (listener.IsCanceled())
        {
            return;
        }
        listener.ShowProgress("Gathering primitive datatypes from instructions.", index, total);
        ++index;

        this.signature = procedure.Signature;
        EnsureSignatureTypeVariables(this.signature);

        foreach (Statement statement in procedure.Statements)
        {
            // Remember the statement being visited before dispatching on it.
            stmCur = statement;
            statement.Instruction.Accept(this);
        }
    }
}
/// <summary>
/// Runs structure analysis on every procedure of every program in the
/// project, one procedure at a time, to recover structured constructs from
/// goto-based control flow where possible.
/// </summary>
/// <remarks>
/// Does nothing when there is no project. Returns immediately, without
/// firing the script event or writing any products, if the listener reports
/// cancellation. An error in one procedure is reported to the listener and
/// the loop moves on to the next procedure.
/// </remarks>
public void StructureProgram()
{
    if (project is null)
    {
        return;
    }

    foreach (var program in project.Programs)
    {
        int processed = 0;
        foreach (Procedure procedure in program.Procedures.Values)
        {
            if (eventListener.IsCanceled())
            {
                return;
            }

            try
            {
                eventListener.ShowProgress(
                    "Rewriting procedures to high-level language.",
                    processed,
                    program.Procedures.Values.Count);
                ++processed;

                IStructureAnalysis analysis = new StructureAnalysis(eventListener, program, procedure);
                analysis.Structure();
            }
            catch (Exception failure)
            {
                var location = eventListener.CreateProcedureNavigator(program, procedure);
                eventListener.Error(
                    location,
                    failure,
                    "An error occurred while rewriting procedure to high-level language.");
            }
        }
    }

    project.FireScriptEvent(ScriptEvent.OnProgramDecompiled);
    WriteDecompilerProducts();
    eventListener.ShowStatus("Rewriting complete.");
}
/// <summary>
/// Heuristically scans the unscanned parts of the image for procedures.
/// </summary>
/// <remarks>
/// Plan of attack:
/// In each unscanned "hole", look for signatures of procedure entries.
/// These are procedure entry candidates.
/// Scan each of the procedure entry candidates heuristically.
///
/// Next scan all executable code segments for:
///  - calls that reach those candidates
///  - jmps to those candidates
///  - pointers to those candidates.
/// Each time we find a call, we increase the score of the candidate.
/// At the end we have a list of scored candidates.
///
/// This method performs the first part: it disassembles each candidate
/// range, resolves block conflicts and gaps, adds the resulting blocks and
/// finally reports the elapsed time and block count through the listener.
/// </remarks>
public void ScanImageHeuristically()
{
    var sw = Stopwatch.StartNew();
    var list = new List<HeuristicBlock>();
    var ranges = FindUnscannedRanges();
    var fnRanges = FindPossibleFunctions(ranges).ToList();
    int n = 0;
    foreach (var range in fnRanges)
    {
        var hproc = DisassembleProcedure(range.Item1, range.Item2);
        var hps = new HeuristicProcedureScanner(program, hproc, host);
        hps.BlockConflictResolution();
        DumpBlocks(hproc.Cfg.Nodes);
        hps.GapResolution();
        // TODO: add all guessed code to image map -- clearly labelled.
        AddBlocks(hproc);
        list.AddRange(hproc.Cfg.Nodes);

        // Count this range as finished before reporting, so the last
        // report reaches fnRanges.Count.
        ++n;
        eventListener.ShowProgress("Estimating procedures", n, fnRanges.Count);
    }
    sw.Stop();
    eventListener.Warn(
        new Reko.Core.Services.NullCodeLocation("Heuristics"),
        string.Format("Scanned image in {0} seconds, finding {1} blocks.", sw.Elapsed.TotalSeconds, list.Count));
}
/// <summary>
/// Collects data types for the global type, the user-specified global
/// variables and the image symbols, then for each procedure's signature
/// and for every instruction of every procedure.
/// </summary>
/// <remarks>
/// Returns early when the listener reports cancellation. An exception
/// raised while processing a statement is reported to the listener together
/// with a navigator for that statement, and collection continues.
/// </remarks>
public void CollectTypes()
{
    CollectGlobalType();
    CollectUserGlobalVariableTypes();
    CollectImageSymbols();

    int total = program.Procedures.Count;
    int current = 0;
    foreach (Procedure procedure in program.Procedures.Values)
    {
        eventListener.ShowProgress("Collecting data types.", current, total);
        ++current;
        CollectProcedureSignature(procedure);

        foreach (Statement statement in procedure.Statements)
        {
            if (eventListener.IsCanceled())
            {
                return;
            }

            try
            {
                // Record the statement being visited before dispatching on it.
                this.stmCur = statement;
                statement.Instruction.Accept(this);
            }
            catch (Exception failure)
            {
                eventListener.Error(
                    eventListener.CreateStatementNavigator(program, statement),
                    failure,
                    "An error occurred while processing the statement {0}.",
                    statement);
            }
        }
    }
}
/// <summary>
/// Seeds the program's globals with a pointer-to-structure data type,
/// collects the user-specified global variable types, and then walks every
/// procedure, collecting types from its signature and from each of its
/// instructions.
/// </summary>
/// <remarks>
/// The walk ends immediately if the listener reports cancellation. Errors
/// thrown while visiting a statement are forwarded to the listener and do
/// not stop the walk.
/// </remarks>
public void CollectTypes()
{
    var globals = program.Globals;
    var globalsPointer = factory.CreatePointer(
        factory.CreateStructureType(),
        program.Platform.PointerType.BitSize);
    desc.MeetDataType(globals, globalsPointer);

    CollectUserGlobalVariableTypes(store.SegmentTypes);

    int procedureCount = program.Procedures.Count;
    int procedureIndex = 0;
    foreach (Procedure p in program.Procedures.Values)
    {
        eventListener.ShowProgress("Collecting data types.", procedureIndex, procedureCount);
        procedureIndex += 1;
        CollectProcedureSignature(p);

        foreach (Statement s in p.Statements)
        {
            if (eventListener.IsCanceled())
            {
                return;
            }

            try
            {
                this.stmCur = s;
                s.Instruction.Accept(this);
            }
            catch (Exception error)
            {
                var navigator = eventListener.CreateStatementNavigator(program, s);
                eventListener.Error(
                    navigator,
                    error,
                    "An error occurred while processing the statement {0}.",
                    s);
            }
        }
    }
}
/// <summary>
/// Partitions the nodes of the ICFG into clusters, one per weakly connected
/// component.
/// </summary>
/// <returns>
/// The clusters found. If the listener reports cancellation, only the
/// clusters built so far are returned.
/// </returns>
public List<Cluster> FindClusters()
{
    var unvisited = new HashSet<RtlBlock>(sr.ICFG.Nodes);
    var result = new List<Cluster>();
    int total = unvisited.Count;

    listener.ShowProgress("Finding procedure candidates", 0, total);
    while (unvisited.Count > 0 && !listener.IsCanceled())
    {
        // Any remaining node serves as the seed of the next component.
        var seed = unvisited.First();
        var component = new Cluster();
        result.Add(component);
        BuildWCC(seed, component, unvisited);

        var addresses = component.Blocks.Select(block => block.Address);
        sr.BreakOnWatchedAddress(addresses);

        int visited = total - unvisited.Count;
        listener.ShowProgress("Finding procedure candidates", visited, total);
    }
    return result;
}
/// <summary>
/// Runs the per-procedure data flow passes that turn the simple,
/// close-to-the-machine code produced by disassembly into complex
/// expression trees.
/// </summary>
/// <remarks>
/// Each procedure is processed independently. An exception raised by any
/// pass is reported to the event listener and the next procedure is then
/// processed.
/// </remarks>
public void BuildExpressionTrees()
{
    int index = 0;
    foreach (Procedure proc in program.Procedures.Values)
    {
        eventListener.ShowProgress("Building complex expressions.", index++, program.Procedures.Values.Count);
        try
        {
            new LongAddRewriter(proc, program.Architecture).Transform();
            new Aliases(proc, program.Architecture, flow).Transform();

            var dominators = new DominatorGraph<Block>(proc.ControlGraph, proc.EntryBlock);
            var transform = new SsaTransform(flow, proc, importResolver, dominators);
            var ssa = transform.SsaState;

            new ConditionCodeEliminator(ssa.Identifiers, program.Platform).Transform();
            // The ConstDivisionImplementedByMultiplication pass is currently disabled.
            DeadCode.Eliminate(proc, ssa);

            new ValuePropagator(program.Architecture, ssa.Identifiers, proc).Transform();
            DeadCode.Eliminate(proc, ssa);

            // Expression building: a definition that has a single use can be
            // folded into the expression that uses it.
            new Coalescer(proc, ssa).Transform();
            DeadCode.Eliminate(proc, ssa);

            var inductionFinder = new LinearInductionVariableFinder(
                proc,
                ssa.Identifiers,
                new BlockDominatorGraph(proc.ControlGraph, proc.EntryBlock));
            inductionFinder.Find();
            foreach (KeyValuePair<LinearInductionVariable, LinearInductionVariableContext> entry in inductionFinder.Contexts)
            {
                var reduction = new StrengthReduction(ssa, entry.Key, entry.Value);
                reduction.ClassifyUses();
                reduction.ModifyUses();
            }

            new OutParameterTransformer(proc, ssa.Identifiers).Transform();
            DeadCode.Eliminate(proc, ssa);

            // Webs are formed from definitions with several uses and from
            // variables that are joined by PHI functions.
            new WebBuilder(proc, ssa.Identifiers, program.InductionVariables).Transform();
            ssa.ConvertBack(false);
        }
        catch (StatementCorrelatedException statementError)
        {
            var location = eventListener.CreateBlockNavigator(program, statementError.Statement.Block);
            eventListener.Error(
                location,
                statementError,
                "An error occurred during data flow analysis.");
        }
        catch (Exception error)
        {
            eventListener.Error(
                new NullCodeLocation(proc.Name),
                error,
                "An error occurred during data flow analysis.");
        }
    }
}
/// <summary>
/// Disassemble every byte of a range of addresses, marking those
/// addresses that likely are code as MaybeCode, everything else as
/// data. Simultaneously, the graph G of cross references is built
/// up.
/// </summary>
/// <remarks>
/// The plan is to disassemble every location of the range, building
/// a reverse control graph. Any jump to an illegal address or any
/// invalid instruction will result in an edge from "bad" to that
/// instruction.
/// </remarks>
/// <param name="mem">Memory area handed to the validity check of each
/// disassembled instruction.</param>
/// <param name="addrStart">Address of the first location to disassemble.</param>
/// <param name="addrEnd">End address of the range; the returned array has
/// <c>addrEnd - addrStart</c> elements.</param>
/// <param name="workToDo">Total used as the denominator when reporting
/// progress; it is cast to <c>int</c> for that purpose.</param>
/// <returns>An array of bytes classifying each byte as code or data.
/// </returns>
public byte[] ScanRange(MemoryArea mem, Address addrStart, Address addrEnd, ulong workToDo)
{
    // One classification byte per address in the range.
    var cbAlloc = addrEnd - addrStart;
    var y = new byte[cbAlloc];
    // Advance by the instruction granularity.
    var step = program.Architecture.InstructionBitSize / 8;
    // Class of the previously accepted instruction; consulted to decide
    // whether a fall-through edge should be added for the current one.
    var delaySlot = InstrClass.None;
    var rewriterCache = new Dictionary <Address, IEnumerator <RtlInstructionCluster> >();
    for (var a = 0; a < y.Length; a += step)
    {
        y[a] = MaybeCode;
        var addr = addrStart + a;
        var dasm = GetRewriter(addr, rewriterCache);
        if (!dasm.MoveNext())
        {
            // Nothing could be produced at this address: record it as
            // invalid and link it to the "bad" node.
            // NOTE(review): y[a] is left as MaybeCode and delaySlot is not
            // reset on this path - confirm that is intended.
            sr.Invalid.Add(addr);
            AddEdge(Bad, addr);
            continue;
        }
        var i = dasm.Current;
        if (IsInvalid(mem, i))
        {
            sr.Invalid.Add(addr);
            AddEdge(Bad, i.Address);
            i.Class = InstrClass.Invalid;
            AddInstruction(i);
            delaySlot = InstrClass.None;
            y[a] = Data;
        }
        else
        {
            if (MayFallThrough(i))
            {
                // The fall-through edge is skipped when the previous
                // instruction's class has all the DT bits set.
                if ((delaySlot & DT) != DT)
                {
                    if (a + i.Length < y.Length)
                    {
                        // Still inside the segment.
                        AddEdge(i.Address + i.Length, i.Address);
                    }
                    else
                    {
                        // Fell off segment, i must be a bad instruction.
                        AddEdge(Bad, i.Address);
                        i.Class = InstrClass.Invalid;
                        AddInstruction(i);
                        y[a] = Data;
                    }
                }
            }
            if ((i.Class & InstrClass.Transfer) != 0)
            {
                var addrDest = DestinationAddress(i);
                if (addrDest != null)
                {
                    if (IsExecutable(addrDest))
                    {
                        // call / jump destination is executable
                        if ((i.Class & InstrClass.Call) != 0)
                        {
                            // Don't add edges to other procedures.
                            // Instead, count how often each address is called directly.
                            if (!this.sr.DirectlyCalledAddresses.TryGetValue(addrDest, out int callTally))
                            {
                                callTally = 0;
                            }
                            this.sr.DirectlyCalledAddresses[addrDest] = callTally + 1;
                        }
                        else
                        {
                            AddEdge(addrDest, i.Address);
                        }
                    }
                    else
                    {
                        // Jump to data / hyperspace.
                        // NOTE(review): if the fall-off branch above also ran for
                        // this instruction, AddInstruction(i) is reached twice -
                        // confirm AddInstruction tolerates that.
                        AddEdge(Bad, i.Address);
                        i.Class = InstrClass.Invalid;
                        AddInstruction(i);
                        y[a] = Data;
                    }
                }
                else
                {
                    // No destination address is available: record the
                    // instruction as an indirect call or an indirect jump.
                    if ((i.Class & InstrClass.Call) != 0)
                    {
                        this.sr.IndirectCalls.Add(i.Address);
                    }
                    else
                    {
                        this.sr.IndirectJumps.Add(i.Address);
                    }
                }
            }
            // If this is a delayed unconditional branch...
            // ...the next iteration sees it through delaySlot.
            delaySlot = i.Class;
        }
        if (y[a] == MaybeCode)
        {
            AddInstruction(i);
        }
        // Keep the rewriter, keyed by the address following this instruction.
        SaveRewriter(addr + i.Length, dasm, rewriterCache);
        eventListener.ShowProgress("Shingle scanning", sr.Instructions.Count, (int)workToDo);
    }
    return(y);
}
/// <summary>
/// Applies the per-procedure data flow passes that build complex expression
/// trees out of the simple, close-to-the-machine code generated by the
/// disassembly.
/// </summary>
/// <remarks>
/// Procedures are handled one at a time. The method stops before the next
/// procedure once the listener reports cancellation. Exceptions thrown by a
/// pass are reported to the listener, after which the next procedure is
/// processed.
/// </remarks>
public void BuildExpressionTrees()
{
    int procIndex = 0;
    foreach (Procedure proc in program.Procedures.Values)
    {
        if (eventListener.IsCanceled())
        {
            return;
        }
        eventListener.ShowProgress("Building complex expressions.", procIndex++, program.Procedures.Values.Count);

        try
        {
            var ssaTransform = BuildSsaTransform(proc);
            var ssa = ssaTransform.SsaState;

            new UnalignedMemoryAccessFuser(ssa).Transform();

            var propagator = new ValuePropagator(program.SegmentMap, ssa, importResolver, eventListener);

            ssaTransform.RenameFrameAccesses = true;
            var indirectCalls = new IndirectCallRewriter(program, ssa, eventListener);
            // Every time indirect calls were rewritten, propagate values and
            // run the SSA transform again; stop on cancellation.
            while (!eventListener.IsCanceled() && indirectCalls.Rewrite())
            {
                propagator.Transform();
                ssaTransform.Transform();
            }

            new ConditionCodeEliminator(ssa, program.Platform).Transform();
            // The ConstDivisionImplementedByMultiplication pass is currently disabled.
            DeadCode.Eliminate(proc, ssa);

            propagator.Transform();
            DeadCode.Eliminate(proc, ssa);

            // Expression building: single-use definitions are folded into
            // the expression that uses them.
            new Coalescer(proc, ssa).Transform();
            DeadCode.Eliminate(proc, ssa);
            propagator.Transform();

            var blockDominators = new BlockDominatorGraph(proc.ControlGraph, proc.EntryBlock);
            var inductionFinder = new LinearInductionVariableFinder(proc, ssa.Identifiers, blockDominators);
            inductionFinder.Find();
            foreach (KeyValuePair<LinearInductionVariable, LinearInductionVariableContext> pair in inductionFinder.Contexts)
            {
                var strength = new StrengthReduction(ssa, pair.Key, pair.Value);
                strength.ClassifyUses();
                strength.ModifyUses();
            }

            new OutParameterTransformer(proc, ssa.Identifiers).Transform();
            DeadCode.Eliminate(proc, ssa);

            // Multi-use definitions and variables joined by PHI functions
            // are turned into webs.
            new WebBuilder(proc, ssa.Identifiers, program.InductionVariables).Transform();
            ssa.ConvertBack(false);
        }
        catch (StatementCorrelatedException correlated)
        {
            var where = eventListener.CreateStatementNavigator(program, correlated.Statement);
            eventListener.Error(
                where,
                correlated,
                "An error occurred during data flow analysis.");
        }
        catch (Exception unexpected)
        {
            eventListener.Error(
                new NullCodeLocation(proc.Name),
                unexpected,
                "An error occurred during data flow analysis.");
        }
    }
}
/// <summary>
/// Disassemble every byte of the segment, marking those addresses
/// that likely are code as MaybeCode, everything else as data.
/// </summary>
/// <remarks>
/// The plan is to disassemble every location of the segment, building
/// a reverse control graph. Any jump to an illegal address or any
/// invalid instruction will result in an edge from "bad" to that
/// instruction.
/// </remarks>
/// <param name="segment">The image segment to disassemble.</param>
/// <param name="workToDo">Total used as the denominator when reporting
/// progress; it is cast to <c>int</c> for that purpose.</param>
/// <returns>A <c>ScannedSegment</c> whose <c>Blocks</c> are built from the
/// surviving instructions and whose <c>CodeFlags</c> is the array of bytes
/// classifying each byte as code or data.
/// </returns>
public ScannedSegment ScanSegment(ImageSegment segment, ulong workToDo)
{
    // G is the reverse control graph; "bad" is the node from which every
    // location known to be invalid is reachable.
    var G = new DiGraph <Address>();
    G.AddNode(bad);
    // One classification byte per address, limited to whichever is smaller:
    // the segment size or the distance to the end of its memory area.
    var cbAlloc = Math.Min(
        segment.Size,
        segment.MemoryArea.EndAddress - segment.Address);
    var y = new byte[cbAlloc];
    // Advance by the instruction granularity.
    var step = program.Architecture.InstructionBitSize / 8;
    // Class of the previously accepted instruction; consulted to decide
    // whether a fall-through edge should be added for the current one.
    var delaySlot = InstructionClass.None;
    for (var a = 0; a < y.Length; a += step)
    {
        y[a] = MaybeCode;
        var i = Dasm(segment, a);
        if (i == null)
        {
            // NOTE(review): the scan stops at the first offset that yields no
            // instruction; later offsets keep their default byte value -
            // confirm that is intended.
            AddEdge(G, bad, segment.Address + a);
            break;
        }
        if (IsInvalid(segment.MemoryArea, i))
        {
            AddEdge(G, bad, i.Address);
            delaySlot = InstructionClass.None;
            y[a] = Data;
        }
        else
        {
            if (MayFallThrough(i))
            {
                // The fall-through edge is skipped when the previous
                // instruction's class equals DT.
                if (delaySlot != DT)
                {
                    if (a + i.Length < y.Length)
                    {
                        // Still inside the segment.
                        AddEdge(G, i.Address + i.Length, i.Address);
                    }
                    else
                    {
                        // Fell off segment, i must be a bad instruction.
                        AddEdge(G, bad, i.Address);
                        y[a] = Data;
                    }
                }
            }
            if ((i.InstructionClass & InstructionClass.Transfer) != 0)
            {
                var addrDest = DestinationAddress(i);
                if (addrDest != null)
                {
                    if (IsExecutable(addrDest))
                    {
                        // call / jump destination is executable
                        AddEdge(G, addrDest, i.Address);
                        if ((i.InstructionClass & InstructionClass.Call) != 0)
                        {
                            // Count how often each address is the target of a call.
                            int callTally;
                            if (!this.possibleCallDestinationTallies.TryGetValue(addrDest, out callTally))
                            {
                                callTally = 0;
                            }
                            this.possibleCallDestinationTallies[addrDest] = callTally + 1;
                        }
                    }
                    else
                    {
                        // Jump to data / hyperspace.
                        AddEdge(G, bad, i.Address);
                        y[a] = Data;
                    }
                }
            }
            // If this is a delayed unconditional branch...
            // ...the next iteration sees it through delaySlot.
            delaySlot = i.InstructionClass;
        }
        if (y[a] == MaybeCode)
        {
            instructions.Add(i.Address, i);
        }
        eventListener.ShowProgress("Shingle scanning", instructions.Count, (int)workToDo);
    }
    // Find all places that are reachable from "bad" addresses.
    // By transitivity, they must also be bad.
    foreach (var a in new DfsIterator <Address>(G).PreOrder(bad))
    {
        if (a != bad)
        {
            y[a - segment.Address] = Data;
            instructions.Remove(a);
            // Destination can't be a call destination.
            possibleCallDestinationTallies.Remove(a);
        }
    }
    // Build blocks out of sequences of instructions.
    var blocks = BuildBlocks(G, instructions);
    return(new ScannedSegment { Blocks = blocks, CodeFlags = y, });
}