// Splits every block that carries both balanced code and a residue into two vertices.
//
// For each vertex:
//  1) if the last balanced statement assigns to a Ref whose symbol has the same ProtoId
//     as the single Ref sitting in the residue, the assignment is dropped and its
//     right-hand side becomes the residue;
//  2) if both balanced code and residue are still non-empty, the residue is moved into
//     a freshly added vertex that is linked after the original one and takes over all
//     of the original vertex's outgoing edges (targets and tags are preserved).
public static void DoSplitBlocksIntoAssignmentsAndPredicates(ControlFlowGraph cfg)
{
    // New vertices are added to the graph inside the loop, so walk a snapshot rather
    // than the graph's own vertex collection. The added vertices have no balanced code,
    // so skipping them does not change the outcome.
    foreach (var v in cfg.Vertices.ToList())
    {
        if (v.BalancedCode.IsEmpty() || v.Residue.IsEmpty()) continue;

        var ass = (v.BalancedCode.Last() is Assign) ? v.BalancedCode.Last().AssertCast<Assign>() : null;
        var lhs = ass == null ? null : ass.Lhs.AssertCast<Ref>().Sym;
        var @ref = v.Residue.AssertSingle() is Ref ? v.Residue.AssertSingle().AssertCast<Ref>().Sym : null;
        if (lhs != null && @ref != null && lhs.ProtoId == @ref.ProtoId)
        {
            // todo. this introduces a nasty bug if assigned variable is reused later
            v.BalancedCode.RemoveLast();
            v.Residue.SetElements(ass.Rhs);
        }

        // the folding above may have emptied the balanced code
        if (v.BalancedCode.IsEmpty() || v.Residue.IsEmpty()) continue;

        var v_test = new ControlFlowBlock();
        v_test.Residue.Add(v.Residue.AssertSingle());
        v.Residue.RemoveElements();

        var outEdges = cfg.Vedges(v, null);
        cfg.RemoveEdges(outEdges);
        cfg.AddVertex(v_test);
        cfg.AddEdge(new ControlFlowEdge(v, v_test));
        foreach (var e in outEdges)
        {
            cfg.AddEdge(new ControlFlowEdge(v_test, e.Target, e.Tag));
        }
    }
}
// Removes from the graph every vertex that is not part of cfg.Cflow().
// Asserts that the Finish vertex is not among the vertices being removed.
public static void DoEvictUnreachableCode(ControlFlowGraph cfg)
{
    // todo. this still doesn't completely fix code like "while(false)"

    // Materialize once: the difference is otherwise a deferred query over cfg.Vertices
    // that would be re-evaluated for the assertion and then enumerated again while
    // RemoveVertices is mutating the very collection it reads from.
    var unreachable = cfg.Vertices.Except(cfg.Cflow()).ToList();
    unreachable.Contains(cfg.Finish).AssertFalse();
    cfg.RemoveVertices(unreachable);
}
// Eliminates the "return thunk": a single vertex in front of Finish that has no
// balanced code and whose residue is just a Ref to an auxiliary local.
//
// When such a vertex exists, it is removed, and every predecessor (which must end with
// an assignment to that same local and must have an empty residue) is linked straight
// to Finish; the trailing assignment is dropped and its right-hand side becomes the
// predecessor's residue.
public static void DoRemoveReturnThunk(ControlFlowGraph cfg)
{
    var preRets = cfg.Vedges(null, cfg.Finish);

    // The rewrite applies only when exactly one edge enters Finish. Zero edges used to
    // slip past the "> 1" check and then fail in AssertSingle; there is simply nothing
    // to remove in that case.
    if (preRets.Count() != 1) return;

    var wannabe = preRets.AssertSingle().Source;
    if (!wannabe.BalancedCode.IsEmpty() || !(wannabe.Residue.SingleOrDefault() is Ref)) return;

    var retThunk = wannabe;
    retThunk.BalancedCode.AssertEmpty();
    var auxLocal = retThunk.Residue.AssertSingle().AssertCast<Ref>().Sym;

    // edges into the thunk are collected before the thunk is removed from the graph
    var rets = cfg.Vedges(null, retThunk);
    rets.AssertEach(ret => ret.Tag == null);
    cfg.RemoveVertex(retThunk);

    foreach (var ret in rets)
    {
        var src = ret.Source;
        src.Residue.AssertEmpty();

        var ass = src.BalancedCode.Last().AssertCast<Assign>();
        var lhs = ass.Lhs.AssertCast<Ref>();
        var rhs = ass.Rhs.AssertCast<Expression>();
        (lhs.Sym.ProtoId == auxLocal.ProtoId).AssertTrue();

        cfg.AddEdge(new ControlFlowEdge(src, cfg.Finish));
        src.BalancedCode.RemoveLast();
        src.Residue.Add(rhs);
    }
}
// Builds the helper for the given graph: remembers the graph, snapshots its vertices
// and registers every statement (balanced code followed by residue, block by block)
// in the cache together with its running position.
public DfaHelper(ControlFlowGraph cfg)
{
    _cfg = cfg;

    // todo. use Cflow instead for being correct with exec order
    _vertices = cfg.Vertices.ToList();

    var statementsInOrder = (
        from block in _vertices
        from statement in block.BalancedCode.Concat(block.Residue.Cast<Node>())
        select statement).ToReadOnly();

    statementsInOrder.ForEach((statement, position) => PutIntoCache(statement, position));
}
// Repeatedly scans the atoms reported by a DfaHelper built for the graph and tries the
// four rewrite patterns on each atom, in order. As soon as one pattern matches, the scan
// restarts from the beginning; the pass ends after a full scan with no match.
// The symbols argument is not used by this method.
public static void DoRestoreOpAssignOperators(ControlFlowGraph cfg, Symbols symbols)
{
    var dfa = new DfaHelper(cfg);

    bool rewritten;
    do
    {
        rewritten = false;
        foreach (var atom in dfa.Atoms())
        {
            // patterns are tried strictly in order; later ones are skipped after a hit
            rewritten = TryMatchPattern1(dfa, atom)
                || TryMatchPattern2(dfa, atom)
                || TryMatchPattern3(dfa, atom)
                || TryMatchPattern4(dfa, atom);
            if (rewritten) break;
        }
    }
    while (rewritten);
}
// Turns binary edge tags into unary ones.
//
// For every vertex that has an outgoing edge whose tag has arity 2, the operator type
// derived from the first outgoing edge's tag is used to wrap the vertex residue into a
// single operator expression; each outgoing edge is then re-tagged IsTrue (its tag
// equals the operator's predicate) or IsFalse (its tag equals the negated predicate).
// Any other tag is an assertion failure.
//
// On exit, asserts that all tags have arity <= 1 and that non-empty residues hold
// exactly one element.
public static void DoNormalizeEdgeTags(ControlFlowGraph cfg)
{
    cfg.Vertices.AssertEach(v => cfg.Vedges(v, null).Count() <= 2);

    var branchingVertices = cfg.Edges()
        .Where(edge => edge.Tag.Arity() == 2)
        .Select(edge => edge.Source)
        .Distinct();

    branchingVertices.ForEach(vertex =>
    {
        var outgoing = cfg.Vedges(vertex, null);
        outgoing.AssertEach(edge => edge.Tag.Arity() == 2);

        var operatorType = outgoing.First().Tag.Value.ToOperatorType();

        // fold the operands currently sitting in the residue into one expression
        vertex.Residue.SetElements(Operator.Create(operatorType, vertex.Residue));

        foreach (var edge in outgoing)
        {
            PredicateType normalized;
            if (edge.Tag == operatorType.ToPredicateType())
            {
                normalized = PredicateType.IsTrue;
            }
            else if (edge.Tag == operatorType.ToPredicateType().Negate())
            {
                normalized = PredicateType.IsFalse;
            }
            else
            {
                throw AssertionHelper.Fail();
            }

            edge.Tag = normalized;
        }
    });

    cfg.Edges().AssertEach(e => e.Tag.Arity() <= 1);
    cfg.Vertices.Where(v => v.Residue.IsNotEmpty()).AssertEach(v => v.Residue.Count() == 1);
}
// Applies StripOffRedundanciesInPlace to every node of every vertex: first the balanced
// code of a block, then its residue, block by block.
public static void DoStripOffRedundancies(ControlFlowGraph cfg)
{
    var everyNode =
        from block in cfg.Vertices
        from node in Seq.Concat(block.BalancedCode, block.Residue.Cast<Node>())
        select node;

    everyNode.ForEach(StripOffRedundanciesInPlace);
}
// Restores array initializers in the balanced code of the graph.
//
// Phase 1: every array constructor call whose single argument is the integer constant 0
//          is replaced by a CollectionInit wrapping that call.
// Phase 2: for every assignment "a = new T[n]" with constant n, the run of element
//          stores that immediately follows it in the same block is inspected. If the
//          run stores to each of the indices 0..n-1 exactly once, the stores are removed
//          from the graph and the right-hand side of the assignment becomes a
//          CollectionInit holding the stored values in index order.
public static void DoRestoreCollectionInitializers(ControlFlowGraph cfg)
{
    // here we transform "new T[0]" expressions into collection initializers
    var allStmts = cfg.Vertices.SelectMany(cfb => cfb.BalancedCode).ToReadOnly();
    var allNodes = allStmts.SelectMany(s => s.Family()).ToReadOnly();
    var emptyArrayCtors = allNodes.OfType<Eval>().Where(eval =>
    {
        var ctor = eval.InvokedCtor();
        if (ctor == null || !ctor.DeclaringType.IsArray) return false;

        // OfType never yields null, so eval can be dereferenced directly
        var app = eval.Callee;
        var arrlen = app == null ? null : app.Args.SingleOrDefault2() as Const;
        if (arrlen == null || (!(arrlen.Value is int) && !(arrlen.Value is long))) return false;

        var i_arrlen = arrlen.Value.AssertCoerce<long>();
        return i_arrlen == 0;
    }).ToReadOnly();
    emptyArrayCtors.ForEach(eac => eac.Parent.ReplaceRecursive(eac, new CollectionInit(eac)));

    // now we proceed to decompile stuff like "var a = new T[n]; a[0] = foo0; ... a[n-1] = foo;"
    var arrayCtors = allStmts.OfType<Assign>().Where(ass =>
    {
        if (!(ass.Rhs is Eval)) return false;
        var ctor = ass.Rhs.InvokedCtor();
        return ctor == null ? false : ctor.DeclaringType.IsArray;
    }).ToReadOnly();

    // todo #1. this needs to be like RestoreOpAssignOperators
    // to be capable of restoring recursive collection initializers
    // todo #2. what about multidimensional initializers for jagged and/or rectangular arrays?
    // for the latter, note that for dimensions higher than 3 arrays of objects define
    // GetValue(params) and SetValue(params) methods => this won't work correctly
    foreach (var ctor in arrayCtors)
    {
        var cfb = cfg.Vertices.Single(cfb1 => cfb1.BalancedCode.Contains(ctor));
        var map = new Dictionary<long, Expression>();

        // the run of element stores that directly follows the array creation
        var inits = cfb.BalancedCode.SkipWhile(n => n != ctor).Skip(1).TakeWhile(n =>
        {
            var m = n.InvokedMethod();
            if (m != null && ((m.DeclaringType == typeof(Array) && m.Name == "SetValue") || (m.DeclaringType.IsArray && m.Name == "Set")))
            {
                var args = n.InvocationArgs().AssertNotNull();
                if (args.Count() != 3) return false;

                var arrayRef = args.First();
                if (!arrayRef.Equiv(ctor.Lhs)) return false;

                // NOTE(review): the arguments are read as (array, index, value) for both
                // methods, while System.Array.SetValue is declared as (value, index) -
                // confirm the order in which InvocationArgs reports them for SetValue
                var value = args.Third() as Expression;
                if (value == null) return false;

                var index = args.Second() as Const;
                if (index == null || (!(index.Value is int) && !(index.Value is long))) return false;

                var i_index = index.Value.AssertCoerce<long>();

                // A second store to an index we have already seen overwrites the element
                // after its initialization, so it ends the initializer run. Without this
                // check Dictionary.Add throws on the duplicate key.
                if (map.ContainsKey(i_index)) return false;

                map.Add(i_index, value);
                return true;
            }
            else
            {
                return false;
            }
        }).ToReadOnly();

        var arrlen = ctor.Rhs.InvocationArgs().SingleOrDefault2() as Const;
        if (arrlen == null || (!(arrlen.Value is int) && !(arrlen.Value is long))) continue;

        // every slot from 0 to n-1 must have been stored to
        var i_arrlen = arrlen.Value.AssertCoerce<long>();
        if (map.Count() != i_arrlen) continue;
        if (!Set.Equal(0L.Unfold(i => i + 1, i => i < i_arrlen), map.Keys)) continue;

        var elements = map.OrderBy(kvp => kvp.Key).Select(kvp => kvp.Value).ToReadOnly();
        var arrayInit = new CollectionInit(ctor.Rhs.AssertCast<Eval>(), elements);
        inits.ForEach(init => cfg.Remove(init));
        ctor.Rhs = arrayInit;
    }
}
// Pipeline step that hands the graph over to ControlFlowGraph.DecompileScopes and
// returns the block it produces.
public static Block DoDecompileScopes(ControlFlowGraph cfg)
{
    // todo. do not crash on irregular control flow but rather emit labels/gotos
    var scopes = cfg.DecompileScopes();
    return scopes;
}
// Placeholder pipeline step for restoring object initializers.
// Currently does nothing and leaves the graph untouched.
public static void DoRestoreObjectInitializers(ControlFlowGraph cfg)
{
    // todo. to be implemented
}
// Placeholder pipeline step for restoring conditional operators.
// Currently does nothing and leaves the graph untouched.
public static void DoRestoreConditionalOperators(ControlFlowGraph cfg)
{
    // todo. to be implemented
}
// Collapses short-circuit boolean expressions that were compiled into several blocks
// back into a single AndAlso/OrElse expression.
//
// Repeatedly picks a vertex "vs" that has a one-element residue and two outgoing edges
// leading to code-less blocks with one-element residues, takes its convergence vertex
// "conv" (whose first statement must be "Ref = Loophole"), and then:
//  1) merges chained conditional blocks between vs and conv pairwise into
//     AndAlso/OrElse junctions until nothing changes;
//  2) expects exactly four vertices to remain (vs, conv, a block yielding the constant
//     0 or 1, and a block yielding the other operand) and replaces them by vs alone,
//     which now assigns the combined expression to the Ref and continues with the rest
//     of conv.
//
// Anything that does not fit this shape is an assertion failure.
public static void DoDecompileComplexConditions(ControlFlowGraph cfg)
{
    var flow = cfg.Cflow(cfg.Start);
    while (true)
    {
        var vs = flow.FirstOrDefault(v =>
            v.Residue.Count() == 1 &&
            cfg.Vedges(v, null).Count() == 2 &&
            cfg.Vedges(v, null).All(e => e.Target.BalancedCode.IsEmpty() && e.Target.Residue.Count() == 1));
        if (vs == null) break;

        var conv = cfg.ConvStrict(vs);
        var ass = conv.BalancedCode.AssertFirst().AssertCast<Assign>();
        (ass.Lhs is Ref && ass.Rhs is Loophole).AssertTrue();

        // the region may only be entered through vs and left through vs or conv
        var parts = cfg.Cflow(vs, conv);
        var innerEdges = cfg.Edges(parts, parts).ToList();
        cfg.Edges(null, parts).Except(innerEdges).AssertEach(e => e.Target == vs);
        cfg.Edges(parts, null).Except(innerEdges).AssertEach(e => e.Source == vs || e.Source == conv);

        while (true)
        {
            var somethingWasChanged = false;
            foreach (var pivot in parts)
            {
                // pivot must have a single predecessor that branches either to pivot
                // or to target1, and pivot itself must branch to target1 or target2
                var pivot_inEdges = cfg.Vedges(null, pivot);
                if (pivot_inEdges.Count() != 1) continue;
                var e_pred2pivot = pivot_inEdges.AssertSingle();
                var pred = e_pred2pivot.Source;
                var pred_outEdges = cfg.Vedges(pred, null);
                if (pred_outEdges.Count() != 2) continue;
                var e_pred2target1 = pred_outEdges.AssertSingle(e => e.Target != pivot);
                var target1 = e_pred2target1.Target;
                var e_pivot2target1 = cfg.Vedge(pivot, target1);
                if (e_pivot2target1 == null) continue;
                var pivot_outEdges = cfg.Vedges(pivot, null);
                if (pivot_outEdges.Count() != 2) continue;
                var e_pivot2target2 = pivot_outEdges.AssertSingle(e => e.Target != target1);
                var target2 = e_pivot2target2.Target;

                var @operator = e_pred2target1.Condition == PredicateType.IsTrue ? OperatorType.OrElse :
                    e_pred2target1.Condition == PredicateType.IsFalse ? OperatorType.AndAlso :
                    ((Func<OperatorType>)(() => { throw AssertionHelper.Fail(); }))();
                var clause_left = pred.Residue.AssertSingle();
                var clause_right = pivot.Residue.AssertSingle();

                // both edges into target1 must end up with the polarity of pred's edge
                var negate_rhs = e_pred2target1.Condition != e_pivot2target1.Condition;
                if (negate_rhs) clause_right = Operator.Not(clause_right);
                var junction = Operator.Create(@operator, clause_left, clause_right);

                cfg.RemoveVertex(pivot);
                cfg.AddEdge(new ControlFlowEdge(pred, target2, e_pred2target1.Condition.Negate()));
                pred.Residue.SetElements(junction);
                somethingWasChanged |= true;
            }

            if (!somethingWasChanged) break;
        }

        parts = cfg.Cflow(vs, conv);
        (parts.Count() == 4).AssertTrue();
        var @const = parts.Except(vs, conv).AssertSingle(v => v.Residue.AssertSingle() is Const);
        var vnext = parts.Except(vs, conv, @const).AssertSingle();
        (cfg.Vedge(@const, vnext) == null && cfg.Vedge(vnext, @const) == null).AssertTrue();
        cfg.Vedge(vs, vnext).IsConditional.AssertTrue();
        cfg.Vedge(vs, @const).IsConditional.AssertTrue();
        cfg.Vedge(vnext, conv).IsUnconditional.AssertTrue();
        cfg.Vedge(@const, conv).IsUnconditional.AssertTrue();

        var estart = vs.Residue.AssertSingle();
        var enext = vnext.Residue.AssertSingle();
        var cond_const = @const.Residue.AssertSingle().AssertCast<Const>().Value.AssertCast<int>();
        var cond_edge = cfg.Vedge(vs, @const).Condition;
        var val_const = cond_const == 1 ? true : cond_const == 0 ? false : ((Func<bool>)(() => { throw AssertionHelper.Fail(); }))();
        var val_edge = cond_edge == PredicateType.IsTrue ? true : cond_edge == PredicateType.IsFalse ? false : ((Func<bool>)(() => { throw AssertionHelper.Fail(); }))();

        // The value reaching conv is "val_const" when estart evaluates to val_edge, and
        // "enext" otherwise:
        //   edge=true,  const=true   =>   estart || enext
        //   edge=true,  const=false  =>  !estart && enext
        //   edge=false, const=true   =>  !estart || enext
        //   edge=false, const=false  =>   estart && enext
        // So the left clause is negated exactly when edge and constant differ, and the
        // right clause is never negated. (The last row used to produce
        // "!estart && !enext".)
        var operator1 = val_const ? OperatorType.OrElse : OperatorType.AndAlso;
        var clause_left1 = val_edge == val_const ? estart : Operator.Not(estart);
        var clause_right1 = enext;
        var junction1 = Operator.Create(operator1, clause_left1, clause_right1);

        // edges are captured before the vertices they touch are removed
        var conv_outEdges = cfg.Vedges(conv, null);
        var conv_inEdges = cfg.Vedges(null, conv).Except(cfg.Vedges(parts, conv));
        cfg.RemoveVertices(@const, vnext, conv);
        conv_outEdges.ForEach(e => cfg.AddEdge(new ControlFlowEdge(vs, e.Target, e.Tag)));
        conv_inEdges.ForEach(e => cfg.AddEdge(new ControlFlowEdge(e.Source, vs, e.Tag)));

        // vs takes over conv: the loophole assignment is replaced by the junction
        vs.BalancedCode.Add(new Assign(ass.Lhs, junction1));
        vs.BalancedCode.AddElements(conv.BalancedCode.Skip(1));
        vs.Residue.SetElements(conv.Residue);
    }

    cfg.Edges().AssertEach(e => e.Tag.Arity() <= 1);
    cfg.Vertices.Where(v => v.Residue.IsNotEmpty()).AssertEach(v => v.Residue.Count() == 1);
    cfg.Vertices.AssertNone(v => v.Residue.IsNotEmpty() && cfg.Vedges(v, null).Any(e => e.IsUnconditional && e.Target != cfg.Finish));
}
// Builds the skeleton of the control flow graph for a method body.
//
// The ops are partitioned into runs of straight-line code: a Branch or Ret closes the
// current run (and belongs to no run itself), a branch target opens a new run, and a
// Throw closes the run it belongs to. One vertex is created per run; blocks2parts
// receives the mapping from each vertex to the ops of its run. The vertices are then
// linked according to fall-through, branches (following chains of branches to their
// final destination) and returns (which lead to cfg.Finish).
//
// Switch ops and branch chains that loop back onto themselves are not supported and
// cause an assertion failure.
public static ControlFlowGraph DoCreateCarcass(IMethodBody cil, out ReadOnlyDictionary<ControlFlowBlock, ReadOnlyCollection<IILOp>> blocks2parts)
{
    // create the control flow graph
    var cfg = new ControlFlowGraph();

    // partition the code into blocks with continuous control flow
    // todo. support switches and protected regions
    var targets = new HashSet<IILOp>(cil.OfType<Branch>().Select(br => br.Target));
    var l_partitions = new List<ReadOnlyCollection<IILOp>>();
    var l_partition = new List<IILOp>();
    Action qualifyPartition = () =>
    {
        if (l_partition.IsNotEmpty())
        {
            l_partitions.Add(l_partition.ToReadOnly());
            l_partition = new List<IILOp>();
        }
    };
    foreach (var op in cil)
    {
        if (op is Branch || op is Ret) qualifyPartition();
        else
        {
            if (targets.Contains(op)) qualifyPartition();
            l_partition.Add(op);
            if (op is Throw) qualifyPartition();
        }
    }
    qualifyPartition();
    var partitions = l_partitions.ToReadOnly();

    // create blocks and map those to ops and partitions
    blocks2parts = partitions.ToDictionary(p => new ControlFlowBlock(), p => p).ToReadOnly();
    blocks2parts.ForEach(kvp => cfg.AddVertex(kvp.Key));
    var op2blocks = new Dictionary<IILOp, ControlFlowBlock>();
    blocks2parts.ForEach(kvp => kvp.Value.ForEach(op => op2blocks.Add(op, kvp.Key)));
    // jumps belong to no block and are mapped to null
    cil.ForEach(op => { if (!op2blocks.ContainsKey(op)) op2blocks.Add(op, null); });

    // prepare to link the blocks
    // (a null first op stands for cfg.Start, a null second op stands for cfg.Finish)
    Action<IILOp, IILOp, CilPredicateType?> link = (op1, op2, cil_pred) =>
    {
        var source = op1 == null ? cfg.Start : op2blocks[op1];
        var target = op2 == null ? cfg.Finish : op2blocks[op2];
        var hir_pred = cil_pred == null ? (HirPredicateType?)null :
            (HirPredicateType)Enum.Parse(typeof(HirPredicateType), cil_pred.Value.ToString());
        cfg.AddEdge(new ControlFlowEdge(source, target, hir_pred));
    };

    // helpers for classifying jumps; none of them depends on the op being visited,
    // so they are created once rather than on every iteration of the loop below
    Func<IILOp, bool> isJmp = op1 => op1 is Ret || op1 is Branch;
    Func<IILOp, CilPredicateType?> pred = op1 =>
        op1 is Ret ? null :
        op1 is Branch ? ((Branch)op1).PredicateType :
        ((Func<CilPredicateType?>)(() => { throw AssertionHelper.Fail(); }))();
    Func<IILOp, bool> uncond = op1 => isJmp(op1) && pred(op1) == null;
    Func<IILOp, bool> cond = op1 => isJmp(op1) && pred(op1) != null;

    // Follows a chain of branches to the op where control finally lands. Yields null
    // when the chain ends in a Ret (or in nothing at all), which link() treats as
    // cfg.Finish; hence the result is never a Branch or a Ret. A chain that revisits a
    // branch would never terminate, so it is reported as an assertion failure.
    Func<IILOp, IILOp> resolveTarget = op1 =>
    {
        var visited = new HashSet<IILOp>();
        var current = op1;
        while (current is Branch)
        {
            if (!visited.Add(current)) throw AssertionHelper.Fail();
            current = ((Branch)current).Target;
        }

        return current is Ret ? null : current;
    };

    // link the blocks (down from 300+ LOC to this simple loop =))
    if (cil.IsEmpty()) link(null, null, null);
    foreach (var op in cil)
    {
        // todo. support switches here
        if (op is Switch) throw AssertionHelper.Fail();

        // todo. support general case of control flow
        // note. throw needs something on stack, so br > throw is impossible
        if (isJmp(op) && isJmp(op.Prev)) continue;
        if (isJmp(op))
        {
            // the jump itself: from the block that ends right before it to its destination
            link(op.Prev, resolveTarget(op), pred(op));

            // a conditional jump also falls through, under the negated predicate
            isJmp(op.Next).AssertImplies(uncond(op.Next));
            if (cond(op)) link(op.Prev, resolveTarget(op.Next), pred(op).Negate());
        }
        else if (op is Throw)
        {
            // do nothing - throw doesn't create links
        }
        else
        {
            if (op.Prev == null) link(null, op, null);
            if (isJmp(op.Next)) continue;

            // plain fall-through from one block into the next
            var blk = op2blocks.GetOrDefault(op);
            var blk_next = op2blocks.GetOrDefault(op.Next);
            if (blk != blk_next) link(op, op.Next, null);
        }
    }

    // yield control to the next step of the pipeline
    return cfg;
}