private static void CountUses(LowMethod<TRegister> method, int[] uses)
{
    // A variable is counted as used if it has a fixed storage location
    // For example, the Return op implicitly uses the local stored in the return register
    for (var i = 0; i < method.Locals.Count; i++)
    {
        if (method.Locals[i].RequiredLocation.IsSet)
        {
            uses[i] += FixedLocationSentinel; // Distinguish from ordinary locals
        }
    }

    // Go through the instructions and count reads (not writes)
    foreach (var block in method.Blocks)
    {
        foreach (var inst in block.Instructions)
        {
            if (inst.UsesLeft)
            {
                uses[inst.Left]++;
            }
            if (inst.UsesRight && inst.Right >= 0)
            {
                // Right may be -1 when it refers to a constant instead of a local
                uses[inst.Right]++;
            }
        }
    }
}
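// A sketch of how the counts might be consumed by a pattern (hypothetical helper, not
// part of the original source): a local is a candidate for folding away only if it has
// exactly one read and no fixed location. The sentinel added above makes fixed-location
// locals fail this test automatically, without a separate lookup.
private static bool IsSingleUse(int[] uses, int localIndex) => uses[localIndex] == 1;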
[TestCase(X64Register.R14)] // Never allocated in this method without a requirement
public void Register_requirement_is_respected(X64Register required)
{
    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool, required));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 0, 0, 0, 1), // Load 1 -> #0
            new LowInstruction(LowOp.Move, 1, 0, 0, 0),    // Move #0 -> #1
            new LowInstruction(LowOp.Compare, 0, 1, 0, 0), // Compare #1, #0
            new LowInstruction(LowOp.Return, 0, 0, 0, 0)   // Return #0
        },
        Predecessors = Array.Empty<int>(),
        Successors = Array.Empty<int>()
    });

    var (rewritten, allocationMap) = X64RegisterAllocator.Allocate(method);

    AssertDump(rewritten, @"
LB_0:
    LoadInt 0 0 1 -> 0
    Move 0 0 0 -> 1
    Compare 1 0 0 -> 0
    Return 0 0 0 -> 0");

    Assert.That(allocationMap.Get(0).localIndex, Is.EqualTo(0));
    Assert.That(allocationMap.Get(1).localIndex, Is.EqualTo(1));
    Assert.That(allocationMap.Get(0).location.IsSet, Is.True);
    Assert.That(allocationMap.Get(1).location.IsSet, Is.True);
    Assert.That(allocationMap.Get(1).location.Register, Is.EqualTo(required));
}
public static LowMethod<X64Register> Lower(CompiledMethod highMethod)
{
    Debug.Assert(highMethod.Body != null);
    Debug.Assert(highMethod.Body.BasicBlocks.Count > 0);

    var lowMethod = new LowMethod<X64Register>();

    // Create locals for SSA values
    // Additional locals may be created by instructions
    var paramCount = 0;
    foreach (var value in highMethod.Values)
    {
        if (value.Flags.HasFlag(LocalFlags.Parameter))
        {
            paramCount++;
        }
        lowMethod.Locals.Add(new LowLocal<X64Register>(value.Type));
    }

    // Convert each basic block
    var methodHasCalls = false;
    for (var i = 0; i < highMethod.Body.BasicBlocks.Count; i++)
    {
        var highBlock = highMethod.Body.BasicBlocks[i];
        lowMethod.Blocks.Add(ConvertBlock(highBlock, highMethod, lowMethod, i == 0, paramCount,
            out var blockHasCalls));
        methodHasCalls |= blockHasCalls;
    }

    lowMethod.IsLeafMethod = !methodHasCalls;
    return lowMethod;
}
private static void AssertDump(LowMethod<X64Register> method, string expected)
{
    var dumpWriter = new StringWriter();
    method.Dump(dumpWriter, false);

    Assert.That(
        dumpWriter.ToString().Replace("\r\n", "\n").Trim(),
        Is.EqualTo(expected.Replace("\r\n", "\n").Trim()));
}
protected static void AssertDump<TRegister>(LowMethod<TRegister> method, string expected)
    where TRegister : struct, Enum
{
    var dumpWriter = new StringWriter();
    method.Dump(dumpWriter, true);

    Assert.That(
        dumpWriter.ToString().Replace("\r\n", "\n").Trim(),
        Is.EqualTo(expected.Replace("\r\n", "\n").Trim()));
}
private static LowBlock ConvertBlock(BasicBlock highBlock, CompiledMethod highMethod,
    LowMethod<X64Register> methodInProgress, bool isFirstBlock, int paramCount, out bool containsCalls)
{
    var lowBlock = new LowBlock
    {
        Phis = highBlock.Phis,
        Predecessors = highBlock.Predecessors
    };

    // Initialize the list of successors
    if (highBlock.AlternativeSuccessor >= 0)
    {
        lowBlock.Successors = new[] { highBlock.AlternativeSuccessor, highBlock.DefaultSuccessor };
    }
    else if (highBlock.DefaultSuccessor >= 0)
    {
        lowBlock.Successors = new[] { highBlock.DefaultSuccessor };
    }
    else
    {
        lowBlock.Successors = Array.Empty<int>();
    }

    // At the start of the first block, we must copy parameters from fixed-location temps
    // to freely assigned locals
    if (isFirstBlock)
    {
        // This assumes that the first paramCount locals are the parameters
        for (var i = 0; i < paramCount; i++)
        {
            methodInProgress.Locals.Add(
                new LowLocal<X64Register>(highMethod.Values[i].Type, GetLocationForParameter(i)));
            var tempIndex = methodInProgress.Locals.Count - 1;

            lowBlock.Instructions.Add(new LowInstruction(LowOp.Move, i, tempIndex, 0, 0));
        }
    }

    // Convert the instructions
    containsCalls = false;
    var returns = false;
    ConvertInstructions(highBlock, highMethod, lowBlock, methodInProgress, ref containsCalls, ref returns);

    if (!returns)
    {
        lowBlock.Instructions.Add(new LowInstruction(LowOp.Jump, highBlock.DefaultSuccessor, 0, 0, 0));
    }

    return lowBlock;
}
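// GetLocationForParameter is referenced above but not shown in these excerpts. A minimal
// sketch of what it must do, assuming the Windows x64 calling convention (the shadow space
// comment in the code generator implies this target) and assuming the return type converts
// to the locals' required-location type; stack-passed parameters are left out of the sketch.
private static X64Register GetLocationForParameter(int paramIndex)
{
    switch (paramIndex)
    {
        case 0: return X64Register.Rcx;
        case 1: return X64Register.Rdx;
        case 2: return X64Register.R8;
        case 3: return X64Register.R9;
        default: throw new NotImplementedException("Parameters passed on the stack");
    }
}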
public void Optimizer_handles_near_empty_method()
{
    var method = new LowMethod<X64Register>();
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.Nop, 0, 0, 0, 0)
        }
    });

    Assert.That(() => PeepholeOptimizer<X64Register>.Optimize(method), Throws.Nothing);
}
/// <summary>
/// Optimizes the given LIR.
/// </summary>
/// <param name="method">The method to optimize. The blocks will be mutated in place.</param>
public static void Optimize(LowMethod<TRegister> method)
{
    // As an extension to a basic peephole optimizer, count the uses of locals
    // TODO: Get a pooled array
    var uses = new int[method.Locals.Count];
    CountUses(method, uses);

    // Optimize each basic block on its own
    // TODO: Add more aggressive patterns that are only enabled on optimizing builds
    foreach (var block in method.Blocks)
    {
        OptimizeBlock(block.Instructions, uses);
    }
}
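// A minimal sketch of one pattern OptimizeBlock could apply (hypothetical code, shown only
// to illustrate how the use counts feed the patterns; the real OptimizeBlock is not shown
// in these excerpts): a LoadInt whose destination is read exactly once, immediately by a
// Move, folds into a single LoadInt that writes the Move's destination directly. The
// Load_and_unnecessary_move_are_folded test below exercises exactly this rewrite, and the
// use-count sentinel keeps fixed-location locals from matching.
private static bool TryFoldLoadMove(List<LowInstruction> instructions, int i, int[] uses)
{
    if (i + 1 >= instructions.Count)
        return false;

    var load = instructions[i];
    var move = instructions[i + 1];

    if (load.Op == LowOp.LoadInt && move.Op == LowOp.Move &&
        move.Left == load.Dest && uses[load.Dest] == 1)
    {
        // Replace the pair with a single load into the final destination
        instructions[i] = new LowInstruction(LowOp.LoadInt, move.Dest, 0, 0, load.Data);
        instructions.RemoveAt(i + 1);
        return true;
    }

    return false;
}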
public void Noncommutative_arithmetic_destination_is_not_same_as_right(LowOp op)
{
    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32, X64Register.Rax));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 0, 0, 0, 1), // Load 1 -> #0
            new LowInstruction(LowOp.LoadInt, 1, 0, 0, 1), // Load 1 -> #1
            new LowInstruction(op, 2, 0, 1, 0),            // Subtract/Shift #0 - #1 -> #2
            new LowInstruction(LowOp.Test, 0, 0, 0, 0),    // Use #0
            new LowInstruction(LowOp.Move, 3, 2, 0, 0),    // Use #2
            new LowInstruction(LowOp.Return, 0, 3, 0, 0)
        },
        Predecessors = Array.Empty<int>(),
        Successors = Array.Empty<int>()
    });

    var (rewritten, allocationMap) = X64RegisterAllocator.Allocate(method);

    AssertDump(rewritten, $@"
LB_0:
    LoadInt 0 0 1 -> 0
    LoadInt 0 0 1 -> 1
    {op} 0 1 0 -> 2
    Test 0 0 0 -> 0
    Move 2 0 0 -> 3
    Return 3 0 0 -> 0");

    Assert.That(allocationMap.Get(0).localIndex, Is.EqualTo(0));
    Assert.That(allocationMap.Get(1).localIndex, Is.EqualTo(1));
    Assert.That(allocationMap.Get(2).localIndex, Is.EqualTo(2));

    // It would be tempting to assign #1 and #2 the same register, but that is not good
    // for x64: since the two-operand form computes the result in place, we would emit
    // "mov r1, r0; sub r1, r1", where the mov overwrites the value of #1 before the
    // sub gets to read it.
    Assert.That(allocationMap.Get(1).location.Register,
        Is.Not.EqualTo(allocationMap.Get(2).location.Register));
}
public static (LowMethod<X64Register> allocatedMethod, AllocationInfo<X64Register> allocationInfo)
    Allocate(LowMethod<X64Register> method)
{
    // Compute the live intervals
    var intervals = new List<Interval>(method.Locals.Count);
    var blockEnds = new int[method.Blocks.Count];
    ComputeLiveIntervals(method, intervals, blockEnds);

    // Sort the intervals by start position
    intervals.Sort();

    // Allocate registers by doing a linear scan
    DoLinearScan(intervals);

    // Rewrite the method to use interval numbers instead of local indices
    // This also resolves Phi functions by converting them into moves/swaps
    var rewrittenMethod = RewriteMethod(method, intervals, blockEnds);

    return (rewrittenMethod, new AllocationInfo<X64Register>(intervals));
}
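// Usage sketch (a hypothetical helper mirroring how the unit tests in this repository
// drive the allocator; only members that the tests actually use appear here): the caller
// hands in lowered LIR and gets back a rewritten method whose operands are interval
// numbers, plus an AllocationInfo mapping each interval to a location and a local.
private static void AllocateAndDescribe(LowMethod<X64Register> lowMethod)
{
    var (allocated, allocationInfo) = X64RegisterAllocator.Allocate(lowMethod);

    for (var i = 0; i < allocationInfo.IntervalCount; i++)
    {
        var (location, localIndex) = allocationInfo.Get(i);

        // localIndex == -1 marks a register-blocking interval with no associated local
        if (localIndex >= 0 && location.IsSet)
        {
            Console.WriteLine($"#{localIndex} -> {location.Register}");
        }
    }
}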
public void Intersecting_variables_in_single_block_have_separate_registers()
{
    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Void, X64Register.Rax));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 0, 0, 0, 1), // Load 1 -> #0
            new LowInstruction(LowOp.LoadInt, 1, 0, 0, 1), // Load 1 -> #1
            new LowInstruction(LowOp.Compare, 0, 0, 1, 0), // Compare #0, #1
            new LowInstruction(LowOp.LoadInt, 2, 0, 0, 0), // Initialize void return
            new LowInstruction(LowOp.Return, 0, 2, 0, 0)
        },
        Predecessors = Array.Empty<int>(),
        Successors = Array.Empty<int>()
    });

    var (rewritten, allocationMap) = X64RegisterAllocator.Allocate(method);

    AssertDump(rewritten, @"
LB_0:
    LoadInt 0 0 1 -> 0
    LoadInt 0 0 1 -> 1
    Compare 0 1 0 -> 0
    LoadInt 0 0 0 -> 2
    Return 2 0 0 -> 0");

    Assert.That(allocationMap.Get(0).localIndex, Is.EqualTo(0));
    Assert.That(allocationMap.Get(1).localIndex, Is.EqualTo(1));
    Assert.That(allocationMap.Get(2).localIndex, Is.EqualTo(2));
    Assert.That(allocationMap.Get(0).location.IsSet, Is.True);
    Assert.That(allocationMap.Get(1).location.IsSet, Is.True);
    Assert.That(allocationMap.Get(0).location, Is.Not.EqualTo(allocationMap.Get(1).location));
    Assert.That(allocationMap.Get(2).location.Register, Is.EqualTo(X64Register.Rax));
}
/// <summary>
/// Phase 3: Rewrite the instructions to reference intervals and convert Phis to moves.
/// </summary>
/// <param name="original">The original LIR method that was passed to the allocator.</param>
/// <param name="intervals">The list of intervals with allocation decisions done.</param>
/// <param name="blockEnds">
/// The block ends computed by
/// <see cref="ComputeLiveIntervals(LowMethod{X64Register}, List{Interval{X64Register}}, int[])"/>.
/// </param>
private static LowMethod<X64Register> RewriteMethod(LowMethod<X64Register> original,
    List<Interval> intervals, int[] blockEnds)
{
    var result = new LowMethod<X64Register>(original.Locals,
        new List<LowBlock>(original.Blocks.Count), original.IsLeafMethod);

    // Replace instruction operands with references to intervals instead of locals
    var instIndex = 0;
    for (var blockIndex = 0; blockIndex < original.Blocks.Count; blockIndex++)
    {
        // TODO: Account for instructions emitted by Phi resolution in the capacity calculation
        var oldBlock = original.Blocks[blockIndex];
        var newBlock = new LowBlock(new List<LowInstruction>(oldBlock.Instructions.Count))
        {
            Phis = oldBlock.Phis, // This is required in ConvertPhisToMoves but then nulled out
            Predecessors = oldBlock.Predecessors,
            Successors = oldBlock.Successors
        };

        instIndex++;

        foreach (var inst in oldBlock.Instructions)
        {
            newBlock.Instructions.Add(new LowInstruction(inst.Op,
                inst.UsesDest ? ConvertLocalToInterval(inst.Dest, intervals, instIndex) : inst.Dest,
                inst.UsesLeft ? ConvertLocalToInterval(inst.Left, intervals, instIndex) : inst.Left,
                inst.UsesRight && inst.Right >= 0
                    ? ConvertLocalToInterval(inst.Right, intervals, instIndex)
                    : inst.Right,
                inst.Data));
            instIndex++;
        }

        result.Blocks.Add(newBlock);
    }

    // Resolve Phi functions
    ConvertPhisToMoves(result, intervals, blockEnds);

    return result;
}
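// ConvertLocalToInterval is used above and in ConvertPhisToMoves, but its body is not
// shown in these excerpts. A minimal sketch of what it must do, assuming Interval exposes
// LocalIndex/Start/End as used elsewhere in this file: find the interval that carries the
// given local at the given instruction index, and return its position in the list. A real
// implementation would likely exploit the start-sorted order rather than scan linearly.
private static int ConvertLocalToInterval(int localIndex, List<Interval> intervals, int instIndex)
{
    for (var i = 0; i < intervals.Count; i++)
    {
        var interval = intervals[i];
        if (interval.LocalIndex == localIndex && interval.Start <= instIndex && interval.End >= instIndex)
        {
            return i;
        }
    }

    throw new InvalidOperationException($"Local {localIndex} is not live at instruction {instIndex}.");
}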
public void Load_right_and_arithmetic_are_folded(LowOp arithmeticOp, long constant)
{
    // The right operand of shift has a special location on x64
    var isShift = arithmeticOp == LowOp.ShiftLeft || arithmeticOp == LowOp.ShiftArithmeticRight;
    var rightLocation = isShift ? X64Register.Rdx : X64Register.Invalid;

    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32, requiredLocation: X64Register.Rcx));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32, requiredLocation: rightLocation));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 1, 0, 0, (ulong)constant), // Load constant -> #1
            new LowInstruction(arithmeticOp, 2, 0, 1, 0)                 // #0 op #1 -> #2
        }
    });

    var expected = $@"
; #0 int32 [rcx]
public void Division_reserves_rdx(LowOp op)
{
    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32, X64Register.Rax));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32, X64Register.Rax));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 0, 0, 0, 1), // Load 1 -> #0
            new LowInstruction(LowOp.LoadInt, 1, 0, 0, 1), // Load 1 -> #1
            new LowInstruction(LowOp.LoadInt, 2, 0, 0, 1), // Load 1 -> #2
            new LowInstruction(op, 3, 2, 2, 0),            // Divide #2 / #2 -> #3
            new LowInstruction(LowOp.Test, 0, 0, 0, 0),    // Use #0
            new LowInstruction(LowOp.Move, 0, 1, 0, 0),    // Use #1
            new LowInstruction(LowOp.Return, 0, 3, 0, 0)
        },
        Predecessors = Array.Empty<int>(),
        Successors = Array.Empty<int>()
    });

    var (_, allocationMap) = X64RegisterAllocator.Allocate(method);

    // RDX holds the upper part of the dividend and therefore must be reserved
    for (var i = 0; i < allocationMap.IntervalCount; i++)
    {
        var (location, localIndex) = allocationMap.Get(i);
        if (localIndex >= 0)
        {
            Assert.That(location.Register, Is.Not.EqualTo(X64Register.Rdx));
        }
    }
}
public void Single_phi_in_a_loop()
{
    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 1, 0, 0, 1),  // Load 1 -> #1
            new LowInstruction(LowOp.IntegerAdd, 2, 0, 1, 0), // Add #0, #1 -> #2
            new LowInstruction(LowOp.Jump, 0, 0, 0, 0)
        },
        Phis = new[] { new Phi(0, ImmutableList<int>.Empty.Add(2)) },
        Predecessors = new int[] { 0 },
        Successors = new int[] { 0 }
    });

    var (rewritten, allocationMap) = X64RegisterAllocator.Allocate(method);

    // #0 and #2 have the same register, therefore there is no move instruction
    AssertDump(rewritten, @"
LB_0:
    LoadInt 0 0 1 -> 1
    IntegerAdd 0 1 0 -> 2
    Jump 0 0 0 -> 0");

    Assert.That(allocationMap.Get(0).localIndex, Is.EqualTo(0));
    Assert.That(allocationMap.Get(1).localIndex, Is.EqualTo(1));
    Assert.That(allocationMap.Get(0).location.IsSet, Is.True);
    Assert.That(allocationMap.Get(1).location.IsSet, Is.True);
    Assert.That(allocationMap.Get(0).location, Is.EqualTo(allocationMap.Get(2).location));
    Assert.That(allocationMap.Get(0).location, Is.Not.EqualTo(allocationMap.Get(1).location));
}
public void Load_and_unnecessary_move_are_folded()
{
    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 0, 0, 0, 1234), // Load 1234 -> #0
            new LowInstruction(LowOp.Move, 1, 0, 0, 0)        // Move #0 -> #1
        }
    });

    const string expected = @"
; #0 int32 [?]
; #1 int32 [?]
LB_0:
    LoadInt 0 0 1234 -> 1
";

    OptimizeAndVerify(method, expected);
}
private void EmitBlock(int blockIndex, LowMethod<X64Register> method,
    AllocationInfo<X64Register> allocation, CompiledMethod highMethod)
{
    var block = method.Blocks[blockIndex];
    var emitter = _peWriter.Emitter;

    // If this is the first block, save callee-saved registers and allocate shadow space for called methods
    // TODO: The shadow space could be reserved by the register allocator instead, once it supports the stack
    if (blockIndex == 0)
    {
        EmitRegisterSave(emitter, method);
    }

    foreach (var inst in block.Instructions)
    {
        switch (inst.Op)
        {
            case LowOp.LoadInt:
            {
                var destLocation = allocation.Get(inst.Dest).location;

                if (inst.Data == 0)
                {
                    // "xor reg, reg" is the preferred way to zero a register on x64. This optimization
                    // is not done by the peephole optimizer because it would break the SSA form.
                    emitter.EmitGeneralBinaryOp(BinaryOp.BitwiseXor, destLocation, destLocation, 4);
                }
                else
                {
                    emitter.EmitLoad(destLocation, inst.Data);
                }
                break;
            }
            case LowOp.Move:
            {
                var (sourceLocation, _) = allocation.Get(inst.Left);
                var (destLocation, destLocalIndex) = allocation.Get(inst.Dest);
                var destLocal = method.Locals[destLocalIndex];

                // Move booleans always as 4 byte values so that we don't need to care about zero extension
                var operandSize = destLocal.Type.Equals(SimpleType.Bool) ? 4 : destLocal.Type.SizeInBytes;

                if (sourceLocation != destLocation)
                {
                    emitter.EmitMov(destLocation, sourceLocation, operandSize);
                }
                break;
            }
            case LowOp.Swap:
                emitter.EmitExchange(allocation.Get(inst.Left).location, allocation.Get(inst.Right).location);
                break;
            case LowOp.IntegerAdd:
                EmitIntegerBinaryOp(BinaryOp.Add, in inst, method, allocation);
                break;
            case LowOp.IntegerSubtract:
                EmitIntegerBinaryOp(BinaryOp.Subtract, in inst, method, allocation);
                break;
            case LowOp.IntegerMultiply:
                EmitIntegerBinaryOp(BinaryOp.Multiply, in inst, method, allocation);
                break;
            case LowOp.IntegerDivide:
            case LowOp.IntegerModulo:
            {
                // The dividend is already guaranteed to be in RAX, and RDX is reserved.
                // We must sign-extend RAX to RDX and then emit the division instruction.
                // The desired result is either in RAX (divide) or RDX (modulo).
                var (leftLocation, leftLocalIndex) = allocation.Get(inst.Left);
                var (rightLocation, _) = allocation.Get(inst.Right);
                var operandSize = method.Locals[leftLocalIndex].Type.SizeInBytes;

                Debug.Assert(leftLocation.Register == X64Register.Rax);
                Debug.Assert(allocation.Get(inst.Dest).location.Register == X64Register.Rax ||
                    allocation.Get(inst.Dest).location.Register == X64Register.Rdx);

                emitter.EmitExtendRaxToRdx(operandSize);
                emitter.EmitSignedDivide(rightLocation, operandSize);
                break;
            }
            case LowOp.IntegerNegate:
                EmitIntegerUnaryOp(UnaryOp.Negate, in inst, method, allocation);
                break;
            case LowOp.BitwiseNot:
                EmitIntegerUnaryOp(UnaryOp.Not, in inst, method, allocation);
                break;
            case LowOp.BitwiseAnd:
                EmitIntegerBinaryOp(BinaryOp.BitwiseAnd, in inst, method, allocation);
                break;
            case LowOp.BitwiseOr:
                EmitIntegerBinaryOp(BinaryOp.BitwiseOr, in inst, method, allocation);
                break;
            case LowOp.BitwiseXor:
                EmitIntegerBinaryOp(BinaryOp.BitwiseXor, in inst, method, allocation);
                break;
            case LowOp.ShiftLeft:
                EmitShift(ShiftType.Left, in inst, method, allocation);
                break;
            case LowOp.ShiftArithmeticRight:
                EmitShift(ShiftType.ArithmeticRight, in inst, method, allocation);
                break;
            case LowOp.Compare:
            {
                // TODO: Can the left and right operands have different sizes?
                var (leftLocation, leftLocalIndex) = allocation.Get(inst.Left);
                var leftLocal = method.Locals[leftLocalIndex];
                var operandSize = leftLocal.Type.Equals(SimpleType.Bool) ? 4 : leftLocal.Type.SizeInBytes;

                if (inst.Right == -1)
                {
                    // Comparison with a constant
                    emitter.EmitCmpWithImmediate(leftLocation, (int)inst.Data, operandSize);
                }
                else
                {
                    // Comparison with another local
                    emitter.EmitCmp(leftLocation, allocation.Get(inst.Right).location, operandSize);
                }
                break;
            }
            case LowOp.Test:
            {
                var srcDestLocation = allocation.Get(inst.Left).location;
                emitter.EmitTest(srcDestLocation, srcDestLocation);
                break;
            }
            case LowOp.SetIfEqual:
                EmitConditionalSet(X64Condition.Equal, allocation.Get(inst.Dest).location);
                break;
            case LowOp.SetIfNotEqual:
                EmitConditionalSet(X64Condition.NotEqual, allocation.Get(inst.Dest).location);
                break;
            case LowOp.SetIfLess:
                EmitConditionalSet(X64Condition.Less, allocation.Get(inst.Dest).location);
                break;
            case LowOp.SetIfLessOrEqual:
                EmitConditionalSet(X64Condition.LessOrEqual, allocation.Get(inst.Dest).location);
                break;
            case LowOp.SetIfGreater:
                EmitConditionalSet(X64Condition.Greater, allocation.Get(inst.Dest).location);
                break;
            case LowOp.SetIfGreaterOrEqual:
                EmitConditionalSet(X64Condition.GreaterOrEqual, allocation.Get(inst.Dest).location);
                break;
            case LowOp.Call:
            {
                var calleeName = highMethod.CallInfos[inst.Left].CalleeFullName;

                if (_peWriter.TryGetMethodOffset((int)inst.Data, out var knownOffset))
                {
                    // If the method offset is already known, emit a complete call
                    emitter.EmitCall(knownOffset, calleeName);
                }
                else
                {
                    // Otherwise, the offset must be fixed up later
                    emitter.EmitCallWithFixup((int)inst.Data, calleeName, out var fixup);
                    _peWriter.AddCallFixup(fixup);
                }
                break;
            }
            case LowOp.CallImported:
            {
                var calleeName = highMethod.CallInfos[inst.Left].CalleeFullName;
                emitter.EmitCallIndirectWithFixup((int)inst.Data, calleeName, out var fixup);
                _peWriter.AddCallFixup(fixup);
                break;
            }
            case LowOp.Jump:
            {
                // Do not emit a jump for a simple fallthrough
                if (inst.Dest == blockIndex + 1)
                {
                    return;
                }

                // Don't bother creating a fixup for a backward branch where the destination is already known
                if (inst.Dest <= blockIndex)
                {
                    emitter.EmitJmp(inst.Dest, _blockPositions[inst.Dest]);
                }
                else
                {
                    emitter.EmitJmpWithFixup(inst.Dest, out var fixup);
                    _fixupsForMethod.Add(fixup);
                }
                break;
            }
            case LowOp.JumpIfEqual:
                EmitConditionalJump(X64Condition.Equal, inst.Dest, blockIndex);
                break;
            case LowOp.JumpIfNotEqual:
                EmitConditionalJump(X64Condition.NotEqual, inst.Dest, blockIndex);
                break;
            case LowOp.JumpIfLess:
                EmitConditionalJump(X64Condition.Less, inst.Dest, blockIndex);
                break;
            case LowOp.JumpIfLessOrEqual:
                EmitConditionalJump(X64Condition.LessOrEqual, inst.Dest, blockIndex);
                break;
            case LowOp.JumpIfGreater:
                EmitConditionalJump(X64Condition.Greater, inst.Dest, blockIndex);
                break;
            case LowOp.JumpIfGreaterOrEqual:
                EmitConditionalJump(X64Condition.GreaterOrEqual, inst.Dest, blockIndex);
                break;
            case LowOp.Return:
                EmitReturn(emitter, method);
                return;
            case LowOp.Nop:
                break;
            default:
                throw new NotImplementedException("Unimplemented LIR opcode: " + inst.Op);
        }
    }
}
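// EmitConditionalJump is referenced above but not shown in these excerpts. A minimal
// sketch, assuming it mirrors the LowOp.Jump case: emit directly for backward branches
// whose position is already recorded in _blockPositions, otherwise emit with a fixup to
// patch once the target block is emitted. The emitter method names EmitJcc and
// EmitJccWithFixup are assumptions for this sketch, not confirmed API.
private void EmitConditionalJump(X64Condition condition, int targetBlock, int currentBlock)
{
    var emitter = _peWriter.Emitter;

    if (targetBlock <= currentBlock)
    {
        emitter.EmitJcc(condition, targetBlock, _blockPositions[targetBlock]);
    }
    else
    {
        emitter.EmitJccWithFixup(condition, targetBlock, out var fixup);
        _fixupsForMethod.Add(fixup);
    }
}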
public void Complex_phi_chain_is_not_allocated_the_same_register()
{
    // void Swap() {
    //     int32 a = 10;
    //     int32 b = 11;
    //     while (a < b) {
    //         int32 temp = a;
    //         a = b;
    //         b = temp;
    //     }
    // }
    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Int32));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Void, X64Register.Rax));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 0, 0, 0, 10), // Load 10 -> #0
            new LowInstruction(LowOp.LoadInt, 1, 0, 0, 11), // Load 11 -> #1
            new LowInstruction(LowOp.Jump, 1, 0, 0, 0)
        },
        Predecessors = Array.Empty<int>(),
        Successors = new[] { 1 },
    });
    method.Blocks.Add(new LowBlock
    {
        Phis = new List<Phi>()
        {
            new Phi(2, new[] { 0, 3 }.ToImmutableList()),
            new Phi(3, new[] { 1, 2 }.ToImmutableList())
        },
        Instructions =
        {
            new LowInstruction(LowOp.Compare, 0, 2, 3, 0),    // Compare #2, #3
            new LowInstruction(LowOp.JumpIfLess, 3, 0, 0, 0), // JumpIfLess LB_3
            new LowInstruction(LowOp.Jump, 2, 0, 0, 0),       // Jump LB_2
        },
        Predecessors = new[] { 0, 2 },
        Successors = new[] { 3, 2 },
    });
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.Jump, 1, 0, 0, 0) // Jump LB_1
        },
        Predecessors = new[] { 1 },
        Successors = new[] { 1 },
    });
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 4, 0, 0, 0), // Void return
            new LowInstruction(LowOp.Return, 0, 4, 0, 0)   // Return
        },
        Predecessors = new[] { 1 },
        Successors = Array.Empty<int>(),
    });

    // Act
    var (converted, map) = X64RegisterAllocator.Allocate(method);

    // Assert
    AssertDump(converted, @"
LB_0:
    LoadInt 0 0 10 -> 0
    LoadInt 0 0 11 -> 1
    Jump 0 0 0 -> 1
LB_1:
    Compare 2 3 0 -> 0
    JumpIfLess 0 0 0 -> 3
    Jump 0 0 0 -> 2
LB_2:
    Swap 3 2 0 -> 0
    Jump 0 0 0 -> 1
LB_3:
    LoadInt 0 0 0 -> 4
    Return 4 0 0 -> 0");

    // The Phi destinations must have intersecting intervals and therefore different registers,
    // while the values from the initial block should not need any copies
    Assert.That(map.Get(3).location, Is.Not.EqualTo(map.Get(4).location));
    Assert.That(map.Get(0).location, Is.EqualTo(map.Get(2).location));
    Assert.That(map.Get(1).location, Is.EqualTo(map.Get(3).location));
}
private static void ConvertInstructions(BasicBlock highBlock, CompiledMethod highMethod,
    LowBlock lowBlock, LowMethod<X64Register> methodInProgress, ref bool containsCalls, ref bool returns)
{
    foreach (var inst in highBlock.Instructions)
    {
        switch (inst.Operation)
        {
            case Opcode.Add:
            case Opcode.BitwiseAnd:
            case Opcode.BitwiseOr:
            case Opcode.BitwiseXor:
            case Opcode.Multiply:
            case Opcode.Subtract:
                ConvertBinaryArithmetic(in inst, lowBlock, methodInProgress);
                break;
            case Opcode.ArithmeticNegate:
                ConvertUnaryArithmetic(in inst, lowBlock, methodInProgress);
                break;
            case Opcode.BitwiseNot:
                if (methodInProgress.Locals[(int)inst.Left].Type.Equals(SimpleType.Bool))
                {
                    // For booleans, BitwiseNot is interpreted as a logical NOT.
                    // Convert it into a Test followed by SetIfZero (SetIfEqual)
                    lowBlock.Instructions.Add(new LowInstruction(LowOp.Test, 0, (int)inst.Left, 0, 0));
                    lowBlock.Instructions.Add(new LowInstruction(LowOp.SetIfEqual, inst.Destination, 0, 0, 0));
                }
                else
                {
                    ConvertUnaryArithmetic(in inst, lowBlock, methodInProgress);
                }
                break;
            case Opcode.BranchIf:
                ConvertBranchIf(lowBlock, highBlock, (int)inst.Left);
                break;
            case Opcode.Call:
                containsCalls = true;
                ConvertCall(lowBlock, highMethod.CallInfos[(int)inst.Left], inst, methodInProgress);
                break;
            case Opcode.Divide:
                ConvertDivisionOrModulo(in inst, lowBlock, methodInProgress);
                break;
            case Opcode.Equal:
                ConvertCompare(in inst, LowOp.SetIfEqual, lowBlock);
                break;
            case Opcode.Less:
                ConvertCompare(in inst, LowOp.SetIfLess, lowBlock);
                break;
            case Opcode.LessOrEqual:
                ConvertCompare(in inst, LowOp.SetIfLessOrEqual, lowBlock);
                break;
            case Opcode.Load:
                lowBlock.Instructions.Add(new LowInstruction(LowOp.LoadInt, inst.Destination, 0, 0, inst.Left));
                break;
            case Opcode.Modulo:
                ConvertDivisionOrModulo(in inst, lowBlock, methodInProgress);
                break;
            case Opcode.Return:
                returns = true;
                ConvertReturn(lowBlock, (int)inst.Left, highMethod, methodInProgress);
                break;
            case Opcode.ShiftLeft:
            case Opcode.ShiftRight:
                ConvertShift(in inst, lowBlock, methodInProgress);
                break;
            default:
                throw new NotImplementedException("Unimplemented opcode to lower: " + inst.Operation);
        }
    }
}
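// ConvertCompare is referenced above but not shown in these excerpts. A minimal sketch,
// assuming it follows the same shape as the boolean BitwiseNot case (a flag-producing
// instruction followed by a SetIfX that materializes the flag); the HIR type name
// Instruction and its field shapes are inferred from the surrounding cases, not confirmed.
private static void ConvertCompare(in Instruction inst, LowOp setOp, LowBlock lowBlock)
{
    // Compare the two source values, then store the resulting flag into the destination
    lowBlock.Instructions.Add(new LowInstruction(LowOp.Compare, 0, (int)inst.Left, (int)inst.Right, 0));
    lowBlock.Instructions.Add(new LowInstruction(setOp, inst.Destination, 0, 0, 0));
}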
public void Call_instruction_reserves_registers(LowOp callOp)
{
    var method = new LowMethod<X64Register>();
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Bool));
    method.Locals.Add(new LowLocal<X64Register>(SimpleType.Void, X64Register.Rax));
    method.Blocks.Add(new LowBlock
    {
        Instructions =
        {
            new LowInstruction(LowOp.LoadInt, 0, 0, 0, 1), // Load 1 -> #0
            new LowInstruction(LowOp.LoadInt, 1, 0, 0, 1), // Load 1 -> #1
            new LowInstruction(LowOp.LoadInt, 2, 0, 0, 1), // Load 1 -> #2
            new LowInstruction(LowOp.LoadInt, 3, 0, 0, 1), // Load 1 -> #3
            new LowInstruction(LowOp.LoadInt, 4, 0, 0, 1), // Load 1 -> #4
            new LowInstruction(callOp, 5, 0, 0, 1234),     // Call - this trashes the volatile registers rax, rcx, rdx and r8-r11
            new LowInstruction(LowOp.Test, 0, 0, 0, 0),    // Test #0
            new LowInstruction(LowOp.Test, 0, 1, 0, 0),    // Test #1
            new LowInstruction(LowOp.Test, 0, 2, 0, 0),    // Test #2
            new LowInstruction(LowOp.Test, 0, 3, 0, 0),    // Test #3
            new LowInstruction(LowOp.Test, 0, 4, 0, 0),    // Test #4
            new LowInstruction(LowOp.Return, 5, 0, 0, 0)
        },
        Predecessors = Array.Empty<int>(),
        Successors = Array.Empty<int>()
    });

    var (_, allocationMap) = X64RegisterAllocator.Allocate(method);

    // No local variable should be assigned to a blocked register
    for (var i = 0; i < allocationMap.IntervalCount; i++)
    {
        var (location, localIndex) = allocationMap.Get(i);
        if (localIndex == -1)
        {
            continue;
        }
        if (localIndex == 5)
        {
            Assert.That(location.Register, Is.EqualTo(X64Register.Rax));
            continue;
        }

        Assert.That(location.IsSet, Is.True);
        Assert.That(location.Register, Is.Not.EqualTo(X64Register.Rax));
        Assert.That(location.Register, Is.Not.EqualTo(X64Register.Rcx));
        Assert.That(location.Register, Is.Not.EqualTo(X64Register.Rdx));
        Assert.That(location.Register, Is.Not.EqualTo(X64Register.R8));
        Assert.That(location.Register, Is.Not.EqualTo(X64Register.R9));
        Assert.That(location.Register, Is.Not.EqualTo(X64Register.R10));
        Assert.That(location.Register, Is.Not.EqualTo(X64Register.R11));

        // ...and as a general sanity check, do not allocate the stack pointer!
        Assert.That(location.Register, Is.Not.EqualTo(X64Register.Rsp));
    }
}
private static void ConvertPhisToMoves(LowMethod<X64Register> method, List<Interval> intervals,
    int[] blockEnds)
{
    // This also handles different locations between basic blocks
    var movesToDo = new List<(int fromInterval, int toInterval)>();

    for (var blockIndex = 0; blockIndex < method.Blocks.Count; blockIndex++)
    {
        var instIndex = blockIndex == 0 ? 0 : blockEnds[blockIndex - 1] + 1;
        var block = method.Blocks[blockIndex];

        if (block.Predecessors is null)
        {
            continue;
        }

        for (var i = 0; i < block.Predecessors.Count; i++)
        {
            movesToDo.Clear();
            var predIndex = block.Predecessors[i];
            var phiOperandIndex = GetPhiOperandIndex(predIndex, block);

            // Go through all intervals live at the start of this block
            for (var intervalIndex = 0; intervalIndex < intervals.Count; intervalIndex++)
            {
                var interval = intervals[intervalIndex];
                if (interval.Start == instIndex)
                {
                    // If the interval starts at the very start of this block, it may be defined by a Phi
                    // Find the Phi and add a move to resolve
                    var foundPhi = false;
                    if (block.Phis is object)
                    {
                        foreach (var phi in block.Phis)
                        {
                            if (phi.Destination == interval.LocalIndex)
                            {
                                var source = ConvertLocalToInterval(phi.Operands[phiOperandIndex],
                                    intervals, blockEnds[predIndex]);
                                var dest = ConvertLocalToInterval(phi.Destination, intervals, instIndex);
                                movesToDo.Add((source, dest));
                                foundPhi = true;
                                break;
                            }
                        }
                    }

                    if (!foundPhi)
                    {
                        // Else, the interval continues the lifetime of an existing local
                        // Since its location may have changed we need to emit a move
                        // TODO: Skip redundant moves
                        var source = ConvertLocalToInterval(interval.LocalIndex, intervals,
                            blockEnds[predIndex]);
                        movesToDo.Add((source, intervalIndex));
                    }
                }
            }

            // Resolve the moves
            var pred = method.Blocks[predIndex];
            if (pred.Successors.Count == 1)
            {
                // If the predecessor only has a single successor, emit the copies at the end of it
                // As a sanity check, we expect the jump instruction to be the last instruction of the block
                if (pred.Instructions[pred.Instructions.Count - 1].Op != LowOp.Jump)
                {
                    throw new InvalidOperationException("Expected unconditional jump at the end of block.");
                }

                EmitRegisterMoves(movesToDo, pred.Instructions, pred.Instructions.Count - 1, intervals);
            }
            else
            {
                // Else, emit the copies at the start of this basic block
                // This can only succeed if this basic block has no other predecessors
                if (block.Predecessors.Count > 1)
                {
                    throw new InvalidOperationException("Critical edges must be split.");
                }

                EmitRegisterMoves(movesToDo, block.Instructions, 0, intervals);
            }
        }

        // The Phi list is neither needed nor relevant any more
        block.Phis = null;
    }
}
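// EmitRegisterMoves is referenced above but not shown in these excerpts. A simplified
// sketch of the core idea (named differently to make clear it is not the real method,
// which also receives the interval list so it can compare locations and drop redundant
// moves): a parallel-copy set may contain cycles, so a move whose destination is still
// needed as a source cannot be emitted yet; a two-element cycle is broken with a Swap
// instead of a temporary, which is the Swap seen in the Complex_phi_chain test.
private static void EmitRegisterMovesSketch(List<(int from, int to)> moves,
    List<LowInstruction> instructions, int insertAt)
{
    // Self-moves need no code at all
    var pending = moves.FindAll(m => m.from != m.to);
    var cursor = insertAt;

    // Emit every move whose destination is not a source of another pending move;
    // repeat until only cycles remain
    var progress = true;
    while (progress)
    {
        progress = false;
        for (var i = pending.Count - 1; i >= 0; i--)
        {
            var (from, to) = pending[i];
            if (pending.Exists(m => m.from == to))
                continue; // the destination still holds a value some other move needs

            instructions.Insert(cursor++, new LowInstruction(LowOp.Move, to, from, 0, 0));
            pending.RemoveAt(i);
            progress = true;
        }
    }

    // Only cycles remain; a two-element cycle becomes a single Swap. Longer cycles
    // would need a chain of swaps and are left out of this sketch.
    if (pending.Count == 2 &&
        pending[0].from == pending[1].to && pending[0].to == pending[1].from)
    {
        instructions.Insert(cursor, new LowInstruction(LowOp.Swap, 0, pending[0].from, pending[0].to, 0));
    }
}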
/// <summary>
/// Phase 1: Compute live intervals.
/// Each interval maps to a single local in a contiguous region of instructions.
/// The regions are defined by instruction counts, where the set of Phis is considered one instruction.
/// </summary>
/// <param name="method">The original LIR method where instructions refer to locals.</param>
/// <param name="intervals">An empty list that will be populated by the computed intervals.</param>
/// <param name="blockEnds">
/// An empty array that has an element for each block.
/// This will be populated with the last instruction index (inclusive) of each basic block.
/// </param>
private static void ComputeLiveIntervals(LowMethod<X64Register> method, List<Interval> intervals,
    int[] blockEnds)
{
    // We must start from the maximum index as we traverse the blocks in reverse order
    // NOTE: This complicates the instruction counting quite a bit, so be careful!
    var instIndex = 0;
    foreach (var block in method.Blocks)
    {
        instIndex += block.Instructions.Count + 1;
    }

    // TODO: Is this a sensible design correctness/performance-wise?
    var latestIntervalForLocal = new int[method.Locals.Count];
    for (var i = 0; i < latestIntervalForLocal.Length; i++)
    {
        latestIntervalForLocal[i] = -1;
    }

    // A temporary data structure - this is not updated by the loop header handling
    var liveIn = new SortedSet<int>[method.Blocks.Count];
    for (var i = 0; i < liveIn.Length; i++)
    {
        liveIn[i] = new SortedSet<int>();
    }

    // The reverse order is used because it typically sees block successors first
    for (var blockIndex = method.Blocks.Count - 1; blockIndex >= 0; blockIndex--)
    {
        var block = method.Blocks[blockIndex];
        var blockEnd = instIndex - 1;
        blockEnds[blockIndex] = blockEnd;
        var blockStart = blockEnd - block.Instructions.Count;
        var live = liveIn[blockIndex];

        // Initialize the live set to contain all locals that are live at the beginning of some
        // succeeding block, and all locals used in Phis of the succeeding blocks
        if (block.Successors is object)
        {
            foreach (var succ in block.Successors)
            {
                live.UnionWith(liveIn[succ]);

                var phis = method.Blocks[succ].Phis;
                if (phis is null)
                {
                    continue;
                }

                var phiPosition = GetPhiOperandIndex(blockIndex, method.Blocks[succ]);
                foreach (var phi in phis)
                {
                    live.Add(phi.Operands[phiPosition]);
                }
            }
        }

        // Create an interval for each live local
        // TODO: Consider merging adjacent intervals of a single local
        foreach (var liveLocal in live)
        {
            AddIntervalForLocal(liveLocal, blockStart, blockEnd);
        }

        // Then go through the instructions in reverse order
        for (var j = block.Instructions.Count - 1; j >= 0; j--)
        {
            var inst = block.Instructions[j];
            instIndex--;

            // Since the LIR is in SSA form, the output operand is not live before this instruction
            if (inst.UsesDest)
            {
                if (latestIntervalForLocal[inst.Dest] == -1)
                {
                    // If the result local is not used anywhere, we need to create a short interval here
                    AddIntervalForLocal(inst.Dest, instIndex, instIndex);
                }
                else
                {
                    intervals[latestIntervalForLocal[inst.Dest]].Start = instIndex;
                }
                live.Remove(inst.Dest);
            }

            // Input operands are defined before their uses, so we only need to add an interval
            // if the local is not yet live. Initially we set the lifetime to start at the start
            // of the block, but this may be shortened if the local is defined in this block.
            if (inst.UsesLeft && !live.Contains(inst.Left))
            {
                AddIntervalForLocal(inst.Left, blockStart, instIndex);
                live.Add(inst.Left);
            }

            // The right-hand operand may be set to -1 to signal a constant (immediate) argument
            if (inst.UsesRight && inst.Right >= 0 && !live.Contains(inst.Right))
            {
                AddIntervalForLocal(inst.Right, blockStart, instIndex);
                live.Add(inst.Right);
            }

            // Some instructions (e.g. calls) trash one or more registers
            AddX64SpecificIntervals(inst, instIndex);
        }

        // Remove Phi outputs from the live set
        if (block.Phis is object)
        {
            foreach (var phi in block.Phis)
            {
                intervals[latestIntervalForLocal[phi.Destination]].Use(instIndex - 1);
                live.Remove(phi.Destination);
            }
        }

        // The set of Phis is a single instruction (even if empty)
        instIndex--;
        Debug.Assert(instIndex == blockStart);

        // If this block is a loop header (has a predecessor with greater block index, or is
        // its own predecessor), extend the lifetimes of locals that are live for the entire loop
        foreach (var predIndex in block.Predecessors)
        {
            if (predIndex < blockIndex)
            {
                continue;
            }

            foreach (var liveLocal in live)
            {
                AddIntervalForLocal(liveLocal, blockStart, blockEnds[predIndex]);
            }
        }
    }

    Debug.Assert(instIndex == 0);

    // LOCAL HELPER METHODS

    void AddIntervalForLocal(int localIndex, int start, int end)
    {
        // If there already is an adjacent interval, update it instead of creating another
        // TODO: Is looking up in this cache enough?
        if (latestIntervalForLocal[localIndex] >= 0)
        {
            var existing = intervals[latestIntervalForLocal[localIndex]];
            if (existing.Start <= end + 1 && existing.End >= start - 1)
            {
                existing.Use(start);
                existing.Use(end);
                return;
            }
        }

        // Else, create a new interval
        intervals.Add(new Interval()
        {
            LocalIndex = localIndex,
            Register = method.Locals[localIndex].RequiredLocation.Register,
            Start = start,
            End = end
        });
        latestIntervalForLocal[localIndex] = intervals.Count - 1;
    }

    void AddX64SpecificIntervals(in LowInstruction inst, int instIndex)
    {
        // Prevent X64 trashing the right operand of subtraction/shift (see associated unit test)
        // except when the right operand is a constant.
        // Additionally, the right operand of shift is fixed to RCX, but Lowering has handled that
        if ((inst.Op == LowOp.IntegerSubtract ||
             inst.Op == LowOp.ShiftLeft ||
             inst.Op == LowOp.ShiftArithmeticRight) && inst.Right >= 0)
        {
            intervals[latestIntervalForLocal[inst.Right]].Use(instIndex + 1);
        }

        // In integer division, the dividend is stored in RDX:RAX.
        // The lower part is already handled since the source is a fixed temporary,
        // but we must prevent RDX from being used for the divisor.
        if (inst.Op == LowOp.IntegerDivide || inst.Op == LowOp.IntegerModulo)
        {
            intervals.Add(new Interval { Start = instIndex - 1, End = instIndex, Register = X64Register.Rdx });
        }

        // Calls trash some registers, so we need to add intervals for them
        if (inst.Op == LowOp.Call || inst.Op == LowOp.CallImported)
        {
            // RAX is already reserved as the call result is stored in a local
            intervals.Add(new Interval { Start = instIndex, End = instIndex, Register = X64Register.Rcx });
            intervals.Add(new Interval { Start = instIndex, End = instIndex, Register = X64Register.Rdx });
            intervals.Add(new Interval { Start = instIndex, End = instIndex, Register = X64Register.R8 });
            intervals.Add(new Interval { Start = instIndex, End = instIndex, Register = X64Register.R9 });
            intervals.Add(new Interval { Start = instIndex, End = instIndex, Register = X64Register.R10 });
            intervals.Add(new Interval { Start = instIndex, End = instIndex, Register = X64Register.R11 });
        }
    }
}
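// A worked example of the numbering (derived from the single-block test
// Intersecting_variables_in_single_block_have_separate_registers above): the block's Phi
// set, though empty, occupies index 0 and the five instructions occupy indices 1..5, so
// blockEnds[0] == 5 and blockStart == 0.
//
//   0  (empty Phi set)
//   1  LoadInt 1 -> #0     interval for #0 gets Start = 1
//   2  LoadInt 1 -> #1     interval for #1 gets Start = 2
//   3  Compare #0, #1      both uses end here: #0 lives 1..3 and #1 lives 2..3;
//                          the intervals intersect, so the locals get different registers
//   4  LoadInt -> #2       interval for #2 gets Start = 4
//   5  Return #2           interval for #2 ends at 5 (and carries the required RAX)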