예제 #1
0
        /// <summary>
        /// Interprets <paramref name="instructions"/> for a set of execution contexts that are stepped
        /// together as one <c>Warp</c>. A divergence stack decides which contexts are active for the
        /// instruction at the current program counter.
        /// </summary>
        /// <remarks>
        /// <para>
        /// This method is an iterator: no instruction is executed until the returned sequence is
        /// enumerated, and execution pauses at every yielded <c>ExecutionResponse</c> until the caller
        /// asks for the next element. Exceptions listed below are therefore raised during enumeration,
        /// not at the call site.
        /// </para>
        /// <para>
        /// Background reading on branch divergence in SIMD/SIMT execution:
        /// http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter34.html
        /// http://people.maths.ox.ac.uk/gilesm/pp10/lec2_2x2.pdf
        /// http://stackoverflow.com/questions/10119796/how-does-cuda-compiler-know-the-divergence-behaviour-of-warps
        /// http://www.istc-cc.cmu.edu/publications/papers/2011/SIMD.pdf
        /// http://hal.archives-ouvertes.fr/docs/00/62/26/54/PDF/collange_sympa2011_en.pdf
        /// http://users.ece.cmu.edu/~omutlu/pub/large-gpu-warps_micro11.pdf
        /// http://www.eecis.udel.edu/~cavazos/cisc879/papers/a3-han.pdf
        /// </para>
        /// </remarks>
        /// <param name="virtualMachine">
        /// Source of the <c>Textures</c>, <c>Samplers</c> and <c>TextureSamplers</c> collections that the
        /// <c>Sample</c> instruction reads. Not used by any other instruction in this method.
        /// </param>
        /// <param name="executionContexts">
        /// Every context in the warp. The derivative and <c>Sample</c> instructions index this array in
        /// consecutive groups of four (offsets 0..3 are read as top-left, top-right, bottom-left,
        /// bottom-right), so a length that is not a multiple of four makes those instructions index
        /// past the end of the array.
        /// </param>
        /// <param name="instructions">
        /// The program to run. It is indexed by the <c>NextPC</c> of the entry on top of the divergence
        /// stack; the loop ends once that value reaches <c>instructions.Length</c>.
        /// </param>
        /// <returns>
        /// A lazily produced sequence containing <c>ExecutionResponse.Cut</c> for each
        /// <c>Cut</c>/<c>CutStream</c>, <c>ExecutionResponse.Emit</c> for each <c>Emit</c>/<c>EmitStream</c>
        /// and <c>ExecutionResponse.Finished</c> for each <c>Ret</c> instruction that is executed.
        /// </returns>
        /// <exception cref="NotImplementedException">A <c>Discard</c> instruction was reached.</exception>
        /// <exception cref="InvalidOperationException">
        /// An instruction whose opcode has no case in this interpreter was reached.
        /// </exception>
        public IEnumerable<ExecutionResponse> Execute(
            VirtualMachine virtualMachine,
            ExecutionContext[] executionContexts,
            ExecutableInstruction[] instructions)
        {
            var warp = new Warp(executionContexts.Length);

            // The entry on top of the divergence stack supplies both the next program counter and
            // the set of contexts that are currently active.
            var topOfDivergenceStack    = warp.DivergenceStack.Peek();
            var activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);

            while (topOfDivergenceStack.NextPC < instructions.Length)
            {
                var instruction = instructions[topOfDivergenceStack.NextPC];

                // Only conditional branches fill this in; every other instruction passes null to
                // UpdateDivergenceStack below.
                List<BitArray> activeMasks = null;

                switch (instruction.OpcodeType)
                {
                    case ExecutableOpcodeType.Add:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Add);
                        }
                        break;

                    case ExecutableOpcodeType.And:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.And);
                        }
                        break;

                    case ExecutableOpcodeType.Branch:
                        // No per-thread work here; the jump is left to UpdateDivergenceStack.
                        break;

                    case ExecutableOpcodeType.BranchC:
                        // Two masks, one bit per context: index 0 collects the active threads whose
                        // condition test failed, index 1 those whose test passed. Bits of inactive
                        // threads stay at the BitArray default (false) in both masks.
                        activeMasks = new List<BitArray>
                        {
                            new BitArray(executionContexts.Length),
                            new BitArray(executionContexts.Length)
                        };
                        foreach (var thread in activeExecutionContexts)
                        {
                            var  src0   = GetOperandValue(thread, instruction.Operands[0], NumberType.UInt);
                            bool result = TestCondition(ref src0, instruction.TestBoolean);
                            activeMasks[0][thread.Index] = !result;
                            activeMasks[1][thread.Index] = result;
                        }
                        break;

                    case ExecutableOpcodeType.Cut:
                    case ExecutableOpcodeType.CutStream:
                        yield return ExecutionResponse.Cut;
                        break;

                    case ExecutableOpcodeType.Discard:
                        throw new NotImplementedException();

                    case ExecutableOpcodeType.Div:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Div);
                        }
                        break;

                    case ExecutableOpcodeType.Dp2:
                        foreach (var thread in activeExecutionContexts)
                        {
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp2);
                        }
                        break;

                    case ExecutableOpcodeType.Dp3:
                        foreach (var thread in activeExecutionContexts)
                        {
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp3);
                        }
                        break;

                    case ExecutableOpcodeType.Dp4:
                        foreach (var thread in activeExecutionContexts)
                        {
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp4);
                        }
                        break;

                    case ExecutableOpcodeType.Emit:
                    case ExecutableOpcodeType.EmitStream:
                        yield return ExecutionResponse.Emit;
                        break;

                    case ExecutableOpcodeType.Eq:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Eq);
                        }
                        break;

                    case ExecutableOpcodeType.Exp:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Exp);
                        }
                        break;

                    case ExecutableOpcodeType.Frc:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Frc);
                        }
                        break;

                    case ExecutableOpcodeType.FtoI:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoI);
                        }
                        break;

                    case ExecutableOpcodeType.FtoU:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoU);
                        }
                        break;

                    case ExecutableOpcodeType.Ge:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ge);
                        }
                        break;

                    case ExecutableOpcodeType.IAdd:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IAdd);
                        }
                        break;

                    case ExecutableOpcodeType.IEq:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IEq);
                        }
                        break;

                    case ExecutableOpcodeType.IGe:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IGe);
                        }
                        break;

                    case ExecutableOpcodeType.ILt:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.ILt);
                        }
                        break;

                    case ExecutableOpcodeType.IMad:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMad);
                        }
                        break;

                    case ExecutableOpcodeType.IMin:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMin);
                        }
                        break;

                    case ExecutableOpcodeType.INe:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.INe);
                        }
                        break;

                    case ExecutableOpcodeType.INeg:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.INeg);
                        }
                        break;

                    case ExecutableOpcodeType.IShl:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShl);
                        }
                        break;

                    case ExecutableOpcodeType.IShr:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShr);
                        }
                        break;

                    case ExecutableOpcodeType.ItoF:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.ItoF);
                        }
                        break;

                    case ExecutableOpcodeType.Log:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Log);
                        }
                        break;

                    case ExecutableOpcodeType.Lt:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Lt);
                        }
                        break;

                    case ExecutableOpcodeType.Mad:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mad);
                        }
                        break;

                    case ExecutableOpcodeType.Max:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Max);
                        }
                        break;

                    case ExecutableOpcodeType.Min:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Min);
                        }
                        break;

                    case ExecutableOpcodeType.Mov:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mov);
                        }
                        break;

                    case ExecutableOpcodeType.MovC:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.MovC);
                        }
                        break;

                    case ExecutableOpcodeType.Mul:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mul);
                        }
                        break;

                    case ExecutableOpcodeType.Ne:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ne);
                        }
                        break;

                    case ExecutableOpcodeType.Ret:
                        // The loop is not exited here; whether execution continues after the caller
                        // resumes depends on what UpdateDivergenceStack does with the program counter.
                        yield return ExecutionResponse.Finished;
                        break;

                    case ExecutableOpcodeType.RoundNe:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNe);
                        }
                        break;

                    case ExecutableOpcodeType.RoundNi:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNi);
                        }
                        break;

                    case ExecutableOpcodeType.RoundPi:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundPi);
                        }
                        break;

                    case ExecutableOpcodeType.RoundZ:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundZ);
                        }
                        break;

                    case ExecutableOpcodeType.Rsq:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Rsq);
                        }
                        break;

                    // The derivative and Sample cases below walk ALL contexts (not just the active
                    // ones) in groups of four, because each result is built from neighbouring
                    // contexts in the same group.

                    case ExecutableOpcodeType.DerivRtx:
                    case ExecutableOpcodeType.RtxCoarse:
                        // Coarse X derivative: one delta (top-right minus top-left) shared by the
                        // whole group.
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft  = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);

                            var deltaX = Number4.Subtract(ref topRight, ref topLeft);

                            for (var j = i; j < i + 4; j++)
                            {
                                SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaX);
                            }
                        }
                        break;

                    case ExecutableOpcodeType.RtxFine:
                        // Fine X derivative: the top pair and the bottom pair each get their own delta.
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft     = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight    = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft  = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                            var topDeltaX    = Number4.Subtract(ref topRight, ref topLeft);
                            var bottomDeltaX = Number4.Subtract(ref bottomRight, ref bottomLeft);

                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], topDeltaX);
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], topDeltaX);

                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], bottomDeltaX);
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], bottomDeltaX);
                        }
                        break;

                    case ExecutableOpcodeType.DerivRty:
                    case ExecutableOpcodeType.RtyCoarse:
                        // Coarse Y derivative: one delta (bottom-left minus top-left) shared by the
                        // whole group.
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft    = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);

                            var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                            for (var j = i; j < i + 4; j++)
                            {
                                SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaY);
                            }
                        }
                        break;

                    case ExecutableOpcodeType.RtyFine:
                        // Fine Y derivative: the left column and the right column each get their own
                        // delta.
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft     = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight    = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft  = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                            var leftDeltaY  = Number4.Subtract(ref bottomLeft, ref topLeft);
                            var rightDeltaY = Number4.Subtract(ref bottomRight, ref topRight);

                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], leftDeltaY);
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], rightDeltaY);

                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], leftDeltaY);
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], rightDeltaY);
                        }
                        break;

                    case ExecutableOpcodeType.Sample:
                    {
                        // Operand 2 selects the texture (and the texture sampler at the same index),
                        // operand 3 selects the sampler state.
                        var srcResourceIndex = instruction.Operands[2].Indices[0].Value;
                        var srcResource      = virtualMachine.Textures[srcResourceIndex];
                        var srcSampler       = virtualMachine.Samplers[instruction.Operands[3].Indices[0].Value];
                        var textureSampler   = virtualMachine.TextureSamplers[srcResourceIndex];

                        if (textureSampler == null || srcResource == null)
                        {
                            // Nothing to sample from: every context receives a default Number4.
                            var zero = new Number4();
                            foreach (var context in executionContexts)
                            {
                                SetRegisterValue(context, instruction.Operands[0], zero);
                            }
                        }
                        else
                        {
                            for (var i = 0; i < executionContexts.Length; i += 4)
                            {
                                var topLeft     = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                                var topRight    = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                                var bottomLeft  = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                                var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                                // The same pair of coordinate deltas, measured from the top-left
                                // context, is passed to SampleGrad for all four contexts of the group.
                                var deltaX = Number4.Subtract(ref topRight, ref topLeft);
                                var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                                SetRegisterValue(executionContexts[i + 0], instruction.Operands[0],
                                                 textureSampler.SampleGrad(srcResource, srcSampler, ref topLeft,
                                                                           ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 1], instruction.Operands[0],
                                                 textureSampler.SampleGrad(srcResource, srcSampler, ref topRight,
                                                                           ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 2], instruction.Operands[0],
                                                 textureSampler.SampleGrad(srcResource, srcSampler, ref bottomLeft,
                                                                           ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 3], instruction.Operands[0],
                                                 textureSampler.SampleGrad(srcResource, srcSampler, ref bottomRight,
                                                                           ref deltaX, ref deltaY));
                            }
                        }
                        break;
                    }

                    case ExecutableOpcodeType.Sqrt:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Sqrt);
                        }
                        break;

                    case ExecutableOpcodeType.UtoF:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.UtoF);
                        }
                        break;

                    case ExecutableOpcodeType.Xor:
                        foreach (var thread in activeExecutionContexts)
                        {
                            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.Xor);
                        }
                        break;

                    default:
                        throw new InvalidOperationException(instruction.OpcodeType + " is not yet supported.");
                }

                // Algorithm from "Dynamic Warp Formation: Exploiting Thread Scheduling for Efficient MIMD Control Flow
                // on SIMD Graphics Hardware" by Wilson Wai Lun Fung -
                // https://circle.ubc.ca/bitstream/handle/2429/2268/ubc_2008_fall_fung_wilson_wai_lun.pdf?sequence=1
                //
                // 3 possible cases:
                // - No Divergence (single next PC)
                //     => Update the next PC field of the top of stack (TOS) entry to
                //        the next PC of all active threads in this warp.
                // - Divergence (multiple next PC)
                //     => Modify the next PC field of the TOS entry to the reconvergence point.
                //        For each unique next PC of the warp, push a
                //        new entry onto the stack with next PC field being the unique
                //        next PC and the reconv. PC being the reconvergence point.
                //        The active mask of each entry denotes the threads branching
                //        to the next PC value of this entry.
                // - Reconvergence (next PC = reconv. PC of TOS)
                //     => Pop TOS entry from the stack.
                if (instruction.UpdateDivergenceStack(warp.DivergenceStack, activeMasks))
                {
                    // Refresh the cached top entry FIRST, then derive the active contexts from it.
                    // Doing it the other way round would select threads with the mask of the entry
                    // that was on top before the update, so the next instruction would run for the
                    // wrong set of threads.
                    topOfDivergenceStack    = warp.DivergenceStack.Peek();
                    activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);
                }
            }
        }
예제 #2
0
		/// <summary>
		/// http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter34.html
		/// http://people.maths.ox.ac.uk/gilesm/pp10/lec2_2x2.pdf
		/// http://stackoverflow.com/questions/10119796/how-does-cuda-compiler-know-the-divergence-behaviour-of-warps
		/// http://www.istc-cc.cmu.edu/publications/papers/2011/SIMD.pdf
		/// http://hal.archives-ouvertes.fr/docs/00/62/26/54/PDF/collange_sympa2011_en.pdf
        /// http://users.ece.cmu.edu/~omutlu/pub/large-gpu-warps_micro11.pdf
        /// http://www.eecis.udel.edu/~cavazos/cisc879/papers/a3-han.pdf
		/// </summary>
        public IEnumerable<ExecutionResponse> Execute(
            VirtualMachine virtualMachine, 
            ExecutionContext[] executionContexts, 
            ExecutableInstruction[] instructions)
		{
		    var warp = new Warp(executionContexts.Length);
		    var activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, warp.DivergenceStack.Peek());
            var topOfDivergenceStack = warp.DivergenceStack.Peek();

			while (topOfDivergenceStack.NextPC < instructions.Length)
			{
				var instruction = instructions[topOfDivergenceStack.NextPC];

			    List<BitArray> activeMasks = null;

				switch (instruction.OpcodeType)
				{
				    case ExecutableOpcodeType.Add:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Add);
				        break;
                    case ExecutableOpcodeType.And:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.And);
                        break;
				    case ExecutableOpcodeType.Branch:
				        break;
				    case ExecutableOpcodeType.BranchC:
                        activeMasks = new List<BitArray>
                        {
                            new BitArray(executionContexts.Length),
                            new BitArray(executionContexts.Length)
                        };
				        foreach (var thread in activeExecutionContexts)
				        {
				            var src0 = GetOperandValue(thread, instruction.Operands[0], NumberType.UInt);
				            bool result = TestCondition(ref src0, instruction.TestBoolean);
				            activeMasks[0][thread.Index] = !result;
				            activeMasks[1][thread.Index] = result;
				        }
				        break;
				    case ExecutableOpcodeType.Cut:
				    case ExecutableOpcodeType.CutStream:
				        yield return ExecutionResponse.Cut;
				        break;
                    case ExecutableOpcodeType.Discard:
				        throw new NotImplementedException();
				    case ExecutableOpcodeType.Div:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Div);
				        break;
				    case ExecutableOpcodeType.Dp2:
				        foreach (var thread in activeExecutionContexts)
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp2);
				        break;
				    case ExecutableOpcodeType.Dp3:
				        foreach (var thread in activeExecutionContexts)
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp3);
				        break;
				    case ExecutableOpcodeType.Dp4:
				        foreach (var thread in activeExecutionContexts)
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp4);
				        break;
                    case ExecutableOpcodeType.Emit:
                    case ExecutableOpcodeType.EmitStream:
                        yield return ExecutionResponse.Emit;
                        break;
                    case ExecutableOpcodeType.Eq:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Eq);
                        break;
                    case ExecutableOpcodeType.Exp:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Exp);
                        break;
                    case ExecutableOpcodeType.Frc:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Frc);
                        break;
                    case ExecutableOpcodeType.FtoI:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoI);
                        break;
                    case ExecutableOpcodeType.FtoU:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoU);
                        break;
                    case ExecutableOpcodeType.Ge:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ge);
                        break;
                    case ExecutableOpcodeType.IAdd:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IAdd);
                        break;
                    case ExecutableOpcodeType.IEq:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IEq);
                        break;
                    case ExecutableOpcodeType.IGe:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IGe);
                        break;
				    case ExecutableOpcodeType.ILt:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.ILt);
				        break;
                    case ExecutableOpcodeType.IMad:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMad);
                        break;
                    case ExecutableOpcodeType.IMin:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMin);
                        break;
                    case ExecutableOpcodeType.INe:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.INe);
                        break;
                    case ExecutableOpcodeType.INeg:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.INeg);
                        break;
                    case ExecutableOpcodeType.IShl:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShl);
                        break;
                    case ExecutableOpcodeType.IShr:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShr);
                        break;
                    case ExecutableOpcodeType.ItoF:
				        foreach (var thread in activeExecutionContexts)
				            Execute(thread, instruction, NumberType.Int, InstructionImplementations.ItoF);
				        break;
                    case ExecutableOpcodeType.Log:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Log);
                        break;
				    case ExecutableOpcodeType.Lt:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Lt);
				        break;
				    case ExecutableOpcodeType.Mad:
				        foreach (var thread in activeExecutionContexts)
				            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mad);
				        break;
				    case ExecutableOpcodeType.Max:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Max);
				        break;
                    case ExecutableOpcodeType.Min:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Min);
                        break;
				    case ExecutableOpcodeType.Mov:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mov);
				        break;
				    case ExecutableOpcodeType.MovC:
				        foreach (var thread in activeExecutionContexts)
				            Execute(thread, instruction, NumberType.Float, InstructionImplementations.MovC);
				        break;
				    case ExecutableOpcodeType.Mul:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mul);
				        break;
                    case ExecutableOpcodeType.Ne:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ne);
                        break;
				    case ExecutableOpcodeType.Ret:
				        yield return ExecutionResponse.Finished;
				        break;
                    case ExecutableOpcodeType.RoundNe:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNe);
                        break;
                    case ExecutableOpcodeType.RoundNi:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNi);
                        break;
                    case ExecutableOpcodeType.RoundPi:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundPi);
                        break;
                    case ExecutableOpcodeType.RoundZ:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundZ);
                        break;
				    case ExecutableOpcodeType.Rsq:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Rsq);
				        break;
                    case ExecutableOpcodeType.DerivRtx:
				    case ExecutableOpcodeType.RtxCoarse:
				        for (var i = 0; i < executionContexts.Length; i += 4)
				        {
				            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);

				            var deltaX = Number4.Subtract(ref topRight, ref topLeft);

				            for (var j = i; j < i + 4; j++)
				                SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaX);
				        }
				        break;
				    case ExecutableOpcodeType.RtxFine:
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                            var topDeltaX = Number4.Subtract(ref topRight, ref topLeft);
                            var bottomDeltaX = Number4.Subtract(ref bottomRight, ref bottomLeft);

                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], topDeltaX);
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], topDeltaX);

                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], bottomDeltaX);
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], bottomDeltaX);
                        }
				        break;
                    case ExecutableOpcodeType.DerivRty:
                    case ExecutableOpcodeType.RtyCoarse:
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);

                            var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                            for (var j = i; j < i + 4; j++)
                                SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaY);
                        }
                        break;
                    case ExecutableOpcodeType.RtyFine:
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                            var leftDeltaY = Number4.Subtract(ref bottomLeft, ref topLeft);
                            var rightDeltaY = Number4.Subtract(ref bottomRight, ref topRight);

                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], leftDeltaY);
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], rightDeltaY);

                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], leftDeltaY);
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], rightDeltaY);
                        }
                        break;
                    case ExecutableOpcodeType.Sample:
				    {
				        var srcResourceIndex = instruction.Operands[2].Indices[0].Value;
                        var srcResource = virtualMachine.Textures[srcResourceIndex];
				        var srcSampler = virtualMachine.Samplers[instruction.Operands[3].Indices[0].Value];
				        var textureSampler = virtualMachine.TextureSamplers[srcResourceIndex];

                        if (textureSampler == null || srcResource == null)
                        {
                            var zero = new Number4();
                            foreach (var context in executionContexts)
                                SetRegisterValue(context, instruction.Operands[0], zero);
                        }
                        else
                        {
                            for (var i = 0; i < executionContexts.Length; i += 4)
                            {
                                var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                                var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                                var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                                var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                                var deltaX = Number4.Subtract(ref topRight, ref topLeft);
                                var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                                SetRegisterValue(executionContexts[i + 0], instruction.Operands[0],
                                    textureSampler.SampleGrad(srcResource, srcSampler, ref topLeft,
                                        ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 1], instruction.Operands[0],
                                    textureSampler.SampleGrad(srcResource, srcSampler, ref topRight,
                                        ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 2], instruction.Operands[0],
                                    textureSampler.SampleGrad(srcResource, srcSampler, ref bottomLeft,
                                        ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 3], instruction.Operands[0],
                                    textureSampler.SampleGrad(srcResource, srcSampler, ref bottomRight,
                                        ref deltaX, ref deltaY));
                            }
                        }
				        break;
				    }
				    case ExecutableOpcodeType.Sqrt:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Sqrt);
				        break;
                    case ExecutableOpcodeType.UtoF:
				        foreach (var thread in activeExecutionContexts)
				            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.UtoF);
				        break;
				    case ExecutableOpcodeType.Xor:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.Xor);
				        break;
				    default:
				        throw new InvalidOperationException(instruction.OpcodeType + " is not yet supported.");
				}

			    // Algorithm from "Dynamic Warp Formation: Exploiting Thread Scheduling for Efficient MIMD Control Flow
				// on SIMD Graphics Hardware" by Wilson Wai Lun Fung -
				// https://circle.ubc.ca/bitstream/handle/2429/2268/ubc_2008_fall_fung_wilson_wai_lun.pdf?sequence=1
				// 
				// 3 possible cases:
				// - No Divergence (single next PC)
				//     => Update the next PC field of the top of stack (TOS) entry to
				//        the next PC of all active threads in this warp.
				// - Divergence (multiple next PC)
				//     => Modify the next PC field of the TOS entry to the reconvergence point. 
				//        For each unique next PC of the warp, push a
				//        new entry onto the stack with next PC field being the unique
				//        next PC and the reconv. PC being the reconvergence point.
				//        The active mask of each entry denotes the threads branching
				//        to the next PC value of this entry.
				// - Reconvergence (next PC = reconv. PC of TOS)
				//     => Pop TOS entry from the stack.
			    if (instruction.UpdateDivergenceStack(warp.DivergenceStack, activeMasks))
			    {
			        activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);
			        topOfDivergenceStack = warp.DivergenceStack.Peek();
			    }
			}
		}