Ejemplo n.º 1
0
        /// <summary>
        /// Builds the C# source of a <c>DynamicShaderExecutor</c> class whose <c>Execute</c>
        /// method contains the code for every instruction, emitted back to back in array order.
        /// </summary>
        /// <remarks>
        /// No divergence handling is emitted: the generated method assigns
        /// <c>executionContexts</c> to <c>activeExecutionContexts</c> once and never changes it.
        /// </remarks>
        /// <param name="instructions">Instructions to translate, in the order they are emitted.</param>
        /// <returns>The complete source text of the generated class.</returns>
        private static string GenerateNonDivergent(ExecutableInstruction[] instructions)
        {
            var instructionsCode = new StringBuilder();
            foreach (var instruction in instructions)
            {
                // Prefix each instruction's code with its string form as a comment.
                instructionsCode.AppendLineIndent(2, "// " + instruction);
                GenerateInstructionCode(instructionsCode, instruction);
                instructionsCode.AppendLine();
            }

            return @"
using System.Collections;
using System.Collections.Generic;
using SlimShader;
using SlimShader.VirtualMachine;
using SlimShader.VirtualMachine.Analysis.ExecutableInstructions;
using SlimShader.VirtualMachine.Execution;

public static class DynamicShaderExecutor
{
    public static IEnumerable<ExecutionResponse> Execute(
        VirtualMachine virtualMachine, ExecutionContext[] executionContexts,
        ExecutableInstruction[] instructions)
    {
        var activeExecutionContexts = executionContexts;

" + instructionsCode + @"
    }
}";
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the C# source of a <c>DynamicShaderExecutor</c> class whose <c>Execute</c>
        /// method drives the instructions through a <c>Warp</c> divergence stack.
        /// </summary>
        /// <remarks>
        /// Each instruction becomes one <c>case</c> of a <c>switch</c> on
        /// <c>topOfDivergenceStack.NextPC</c>, wrapped in a <c>while</c> loop that runs until
        /// <c>NextPC</c> reaches <c>instructions.Length</c>. After the instruction's own code,
        /// every case calls <c>instruction.UpdateDivergenceStack</c>; when that returns true the
        /// generated code re-reads the top of the stack and recomputes the active contexts.
        /// </remarks>
        /// <param name="instructions">Instructions to translate; the array index is the case label.</param>
        /// <returns>The complete source text of the generated class.</returns>
        private static string GenerateDivergent(ExecutableInstruction[] instructions)
        {
            var instructionsCode = new StringBuilder();
            int instructionIndex = 0;
            foreach (var instruction in instructions)
            {
                instructionsCode.AppendLineIndent(4, "case {0}:", instructionIndex);
                instructionsCode.AppendLineIndent(4, "{");
                instructionsCode.AppendLineIndent(5, "// " + instruction);
                GenerateInstructionCode(instructionsCode, instruction);
                instructionsCode.AppendLine();

                // Only divergent instructions hand their activeMasks to the stack update;
                // every other instruction passes null.
                if (instruction is DivergentExecutableInstruction)
                    instructionsCode.AppendLineIndent(5, "if (instruction.UpdateDivergenceStack(warp.DivergenceStack, activeMasks))");
                else
                    instructionsCode.AppendLineIndent(5, "if (instruction.UpdateDivergenceStack(warp.DivergenceStack, null))");
                instructionsCode.AppendLineIndent(5, "{");
                // Refresh the top-of-stack entry first so the active contexts are derived
                // from the current entry rather than the one that was on top before the update.
                instructionsCode.AppendLineIndent(5, "    topOfDivergenceStack = warp.DivergenceStack.Peek();");
                instructionsCode.AppendLineIndent(5, "    activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);");
                instructionsCode.AppendLineIndent(5, "}");

                instructionsCode.AppendLineIndent(5, "break;");
                instructionsCode.AppendLineIndent(4, "}");
                instructionsCode.AppendLine();

                ++instructionIndex;
            }

            return @"
using System.Collections;
using System.Collections.Generic;
using SlimShader;
using SlimShader.VirtualMachine;
using SlimShader.VirtualMachine.Analysis.ExecutableInstructions;
using SlimShader.VirtualMachine.Execution;

public static class DynamicShaderExecutor
{
    public static IEnumerable<ExecutionResponse> Execute(
        VirtualMachine virtualMachine, ExecutionContext[] executionContexts,
        ExecutableInstruction[] instructions)
    {
        var warp = new Warp(executionContexts.Length);
        var activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, warp.DivergenceStack.Peek());
        var topOfDivergenceStack = warp.DivergenceStack.Peek();

        while (topOfDivergenceStack.NextPC < instructions.Length)
        {
            var instruction = instructions[topOfDivergenceStack.NextPC];
            switch (topOfDivergenceStack.NextPC)
            {
" + instructionsCode + @"
            }
        }

    }
}";
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs a shader through its JIT-compiled executor, compiling the executor on first use.
        /// </summary>
        /// <remarks>
        /// Compiled executors are kept in <c>_jittedShaderCache</c>, keyed by
        /// <c>virtualMachine.Bytecode</c>.
        /// </remarks>
        /// <param name="virtualMachine">Virtual machine whose bytecode identifies the shader.</param>
        /// <param name="executionContexts">Contexts passed through to the compiled executor.</param>
        /// <param name="instructions">Instructions to compile on a cache miss and to pass to the executor.</param>
        /// <returns>Whatever the compiled executor returns for these arguments.</returns>
        public IEnumerable<ExecutionResponse> Execute(
            VirtualMachine virtualMachine, ExecutionContext[] executionContexts,
            ExecutableInstruction[] instructions)
        {
            ExecuteShaderDelegate compiledShader;
            bool alreadyCompiled = _jittedShaderCache.TryGetValue(virtualMachine.Bytecode, out compiledShader);
            if (alreadyCompiled)
                return compiledShader(virtualMachine, executionContexts, instructions);

            // Cache miss: compile now and remember the executor for later calls.
            compiledShader = JitCompileShader(instructions);
            _jittedShaderCache.Add(virtualMachine.Bytecode, compiledShader);

            return compiledShader(virtualMachine, executionContexts, instructions);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Generates C# source for the given instructions, compiles it into a dynamic
        /// assembly and returns a delegate bound to the generated
        /// <c>DynamicShaderExecutor.Execute</c> method.
        /// </summary>
        /// <param name="instructions">Instructions handed to <c>ShaderCodeGenerator.Generate</c>.</param>
        /// <returns>A delegate that invokes the freshly compiled <c>Execute</c> method.</returns>
        /// <exception cref="InvalidOperationException">
        /// Compilation did not succeed; the message lists every diagnostic, one per line.
        /// </exception>
        private static ExecuteShaderDelegate JitCompileShader(ExecutableInstruction[] instructions)
        {
            var assemblyReferences = new[]
            {
                MetadataReference.CreateAssemblyReference("mscorlib"),
                new MetadataFileReference(typeof(Number4).Assembly.Location),
                new MetadataFileReference(typeof(VirtualMachine).Assembly.Location)
            };

            const string outputName = "SlimShader.VirtualMachine.Jitter.Generated";
            var code = ShaderCodeGenerator.Generate(instructions);

            var compilation = Compilation.Create(outputName)
                .WithOptions(new CompilationOptions(OutputKind.DynamicallyLinkedLibrary, debugInformationKind: Roslyn.Compilers.Common.DebugInformationKind.Full))
                .AddReferences(assemblyReferences)
                .AddSyntaxTrees(SyntaxTree.ParseText(code));

            // The assembly is defined with RunAndCollect access.
            var moduleBuilder = AppDomain.CurrentDomain
                .DefineDynamicAssembly(new AssemblyName(outputName),
                    AssemblyBuilderAccess.RunAndCollect)
                .DefineDynamicModule(outputName);

            // Dump the generated source to the debug output before compiling it.
            System.Diagnostics.Debug.Write(code);

            var compilationResult = compilation.Emit(moduleBuilder);
            if (!compilationResult.Success)
            {
                var errorMessage = string.Empty;
                foreach (var diagnostic in compilationResult.Diagnostics)
                {
                    errorMessage += diagnostic + Environment.NewLine;
                }

                // A specific exception type instead of bare System.Exception; handlers that
                // catch Exception still see it.
                throw new InvalidOperationException(errorMessage);
            }

            var dynamicClass = moduleBuilder.GetType("DynamicShaderExecutor", false, true);
            var dynamicMethod = dynamicClass.GetMethod("Execute");

            return (ExecuteShaderDelegate) dynamicMethod.CreateDelegate(typeof(ExecuteShaderDelegate));
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Emits a <c>foreach</c> over <c>activeExecutionContexts</c> that reads operands 1 to 3
 /// as <c>NumberType.Float</c>, calls <c>InstructionImplementations.{methodName}</c> with the
 /// instruction's saturate flag, and then lets <c>GenerateSetRegisterValue</c> emit the
 /// store for operand 0.
 /// </summary>
 /// <param name="sb">Builder receiving the generated source.</param>
 /// <param name="instruction">Instruction supplying the operands and the saturate flag.</param>
 /// <param name="methodName">Name of the <c>InstructionImplementations</c> method to call.</param>
 private static void GenerateExecute3(StringBuilder sb, ExecutableInstruction instruction, string methodName)
 {
     // The flag is written into C# source, so lower-case it independently of the
     // current culture.
     var saturateLiteral = instruction.Saturate.ToString().ToLowerInvariant();

     sb.AppendLineIndent(2, "foreach (var context in activeExecutionContexts)");
     sb.AppendLineIndent(2, "{");
     sb.AppendLineIndent(2, "    var src0 = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float));
     sb.AppendLineIndent(2, "    var src1 = {0};", GenerateGetOperandValue(instruction.Operands[2], NumberType.Float));
     sb.AppendLineIndent(2, "    var src2 = {0};", GenerateGetOperandValue(instruction.Operands[3], NumberType.Float));
     sb.AppendLineIndent(2, "    var result = InstructionImplementations.{0}({1}, ref src0, ref src1, ref src2);",
         methodName, saturateLiteral);
     GenerateSetRegisterValue(sb, instruction.Operands[0]);
     sb.AppendLineIndent(2, "}");
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Generates the executor source for a shader, choosing the divergent generator when at
 /// least one instruction is a <c>DivergentExecutableInstruction</c> and the non-divergent
 /// generator otherwise.
 /// </summary>
 /// <param name="instructions">Instructions to translate.</param>
 /// <returns>The generated C# source text.</returns>
 public static string Generate(ExecutableInstruction[] instructions)
 {
     var hasDivergentInstruction = instructions.OfType<DivergentExecutableInstruction>().Any();

     return hasDivergentInstruction
         ? GenerateDivergent(instructions)
         : GenerateNonDivergent(instructions);
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Appends to <paramref name="sb"/> the C# source that implements a single instruction,
        /// selected by <c>instruction.OpcodeType</c>.
        /// </summary>
        /// <remarks>
        /// The emitted text refers to names that must exist where it is spliced in:
        /// <c>executionContexts</c>, <c>activeExecutionContexts</c> and <c>virtualMachine</c>.
        /// Cut, Emit and Ret emit <c>yield return</c> statements, so the surrounding generated
        /// method has to be an iterator.
        /// </remarks>
        /// <param name="sb">Builder receiving the generated source.</param>
        /// <param name="instruction">Instruction to translate.</param>
        /// <exception cref="InvalidOperationException">The opcode has no case in this switch.</exception>
        private static void GenerateInstructionCode(StringBuilder sb, ExecutableInstruction instruction)
        {
            switch (instruction.OpcodeType)
            {
                case Execution.ExecutableOpcodeType.Add:
                    GenerateExecute2(sb, instruction, "Add");
                    break;
                case Execution.ExecutableOpcodeType.Branch :
                    // Nothing is emitted for an unconditional branch here.
                    break;
                case Execution.ExecutableOpcodeType.BranchC :
                    // Emit two per-context masks: index 0 is set for contexts whose test
                    // result is false, index 1 for contexts whose test result is true.
                    sb.AppendLineIndent(2, "var activeMasks = new List<BitArray>");
                    sb.AppendLineIndent(2, "{");
                    sb.AppendLineIndent(2, "    new BitArray(executionContexts.Length),");
                    sb.AppendLineIndent(2, "    new BitArray(executionContexts.Length)");
                    sb.AppendLineIndent(2, "};");
                    sb.AppendLineIndent(2, "foreach (var context in activeExecutionContexts)");
                    sb.AppendLineIndent(2, "{");
                    sb.AppendLineIndent(2, "    var src0 = {0};", GenerateGetOperandValue(instruction.Operands[0], NumberType.UInt));
                    sb.AppendLineIndent(2, "    var result = src0.{0};", GenerateTestCondition(instruction.TestBoolean));
                    sb.AppendLineIndent(2, "    activeMasks[0][context.Index] = !result;");
                    sb.AppendLineIndent(2, "    activeMasks[1][context.Index] = result;");
                    sb.AppendLineIndent(2, "}");
                    break;
				case Execution.ExecutableOpcodeType.Cut:
				case Execution.ExecutableOpcodeType.CutStream:
					sb.AppendLineIndent(2, "yield return ExecutionResponse.Cut;");
					break;
                case Execution.ExecutableOpcodeType.Dp2:
                    GenerateExecuteScalar2(sb, instruction, "Dp2");
                    break;
                case Execution.ExecutableOpcodeType.Dp3:
                    GenerateExecuteScalar2(sb, instruction, "Dp3");
                    break;
                case Execution.ExecutableOpcodeType.Dp4:
                    GenerateExecuteScalar2(sb, instruction, "Dp4");
                    break;
				case Execution.ExecutableOpcodeType.Emit:
				case Execution.ExecutableOpcodeType.EmitStream:
					sb.AppendLineIndent(2, "yield return ExecutionResponse.Emit;");
					break;
                case Execution.ExecutableOpcodeType.IAdd:
                    GenerateExecute2NoSat(sb, instruction, "IAdd");
                    break;
                case Execution.ExecutableOpcodeType.IGe:
                    GenerateExecute2NoSat(sb, instruction, "IGe");
                    break;
				case Execution.ExecutableOpcodeType.IShl:
					GenerateExecute2NoSat(sb, instruction, "IShl");
					break;
				case Execution.ExecutableOpcodeType.IShr:
					GenerateExecute2NoSat(sb, instruction, "IShr");
					break;
                case Execution.ExecutableOpcodeType.Mad:
                    GenerateExecute3(sb, instruction, "Mad");
                    break;
                case Execution.ExecutableOpcodeType.Max:
                    GenerateExecute2(sb, instruction, "Max");
                    break;
                case Execution.ExecutableOpcodeType.Min:
                    GenerateExecute2(sb, instruction, "Min");
                    break;
                case Execution.ExecutableOpcodeType.Mul:
                    GenerateExecute2(sb, instruction, "Mul");
                    break;
                case Execution.ExecutableOpcodeType.Mov :
                    GenerateExecute1(sb, instruction, "Mov");
                    break;
                case Execution.ExecutableOpcodeType.MovC:
                    GenerateExecute3(sb, instruction, "MovC");
                    break;
                case Execution.ExecutableOpcodeType.Ret :
                    sb.AppendLineIndent(2, "yield return ExecutionResponse.Finished;");
                    break;
                case Execution.ExecutableOpcodeType.Rsq:
                    GenerateExecute1(sb, instruction, "Rsq");
                    break;
                case Execution.ExecutableOpcodeType.Sample :
                    // The resource and sampler indices are read at generation time and
                    // written into the emitted source.
                    var srcResourceIndex = instruction.Operands[2].Indices[0].Value;
                    var srcSamplerIndex = instruction.Operands[3].Indices[0].Value;

                    sb.AppendLineIndent(2, "{");
                    sb.AppendLineIndent(2, "    var textureSampler = virtualMachine.TextureSamplers[{0}];", srcResourceIndex);
                    sb.AppendLineIndent(2, "    var srcResource = virtualMachine.Textures[{0}];", srcResourceIndex);
                    sb.AppendLineIndent(2, "    var srcSampler = virtualMachine.Samplers[{0}];", srcSamplerIndex);
                    sb.AppendLineIndent(2, "    ");
                    // Fallback when the texture sampler or the texture is null: a default
                    // Number4 is used as the result for every context.
                    sb.AppendLineIndent(2, "    if (textureSampler == null || srcResource == null)");
                    sb.AppendLineIndent(2, "    {");
                    sb.AppendLineIndent(2, "        var result = new Number4();");
                    sb.AppendLineIndent(2, "        foreach (var context in executionContexts)");
                    // NOTE(review): the emitted foreach has no braces, so this relies on
                    // GenerateSetRegisterValue emitting exactly one statement - confirm.
                    GenerateSetRegisterValue(sb, instruction.Operands[0]);
                    sb.AppendLineIndent(2, "    }");
                    sb.AppendLineIndent(2, "    else");
                    sb.AppendLineIndent(2, "    {");
                    // Contexts are consumed four at a time as topLeft/topRight/bottomLeft/
                    // bottomRight; this loops over executionContexts, not activeExecutionContexts.
                    // Assumes executionContexts.Length is a multiple of 4 - TODO confirm.
                    sb.AppendLineIndent(2, "        for (var i = 0; i < executionContexts.Length; i += 4)");
                    sb.AppendLineIndent(2, "        {");
                    sb.AppendLineIndent(2, "            var topLeft = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float, "executionContexts[i + 0]"));
                    sb.AppendLineIndent(2, "            var topRight = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float, "executionContexts[i + 1]"));
                    sb.AppendLineIndent(2, "            var bottomLeft = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float, "executionContexts[i + 2]"));
                    sb.AppendLineIndent(2, "            var bottomRight = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float, "executionContexts[i + 3]"));
                    sb.AppendLineIndent(2, "            ");
                    // deltaX and deltaY are computed once from the topLeft context and reused
                    // for all four SampleGrad calls below.
                    sb.AppendLineIndent(2, "            var deltaX = Number4.Subtract(ref topRight, ref topLeft);");
                    sb.AppendLineIndent(2, "            var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);");
                    sb.AppendLineIndent(2, "            ");
                    sb.AppendLineIndent(2, "            var result = textureSampler.SampleGrad(srcResource, srcSampler, ref topLeft, ref deltaX, ref deltaY);");
                    GenerateSetRegisterValue(sb, instruction.Operands[0], "executionContexts[i + 0]");
                    sb.AppendLineIndent(2, "            ");
                    sb.AppendLineIndent(2, "            result = textureSampler.SampleGrad(srcResource, srcSampler, ref topRight, ref deltaX, ref deltaY);");
                    GenerateSetRegisterValue(sb, instruction.Operands[0], "executionContexts[i + 1]");
                    sb.AppendLineIndent(2, "        ");
                    sb.AppendLineIndent(2, "            result = textureSampler.SampleGrad(srcResource, srcSampler, ref bottomLeft, ref deltaX, ref deltaY);");
                    GenerateSetRegisterValue(sb, instruction.Operands[0], "executionContexts[i + 2]");
                    sb.AppendLineIndent(2, "        ");
                    sb.AppendLineIndent(2, "            result = textureSampler.SampleGrad(srcResource, srcSampler, ref bottomRight, ref deltaX, ref deltaY);");
                    GenerateSetRegisterValue(sb, instruction.Operands[0], "executionContexts[i + 3]");
                    sb.AppendLineIndent(2, "        ");
                    sb.AppendLineIndent(2, "        }");
                    sb.AppendLineIndent(2, "    }");
                    sb.AppendLineIndent(2, "}");
                    break;
                default :
                    throw new InvalidOperationException(instruction.OpcodeType + " is not yet supported.");
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Executes a one-source instruction for a single context: reads operand 1, passes it
        /// to <paramref name="callback"/> and stores the returned value via operand 0.
        /// </summary>
        /// <param name="context">Context the operand is read from and the result is written to.</param>
        /// <param name="instruction">Instruction supplying the operands.</param>
        /// <param name="numberType">Number type passed to <c>GetOperandValue</c>.</param>
        /// <param name="callback">Operation applied to the source value.</param>
        private static void Execute(ExecutionContext context, ExecutableInstruction instruction, NumberType numberType, Number4ToNumber4Callback callback)
        {
            var operands = instruction.Operands;
            var source = GetOperandValue(context, operands[1], numberType);

            SetRegisterValue(context, operands[0], callback(ref source));
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Executes a three-source instruction for a single context: reads operands 1 to 3,
        /// passes them with the instruction's saturate flag to <paramref name="callback"/> and
        /// stores the returned value via operand 0.
        /// </summary>
        /// <param name="context">Context the operands are read from and the result is written to.</param>
        /// <param name="instruction">Instruction supplying the operands and the saturate flag.</param>
        /// <param name="numberType">Number type passed to <c>GetOperandValue</c> for every source.</param>
        /// <param name="callback">Operation applied to the three source values.</param>
        private static void Execute(ExecutionContext context, ExecutableInstruction instruction, NumberType numberType, BoolNumber4Number4Number4ToNumber4Callback callback)
        {
            var operands = instruction.Operands;
            var first = GetOperandValue(context, operands[1], numberType);
            var second = GetOperandValue(context, operands[2], numberType);
            var third = GetOperandValue(context, operands[3], numberType);

            SetRegisterValue(
                context,
                operands[0],
                callback(instruction.Saturate, ref first, ref second, ref third));
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Executes a two-source instruction whose callback returns a single number: reads
        /// operands 1 and 2, calls <paramref name="callback"/> with the saturate flag, and
        /// stores a <c>Number4</c> whose four components all hold that number via operand 0.
        /// </summary>
        /// <param name="context">Context the operands are read from and the result is written to.</param>
        /// <param name="instruction">Instruction supplying the operands and the saturate flag.</param>
        /// <param name="numberType">Number type passed to <c>GetOperandValue</c> for both sources.</param>
        /// <param name="callback">Operation reducing the two source values to one number.</param>
        private static void ExecuteScalar(ExecutionContext context, ExecutableInstruction instruction, NumberType numberType, Number4Number4ToNumberCallback callback)
        {
            var operands = instruction.Operands;
            var left = GetOperandValue(context, operands[1], numberType);
            var right = GetOperandValue(context, operands[2], numberType);
            var scalar = callback(instruction.Saturate, ref left, ref right);

            // Copy the single number into every component of the value to store.
            var replicated = new Number4();
            replicated.Number0 = scalar;
            replicated.Number1 = scalar;
            replicated.Number2 = scalar;
            replicated.Number3 = scalar;

            SetRegisterValue(context, operands[0], replicated);
        }
Ejemplo n.º 11
0
		/// <summary>
		/// http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter34.html
		/// http://people.maths.ox.ac.uk/gilesm/pp10/lec2_2x2.pdf
		/// http://stackoverflow.com/questions/10119796/how-does-cuda-compiler-know-the-divergence-behaviour-of-warps
		/// http://www.istc-cc.cmu.edu/publications/papers/2011/SIMD.pdf
		/// http://hal.archives-ouvertes.fr/docs/00/62/26/54/PDF/collange_sympa2011_en.pdf
        /// http://users.ece.cmu.edu/~omutlu/pub/large-gpu-warps_micro11.pdf
        /// http://www.eecis.udel.edu/~cavazos/cisc879/papers/a3-han.pdf
		/// </summary>
        public IEnumerable<ExecutionResponse> Execute(
            VirtualMachine virtualMachine, 
            ExecutionContext[] executionContexts, 
            ExecutableInstruction[] instructions)
		{
		    var warp = new Warp(executionContexts.Length);
		    var activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, warp.DivergenceStack.Peek());
            var topOfDivergenceStack = warp.DivergenceStack.Peek();

			while (topOfDivergenceStack.NextPC < instructions.Length)
			{
				var instruction = instructions[topOfDivergenceStack.NextPC];

			    List<BitArray> activeMasks = null;

				switch (instruction.OpcodeType)
				{
				    case ExecutableOpcodeType.Add:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Add);
				        break;
                    case ExecutableOpcodeType.And:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.And);
                        break;
				    case ExecutableOpcodeType.Branch:
				        break;
				    case ExecutableOpcodeType.BranchC:
                        activeMasks = new List<BitArray>
                        {
                            new BitArray(executionContexts.Length),
                            new BitArray(executionContexts.Length)
                        };
				        foreach (var thread in activeExecutionContexts)
				        {
				            var src0 = GetOperandValue(thread, instruction.Operands[0], NumberType.UInt);
				            bool result = TestCondition(ref src0, instruction.TestBoolean);
				            activeMasks[0][thread.Index] = !result;
				            activeMasks[1][thread.Index] = result;
				        }
				        break;
				    case ExecutableOpcodeType.Cut:
				    case ExecutableOpcodeType.CutStream:
				        yield return ExecutionResponse.Cut;
				        break;
                    case ExecutableOpcodeType.Discard:
				        throw new NotImplementedException();
				    case ExecutableOpcodeType.Div:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Div);
				        break;
				    case ExecutableOpcodeType.Dp2:
				        foreach (var thread in activeExecutionContexts)
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp2);
				        break;
				    case ExecutableOpcodeType.Dp3:
				        foreach (var thread in activeExecutionContexts)
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp3);
				        break;
				    case ExecutableOpcodeType.Dp4:
				        foreach (var thread in activeExecutionContexts)
                            ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp4);
				        break;
                    case ExecutableOpcodeType.Emit:
                    case ExecutableOpcodeType.EmitStream:
                        yield return ExecutionResponse.Emit;
                        break;
                    case ExecutableOpcodeType.Eq:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Eq);
                        break;
                    case ExecutableOpcodeType.Exp:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Exp);
                        break;
                    case ExecutableOpcodeType.Frc:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Frc);
                        break;
                    case ExecutableOpcodeType.FtoI:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoI);
                        break;
                    case ExecutableOpcodeType.FtoU:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoU);
                        break;
                    case ExecutableOpcodeType.Ge:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ge);
                        break;
                    case ExecutableOpcodeType.IAdd:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IAdd);
                        break;
                    case ExecutableOpcodeType.IEq:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IEq);
                        break;
                    case ExecutableOpcodeType.IGe:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IGe);
                        break;
				    case ExecutableOpcodeType.ILt:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.ILt);
				        break;
                    case ExecutableOpcodeType.IMad:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMad);
                        break;
                    case ExecutableOpcodeType.IMin:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMin);
                        break;
                    case ExecutableOpcodeType.INe:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.INe);
                        break;
                    case ExecutableOpcodeType.INeg:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.INeg);
                        break;
                    case ExecutableOpcodeType.IShl:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShl);
                        break;
                    case ExecutableOpcodeType.IShr:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShr);
                        break;
                    case ExecutableOpcodeType.ItoF:
				        foreach (var thread in activeExecutionContexts)
				            Execute(thread, instruction, NumberType.Int, InstructionImplementations.ItoF);
				        break;
                    case ExecutableOpcodeType.Log:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Log);
                        break;
				    case ExecutableOpcodeType.Lt:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Lt);
				        break;
				    case ExecutableOpcodeType.Mad:
				        foreach (var thread in activeExecutionContexts)
				            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mad);
				        break;
				    case ExecutableOpcodeType.Max:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Max);
				        break;
                    case ExecutableOpcodeType.Min:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Min);
                        break;
				    case ExecutableOpcodeType.Mov:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mov);
				        break;
				    case ExecutableOpcodeType.MovC:
				        foreach (var thread in activeExecutionContexts)
				            Execute(thread, instruction, NumberType.Float, InstructionImplementations.MovC);
				        break;
				    case ExecutableOpcodeType.Mul:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mul);
				        break;
                    case ExecutableOpcodeType.Ne:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ne);
                        break;
				    case ExecutableOpcodeType.Ret:
				        yield return ExecutionResponse.Finished;
				        break;
                    case ExecutableOpcodeType.RoundNe:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNe);
                        break;
                    case ExecutableOpcodeType.RoundNi:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNi);
                        break;
                    case ExecutableOpcodeType.RoundPi:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundPi);
                        break;
                    case ExecutableOpcodeType.RoundZ:
                        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundZ);
                        break;
				    case ExecutableOpcodeType.Rsq:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Rsq);
				        break;
                    case ExecutableOpcodeType.DerivRtx:
				    case ExecutableOpcodeType.RtxCoarse:
				        for (var i = 0; i < executionContexts.Length; i += 4)
				        {
				            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);

				            var deltaX = Number4.Subtract(ref topRight, ref topLeft);

				            for (var j = i; j < i + 4; j++)
				                SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaX);
				        }
				        break;
				    case ExecutableOpcodeType.RtxFine:
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                            var topDeltaX = Number4.Subtract(ref topRight, ref topLeft);
                            var bottomDeltaX = Number4.Subtract(ref bottomRight, ref bottomLeft);

                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], topDeltaX);
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], topDeltaX);

                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], bottomDeltaX);
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], bottomDeltaX);
                        }
				        break;
                    case ExecutableOpcodeType.DerivRty:
                    case ExecutableOpcodeType.RtyCoarse:
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);

                            var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                            for (var j = i; j < i + 4; j++)
                                SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaY);
                        }
                        break;
                    case ExecutableOpcodeType.RtyFine:
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                            var leftDeltaY = Number4.Subtract(ref bottomLeft, ref topLeft);
                            var rightDeltaY = Number4.Subtract(ref bottomRight, ref topRight);

                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], leftDeltaY);
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], rightDeltaY);

                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], leftDeltaY);
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], rightDeltaY);
                        }
                        break;
                    case ExecutableOpcodeType.Sample:
				    {
				        var srcResourceIndex = instruction.Operands[2].Indices[0].Value;
                        var srcResource = virtualMachine.Textures[srcResourceIndex];
				        var srcSampler = virtualMachine.Samplers[instruction.Operands[3].Indices[0].Value];
				        var textureSampler = virtualMachine.TextureSamplers[srcResourceIndex];

                        if (textureSampler == null || srcResource == null)
                        {
                            var zero = new Number4();
                            foreach (var context in executionContexts)
                                SetRegisterValue(context, instruction.Operands[0], zero);
                        }
                        else
                        {
                            for (var i = 0; i < executionContexts.Length; i += 4)
                            {
                                var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                                var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                                var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                                var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                                var deltaX = Number4.Subtract(ref topRight, ref topLeft);
                                var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                                SetRegisterValue(executionContexts[i + 0], instruction.Operands[0],
                                    textureSampler.SampleGrad(srcResource, srcSampler, ref topLeft,
                                        ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 1], instruction.Operands[0],
                                    textureSampler.SampleGrad(srcResource, srcSampler, ref topRight,
                                        ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 2], instruction.Operands[0],
                                    textureSampler.SampleGrad(srcResource, srcSampler, ref bottomLeft,
                                        ref deltaX, ref deltaY));
                                SetRegisterValue(executionContexts[i + 3], instruction.Operands[0],
                                    textureSampler.SampleGrad(srcResource, srcSampler, ref bottomRight,
                                        ref deltaX, ref deltaY));
                            }
                        }
				        break;
				    }
				    case ExecutableOpcodeType.Sqrt:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.Float, InstructionImplementations.Sqrt);
				        break;
                    case ExecutableOpcodeType.UtoF:
				        foreach (var thread in activeExecutionContexts)
				            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.UtoF);
				        break;
				    case ExecutableOpcodeType.Xor:
				        foreach (var thread in activeExecutionContexts)
                            Execute(thread, instruction, NumberType.UInt, InstructionImplementations.Xor);
				        break;
				    default:
				        throw new InvalidOperationException(instruction.OpcodeType + " is not yet supported.");
				}

			    // Algorithm from "Dynamic Warp Formation: Exploiting Thread Scheduling for Efficient MIMD Control Flow
				// on SIMD Graphics Hardware" by Wilson Wai Lun Fung -
				// https://circle.ubc.ca/bitstream/handle/2429/2268/ubc_2008_fall_fung_wilson_wai_lun.pdf?sequence=1
				// 
				// 3 possible cases:
				// - No Divergence (single next PC)
				//     => Update the next PC field of the top of stack (TOS) entry to
				//        the next PC of all active threads in this warp.
				// - Divergence (multiple next PC)
				//     => Modify the next PC field of the TOS entry to the reconvergence point. 
				//        For each unique next PC of the warp, push a
				//        new entry onto the stack with next PC field being the unique
				//        next PC and the reconv. PC being the reconvergence point.
				//        The active mask of each entry denotes the threads branching
				//        to the next PC value of this entry.
				// - Reconvergence (next PC = reconv. PC of TOS)
				//     => Pop TOS entry from the stack.
			    // UpdateDivergenceStack returns true when the stack changed; in that case the cached
			    // top-of-stack entry is stale and must be refreshed BEFORE the active execution
			    // contexts are recomputed from it, otherwise the next instruction would run with
			    // the set of threads selected by the previous entry.
			    if (instruction.UpdateDivergenceStack(warp.DivergenceStack, activeMasks))
			    {
			        topOfDivergenceStack = warp.DivergenceStack.Peek();
			        activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);
			    }
			}
		}