/// <summary>
/// Builds the C# source text of a <c>DynamicShaderExecutor</c> class whose <c>Execute</c>
/// method contains the code for every instruction, emitted one after another in array order.
/// No divergence stack is emitted: the generated method uses <c>executionContexts</c> directly
/// as its set of active contexts.
/// </summary>
/// <param name="instructions">Instructions to emit code for, in the order they appear.</param>
/// <returns>The complete source text of the generated class.</returns>
private static string GenerateNonDivergent(ExecutableInstruction[] instructions)
{
    // Fixed text emitted before and after the per-instruction code.
    const string prologue = @" using System.Collections; using System.Collections.Generic; using SlimShader; using SlimShader.VirtualMachine; using SlimShader.VirtualMachine.Analysis.ExecutableInstructions; using SlimShader.VirtualMachine.Execution; public static class DynamicShaderExecutor { public static IEnumerable<ExecutionResponse> Execute( VirtualMachine virtualMachine, ExecutionContext[] executionContexts, ExecutableInstruction[] instructions) { var activeExecutionContexts = executionContexts; ";
    const string epilogue = @" } }";

    var body = new StringBuilder();
    for (var i = 0; i < instructions.Length; i++)
    {
        var current = instructions[i];

        // Each instruction is preceded by a comment holding its textual form.
        body.AppendLineIndent(2, "// " + current);
        GenerateInstructionCode(body, current);
        body.AppendLine();
    }

    return prologue + body + epilogue;
}
/// <summary>
/// Builds the C# source text of a <c>DynamicShaderExecutor</c> class whose <c>Execute</c>
/// method drives the instructions through a divergence stack: a <c>while</c> loop switches on
/// the next program counter of the top-of-stack entry, and every instruction becomes one
/// <c>case</c> labelled with its index in <paramref name="instructions"/>.
/// </summary>
/// <remarks>
/// After each instruction the generated code calls <c>instruction.UpdateDivergenceStack</c>.
/// <see cref="DivergentExecutableInstruction"/> instances are passed the <c>activeMasks</c>
/// local emitted by the instruction's own code; all other instructions are passed <c>null</c>.
/// When the call returns true, the generated code re-reads the top of the stack and only then
/// recomputes the active execution contexts from that new entry.
/// </remarks>
/// <param name="instructions">Instructions to emit code for; the array index is the case label.</param>
/// <returns>The complete source text of the generated class.</returns>
private static string GenerateDivergent(ExecutableInstruction[] instructions)
{
    var instructionsCode = new StringBuilder();
    int instructionIndex = 0;
    foreach (var instruction in instructions)
    {
        instructionsCode.AppendLineIndent(4, "case {0}:", instructionIndex);
        instructionsCode.AppendLineIndent(4, "{");
        instructionsCode.AppendLineIndent(5, "// " + instruction);
        GenerateInstructionCode(instructionsCode, instruction);
        instructionsCode.AppendLine();

        // Only divergent instructions emit an "activeMasks" local, so only they may reference it.
        if (instruction is DivergentExecutableInstruction)
        {
            instructionsCode.AppendLineIndent(5, "if (instruction.UpdateDivergenceStack(warp.DivergenceStack, activeMasks))");
        }
        else
        {
            instructionsCode.AppendLineIndent(5, "if (instruction.UpdateDivergenceStack(warp.DivergenceStack, null))");
        }

        // Fix: peek the new top-of-stack entry BEFORE deriving the active contexts from it.
        // The previous order computed the contexts from the stale entry, so the instruction
        // after a stack change ran with the previous entry's mask.
        instructionsCode.AppendLineIndent(5, "{");
        instructionsCode.AppendLineIndent(5, " topOfDivergenceStack = warp.DivergenceStack.Peek();");
        instructionsCode.AppendLineIndent(5, " activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);");
        instructionsCode.AppendLineIndent(5, "}");
        instructionsCode.AppendLineIndent(5, "break;");
        instructionsCode.AppendLineIndent(4, "}");
        instructionsCode.AppendLine();

        ++instructionIndex;
    }

    // NOTE: the template is a verbatim string, so the line break inside it (and the
    // column-0 continuation) is part of the literal and is kept exactly as it was.
    return @" using System.Collections; using System.Collections.Generic; using SlimShader; using SlimShader.VirtualMachine; using SlimShader.VirtualMachine.Analysis.ExecutableInstructions; using SlimShader.VirtualMachine.Execution; public static class DynamicShaderExecutor { public static IEnumerable<ExecutionResponse> Execute( VirtualMachine virtualMachine, ExecutionContext[] executionContexts, ExecutableInstruction[] instructions) { var warp = new Warp(executionContexts.Length); var activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, warp.DivergenceStack.Peek()); var topOfDivergenceStack = warp.DivergenceStack.Peek(); while (topOfDivergenceStack.NextPC < instructions.Length) { var instruction = instructions[topOfDivergenceStack.NextPC]; switch 
(topOfDivergenceStack.NextPC) { " + instructionsCode + @" } } } }";
}
/// <summary>
/// Runs the shader through a JIT-compiled delegate. Compiled delegates are cached per
/// <c>virtualMachine.Bytecode</c>; on a cache miss the shader is compiled from
/// <paramref name="instructions"/> and the result is stored before being invoked.
/// </summary>
/// <param name="virtualMachine">Virtual machine whose bytecode identifies the cached delegate.</param>
/// <param name="executionContexts">Execution contexts forwarded to the compiled shader.</param>
/// <param name="instructions">Instructions to compile on a cache miss; also forwarded to the delegate.</param>
/// <returns>Whatever sequence the compiled shader delegate returns.</returns>
public IEnumerable<ExecutionResponse> Execute(
    VirtualMachine virtualMachine,
    ExecutionContext[] executionContexts,
    ExecutableInstruction[] instructions)
{
    var cacheKey = virtualMachine.Bytecode;

    ExecuteShaderDelegate compiledShader;
    var alreadyCompiled = _jittedShaderCache.TryGetValue(cacheKey, out compiledShader);
    if (!alreadyCompiled)
    {
        // First time this bytecode is seen: compile it and remember the delegate.
        compiledShader = JitCompileShader(instructions);
        _jittedShaderCache.Add(cacheKey, compiledShader);
    }

    return compiledShader(virtualMachine, executionContexts, instructions);
}
/// <summary>
/// Generates C# source for the given instructions, compiles it with Roslyn into a collectible
/// dynamic assembly, and returns a delegate bound to the generated
/// <c>DynamicShaderExecutor.Execute</c> method.
/// </summary>
/// <param name="instructions">Instructions to generate shader code for.</param>
/// <returns>A delegate that invokes the freshly compiled <c>Execute</c> method.</returns>
/// <exception cref="Exception">
/// Compilation did not succeed; the message lists every diagnostic, one per line.
/// </exception>
private static ExecuteShaderDelegate JitCompileShader(ExecutableInstruction[] instructions)
{
    var assemblyReferences = new[]
    {
        MetadataReference.CreateAssemblyReference("mscorlib"),
        new MetadataFileReference(typeof(Number4).Assembly.Location),
        new MetadataFileReference(typeof(VirtualMachine).Assembly.Location)
    };

    const string outputName = "SlimShader.VirtualMachine.Jitter.Generated";

    var generatedSource = ShaderCodeGenerator.Generate(instructions);

    var compilation = Compilation.Create(outputName)
        .WithOptions(new CompilationOptions(OutputKind.DynamicallyLinkedLibrary, debugInformationKind: Roslyn.Compilers.Common.DebugInformationKind.Full))
        .AddReferences(assemblyReferences)
        .AddSyntaxTrees(SyntaxTree.ParseText(generatedSource));

    // The compiled code is emitted into an in-memory module.
    var dynamicAssembly = AppDomain.CurrentDomain.DefineDynamicAssembly(
        new AssemblyName(outputName),
        AssemblyBuilderAccess.RunAndCollect);
    var moduleBuilder = dynamicAssembly.DefineDynamicModule(outputName);

    // Dump the generated source to the debug output before attempting to emit it.
    System.Diagnostics.Debug.Write(generatedSource);

    var emitResult = compilation.Emit(moduleBuilder);
    if (emitResult.Success)
    {
        var executorType = moduleBuilder.GetType("DynamicShaderExecutor", false, true);
        var executeMethod = executorType.GetMethod("Execute");
        return (ExecuteShaderDelegate) executeMethod.CreateDelegate(typeof(ExecuteShaderDelegate));
    }

    // Compilation failed: report all diagnostics in a single exception message.
    var failureText = string.Empty;
    foreach (var problem in emitResult.Diagnostics)
    {
        failureText += problem + Environment.NewLine;
    }

    throw new Exception(failureText);
}
/// <summary>
/// Emits a <c>foreach</c> loop over <c>activeExecutionContexts</c> that reads operands 1, 2 and 3
/// as float values, calls <c>InstructionImplementations.<paramref name="methodName"/></c> with the
/// instruction's saturate flag and the three sources, and stores the result through
/// <c>GenerateSetRegisterValue</c> for operand 0.
/// </summary>
/// <param name="sb">Builder receiving the generated code.</param>
/// <param name="instruction">Instruction supplying the operands and the saturate flag.</param>
/// <param name="methodName">Name of the <c>InstructionImplementations</c> method to call.</param>
private static void GenerateExecute3(StringBuilder sb, ExecutableInstruction instruction, string methodName)
{
    // Resolve every piece of generated text up front, then emit the loop in one go.
    var firstSource = GenerateGetOperandValue(instruction.Operands[1], NumberType.Float);
    var secondSource = GenerateGetOperandValue(instruction.Operands[2], NumberType.Float);
    var thirdSource = GenerateGetOperandValue(instruction.Operands[3], NumberType.Float);

    // Boolean.ToString() yields "True"/"False"; the generated C# needs the lower-case keyword.
    var saturateLiteral = instruction.Saturate.ToString().ToLower();

    sb.AppendLineIndent(2, "foreach (var context in activeExecutionContexts)");
    sb.AppendLineIndent(2, "{");
    sb.AppendLineIndent(2, " var src0 = {0};", firstSource);
    sb.AppendLineIndent(2, " var src1 = {0};", secondSource);
    sb.AppendLineIndent(2, " var src2 = {0};", thirdSource);
    sb.AppendLineIndent(2, " var result = InstructionImplementations.{0}({1}, ref src0, ref src1, ref src2);", methodName, saturateLiteral);
    GenerateSetRegisterValue(sb, instruction.Operands[0]);
    sb.AppendLineIndent(2, "}");
}
/// <summary>
/// Generates the C# source for a shader, choosing the divergence-stack based template when at
/// least one instruction is a <see cref="DivergentExecutableInstruction"/> and the straight-line
/// template otherwise.
/// </summary>
/// <param name="instructions">Instructions to generate code for.</param>
/// <returns>The generated source text.</returns>
public static string Generate(ExecutableInstruction[] instructions)
{
    var hasDivergentInstruction = instructions.Any(candidate => candidate is DivergentExecutableInstruction);

    return hasDivergentInstruction
        ? GenerateDivergent(instructions)
        : GenerateNonDivergent(instructions);
}
/// <summary>
/// Appends to <paramref name="sb"/> the generated C# statements that implement a single
/// instruction, selected by <c>instruction.OpcodeType</c>.
/// </summary>
/// <remarks>
/// The emitted text refers to names that the surrounding generated method is expected to
/// declare: <c>virtualMachine</c>, <c>executionContexts</c> and <c>activeExecutionContexts</c>.
/// </remarks>
/// <param name="sb">Builder receiving the generated code.</param>
/// <param name="instruction">Instruction to generate code for.</param>
/// <exception cref="InvalidOperationException">The opcode has no case in this generator.</exception>
private static void GenerateInstructionCode(StringBuilder sb, ExecutableInstruction instruction)
{
    switch (instruction.OpcodeType)
    {
        case Execution.ExecutableOpcodeType.Add:
            GenerateExecute2(sb, instruction, "Add");
            break;
        // Unconditional branch: no per-context code is emitted here.
        case Execution.ExecutableOpcodeType.Branch :
            break;
        // Conditional branch: emit code that builds two BitArray masks sized to
        // executionContexts.Length. For every active context, mask 1 records the test result
        // and mask 0 records its negation.
        case Execution.ExecutableOpcodeType.BranchC :
            sb.AppendLineIndent(2, "var activeMasks = new List<BitArray>");
            sb.AppendLineIndent(2, "{");
            sb.AppendLineIndent(2, " new BitArray(executionContexts.Length),");
            sb.AppendLineIndent(2, " new BitArray(executionContexts.Length)");
            sb.AppendLineIndent(2, "};");
            sb.AppendLineIndent(2, "foreach (var context in activeExecutionContexts)");
            sb.AppendLineIndent(2, "{");
            sb.AppendLineIndent(2, " var src0 = {0};", GenerateGetOperandValue(instruction.Operands[0], NumberType.UInt));
            sb.AppendLineIndent(2, " var result = src0.{0};", GenerateTestCondition(instruction.TestBoolean));
            sb.AppendLineIndent(2, " activeMasks[0][context.Index] = !result;");
            sb.AppendLineIndent(2, " activeMasks[1][context.Index] = result;");
            sb.AppendLineIndent(2, "}");
            break;
        // Cut / Emit / Ret are surfaced to the caller of the generated iterator via yield return.
        case Execution.ExecutableOpcodeType.Cut:
        case Execution.ExecutableOpcodeType.CutStream:
            sb.AppendLineIndent(2, "yield return ExecutionResponse.Cut;");
            break;
        case Execution.ExecutableOpcodeType.Dp2:
            GenerateExecuteScalar2(sb, instruction, "Dp2");
            break;
        case Execution.ExecutableOpcodeType.Dp3:
            GenerateExecuteScalar2(sb, instruction, "Dp3");
            break;
        case Execution.ExecutableOpcodeType.Dp4:
            GenerateExecuteScalar2(sb, instruction, "Dp4");
            break;
        case Execution.ExecutableOpcodeType.Emit:
        case Execution.ExecutableOpcodeType.EmitStream:
            sb.AppendLineIndent(2, "yield return ExecutionResponse.Emit;");
            break;
        case Execution.ExecutableOpcodeType.IAdd:
            GenerateExecute2NoSat(sb, instruction, "IAdd");
            break;
        case Execution.ExecutableOpcodeType.IGe:
            GenerateExecute2NoSat(sb, instruction, "IGe");
            break;
        case Execution.ExecutableOpcodeType.IShl:
            GenerateExecute2NoSat(sb, instruction, "IShl");
            break;
        case Execution.ExecutableOpcodeType.IShr:
            GenerateExecute2NoSat(sb, instruction, "IShr");
            break;
        case Execution.ExecutableOpcodeType.Mad:
            GenerateExecute3(sb, instruction, "Mad");
            break;
        case Execution.ExecutableOpcodeType.Max:
            GenerateExecute2(sb, instruction, "Max");
            break;
        case Execution.ExecutableOpcodeType.Min:
            GenerateExecute2(sb, instruction, "Min");
            break;
        case Execution.ExecutableOpcodeType.Mul:
            GenerateExecute2(sb, instruction, "Mul");
            break;
        case Execution.ExecutableOpcodeType.Mov :
            GenerateExecute1(sb, instruction, "Mov");
            break;
        case Execution.ExecutableOpcodeType.MovC:
            GenerateExecute3(sb, instruction, "MovC");
            break;
        case Execution.ExecutableOpcodeType.Ret :
            sb.AppendLineIndent(2, "yield return ExecutionResponse.Finished;");
            break;
        case Execution.ExecutableOpcodeType.Rsq:
            GenerateExecute1(sb, instruction, "Rsq");
            break;
        // Texture sample. The resource and sampler indices are read from the instruction now
        // and baked into the generated code as literals.
        case Execution.ExecutableOpcodeType.Sample :
            var srcResourceIndex = instruction.Operands[2].Indices[0].Value;
            var srcSamplerIndex = instruction.Operands[3].Indices[0].Value;
            sb.AppendLineIndent(2, "{");
            sb.AppendLineIndent(2, " var textureSampler = virtualMachine.TextureSamplers[{0}];", srcResourceIndex);
            sb.AppendLineIndent(2, " var srcResource = virtualMachine.Textures[{0}];", srcResourceIndex);
            sb.AppendLineIndent(2, " var srcSampler = virtualMachine.Samplers[{0}];", srcSamplerIndex);
            sb.AppendLineIndent(2, " ");
            // Generated fallback: with no texture sampler or resource bound, a default
            // Number4 is written to the destination for every execution context.
            sb.AppendLineIndent(2, " if (textureSampler == null || srcResource == null)");
            sb.AppendLineIndent(2, " {");
            sb.AppendLineIndent(2, " var result = new Number4();");
            sb.AppendLineIndent(2, " foreach (var context in executionContexts)");
            GenerateSetRegisterValue(sb, instruction.Operands[0]);
            sb.AppendLineIndent(2, " }");
            sb.AppendLineIndent(2, " else");
            sb.AppendLineIndent(2, " {");
            // Generated sampling path: contexts are processed in groups of four. The variable
            // names suggest a 2x2 pixel quad layout (NOTE(review): confirm against the caller
            // that builds executionContexts).
            sb.AppendLineIndent(2, " for (var i = 0; i < executionContexts.Length; i += 4)");
            sb.AppendLineIndent(2, " {");
            sb.AppendLineIndent(2, " var topLeft = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float, "executionContexts[i + 0]"));
            sb.AppendLineIndent(2, " var topRight = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float, "executionContexts[i + 1]"));
            sb.AppendLineIndent(2, " var bottomLeft = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float, "executionContexts[i + 2]"));
            sb.AppendLineIndent(2, " var bottomRight = {0};", GenerateGetOperandValue(instruction.Operands[1], NumberType.Float, "executionContexts[i + 3]"));
            sb.AppendLineIndent(2, " ");
            // Gradients are differences between neighbouring contexts of the group and are
            // shared by all four SampleGrad calls below.
            sb.AppendLineIndent(2, " var deltaX = Number4.Subtract(ref topRight, ref topLeft);");
            sb.AppendLineIndent(2, " var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);");
            sb.AppendLineIndent(2, " ");
            sb.AppendLineIndent(2, " var result = textureSampler.SampleGrad(srcResource, srcSampler, ref topLeft, ref deltaX, ref deltaY);");
            GenerateSetRegisterValue(sb, instruction.Operands[0], "executionContexts[i + 0]");
            sb.AppendLineIndent(2, " ");
            sb.AppendLineIndent(2, " result = textureSampler.SampleGrad(srcResource, srcSampler, ref topRight, ref deltaX, ref deltaY);");
            GenerateSetRegisterValue(sb, instruction.Operands[0], "executionContexts[i + 1]");
            sb.AppendLineIndent(2, " ");
            sb.AppendLineIndent(2, " result = textureSampler.SampleGrad(srcResource, srcSampler, ref bottomLeft, ref deltaX, ref deltaY);");
            GenerateSetRegisterValue(sb, instruction.Operands[0], "executionContexts[i + 2]");
            sb.AppendLineIndent(2, " ");
            sb.AppendLineIndent(2, " result = textureSampler.SampleGrad(srcResource, srcSampler, ref bottomRight, ref deltaX, ref deltaY);");
            GenerateSetRegisterValue(sb, instruction.Operands[0], "executionContexts[i + 3]");
            sb.AppendLineIndent(2, " ");
            sb.AppendLineIndent(2, " }");
            sb.AppendLineIndent(2, " }");
            sb.AppendLineIndent(2, "}");
            break;
        // Any opcode without a case above cannot be JIT-compiled by this generator.
        default :
            throw new InvalidOperationException(instruction.OpcodeType + " is not yet supported.");
    }
}
/// <summary>
/// Executes a single-source instruction for one context: reads operand 1 as
/// <paramref name="numberType"/>, passes it to <paramref name="callback"/>, and stores the
/// returned value through operand 0.
/// </summary>
/// <param name="context">Execution context to read from and write to.</param>
/// <param name="instruction">Instruction supplying the operands.</param>
/// <param name="numberType">Number type used when reading the source operand.</param>
/// <param name="callback">Implementation of the operation.</param>
private static void Execute(ExecutionContext context, ExecutableInstruction instruction, NumberType numberType, Number4ToNumber4Callback callback)
{
    var operands = instruction.Operands;

    var source = GetOperandValue(context, operands[1], numberType);
    var computed = callback(ref source);

    SetRegisterValue(context, operands[0], computed);
}
/// <summary>
/// Executes a three-source instruction for one context: reads operands 1, 2 and 3 as
/// <paramref name="numberType"/>, passes them to <paramref name="callback"/> together with the
/// instruction's saturate flag, and stores the returned value through operand 0.
/// </summary>
/// <param name="context">Execution context to read from and write to.</param>
/// <param name="instruction">Instruction supplying the operands and the saturate flag.</param>
/// <param name="numberType">Number type used when reading the source operands.</param>
/// <param name="callback">Implementation of the operation.</param>
private static void Execute(ExecutionContext context, ExecutableInstruction instruction, NumberType numberType, BoolNumber4Number4Number4ToNumber4Callback callback)
{
    var operands = instruction.Operands;

    var first = GetOperandValue(context, operands[1], numberType);
    var second = GetOperandValue(context, operands[2], numberType);
    var third = GetOperandValue(context, operands[3], numberType);

    var computed = callback(instruction.Saturate, ref first, ref second, ref third);

    SetRegisterValue(context, operands[0], computed);
}
/// <summary>
/// Executes a two-source instruction that produces a single number for one context: reads
/// operands 1 and 2 as <paramref name="numberType"/>, passes them to
/// <paramref name="callback"/> together with the instruction's saturate flag, and stores the
/// returned number in all four components of the value written through operand 0.
/// </summary>
/// <param name="context">Execution context to read from and write to.</param>
/// <param name="instruction">Instruction supplying the operands and the saturate flag.</param>
/// <param name="numberType">Number type used when reading the source operands.</param>
/// <param name="callback">Implementation of the operation.</param>
private static void ExecuteScalar(ExecutionContext context, ExecutableInstruction instruction, NumberType numberType, Number4Number4ToNumberCallback callback)
{
    var operands = instruction.Operands;

    var left = GetOperandValue(context, operands[1], numberType);
    var right = GetOperandValue(context, operands[2], numberType);

    var scalar = callback(instruction.Saturate, ref left, ref right);

    // Replicate the scalar into every component of the destination value.
    var replicated = new Number4
    {
        Number0 = scalar,
        Number1 = scalar,
        Number2 = scalar,
        Number3 = scalar
    };

    SetRegisterValue(context, operands[0], replicated);
}
/// <summary>
/// Interprets the given instructions for a group of execution contexts that share a single
/// divergence stack. Each loop iteration executes the instruction at the next PC of the
/// top-of-stack entry, then lets the instruction update the stack.
///
/// Per-thread arithmetic instructions run only on the currently active contexts. The
/// derivative opcodes (Rtx*/Rty*) and Sample walk <paramref name="executionContexts"/> in
/// groups of four regardless of the active mask.
///
/// Background reading:
/// http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter34.html
/// http://people.maths.ox.ac.uk/gilesm/pp10/lec2_2x2.pdf
/// http://stackoverflow.com/questions/10119796/how-does-cuda-compiler-know-the-divergence-behaviour-of-warps
/// http://www.istc-cc.cmu.edu/publications/papers/2011/SIMD.pdf
/// http://hal.archives-ouvertes.fr/docs/00/62/26/54/PDF/collange_sympa2011_en.pdf
/// http://users.ece.cmu.edu/~omutlu/pub/large-gpu-warps_micro11.pdf
/// http://www.eecis.udel.edu/~cavazos/cisc879/papers/a3-han.pdf
/// </summary>
/// <param name="virtualMachine">Supplies the textures, samplers and texture samplers used by Sample.</param>
/// <param name="executionContexts">All contexts of the group; the active subset is derived from the divergence stack.</param>
/// <param name="instructions">Program to interpret, indexed by program counter.</param>
/// <returns>
/// A lazily evaluated sequence that yields <c>ExecutionResponse.Cut</c>,
/// <c>ExecutionResponse.Emit</c> or <c>ExecutionResponse.Finished</c> whenever the corresponding
/// opcode is executed; interpretation resumes when the caller asks for the next element.
/// </returns>
/// <exception cref="NotImplementedException">A Discard instruction was reached.</exception>
/// <exception cref="InvalidOperationException">An opcode without a case below was reached.</exception>
public IEnumerable<ExecutionResponse> Execute(
    VirtualMachine virtualMachine,
    ExecutionContext[] executionContexts,
    ExecutableInstruction[] instructions)
{
    var warp = new Warp(executionContexts.Length);
    var activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, warp.DivergenceStack.Peek());
    var topOfDivergenceStack = warp.DivergenceStack.Peek();
    while (topOfDivergenceStack.NextPC < instructions.Length)
    {
        var instruction = instructions[topOfDivergenceStack.NextPC];

        // Only BranchC fills this in; every other instruction passes null to UpdateDivergenceStack.
        List<BitArray> activeMasks = null;
        switch (instruction.OpcodeType)
        {
            case ExecutableOpcodeType.Add:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Add);
                break;
            case ExecutableOpcodeType.And:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.And);
                break;
            case ExecutableOpcodeType.Branch:
                break;
            case ExecutableOpcodeType.BranchC:
                // Mask 1 collects the threads whose condition tested true, mask 0 the others.
                activeMasks = new List<BitArray>
                {
                    new BitArray(executionContexts.Length),
                    new BitArray(executionContexts.Length)
                };
                foreach (var thread in activeExecutionContexts)
                {
                    var src0 = GetOperandValue(thread, instruction.Operands[0], NumberType.UInt);
                    bool result = TestCondition(ref src0, instruction.TestBoolean);
                    activeMasks[0][thread.Index] = !result;
                    activeMasks[1][thread.Index] = result;
                }
                break;
            case ExecutableOpcodeType.Cut:
            case ExecutableOpcodeType.CutStream:
                yield return ExecutionResponse.Cut;
                break;
            case ExecutableOpcodeType.Discard:
                throw new NotImplementedException();
            case ExecutableOpcodeType.Div:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Div);
                break;
            case ExecutableOpcodeType.Dp2:
                foreach (var thread in activeExecutionContexts)
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp2);
                break;
            case ExecutableOpcodeType.Dp3:
                foreach (var thread in activeExecutionContexts)
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp3);
                break;
            case ExecutableOpcodeType.Dp4:
                foreach (var thread in activeExecutionContexts)
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp4);
                break;
            case ExecutableOpcodeType.Emit:
            case ExecutableOpcodeType.EmitStream:
                yield return ExecutionResponse.Emit;
                break;
            case ExecutableOpcodeType.Eq:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Eq);
                break;
            case ExecutableOpcodeType.Exp:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Exp);
                break;
            case ExecutableOpcodeType.Frc:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Frc);
                break;
            case ExecutableOpcodeType.FtoI:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoI);
                break;
            case ExecutableOpcodeType.FtoU:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoU);
                break;
            case ExecutableOpcodeType.Ge:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ge);
                break;
            case ExecutableOpcodeType.IAdd:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IAdd);
                break;
            case ExecutableOpcodeType.IEq:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IEq);
                break;
            case ExecutableOpcodeType.IGe:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IGe);
                break;
            case ExecutableOpcodeType.ILt:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.ILt);
                break;
            case ExecutableOpcodeType.IMad:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMad);
                break;
            case ExecutableOpcodeType.IMin:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMin);
                break;
            case ExecutableOpcodeType.INe:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.INe);
                break;
            case ExecutableOpcodeType.INeg:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.INeg);
                break;
            case ExecutableOpcodeType.IShl:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShl);
                break;
            case ExecutableOpcodeType.IShr:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShr);
                break;
            case ExecutableOpcodeType.ItoF:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.ItoF);
                break;
            case ExecutableOpcodeType.Log:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Log);
                break;
            case ExecutableOpcodeType.Lt:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Lt);
                break;
            case ExecutableOpcodeType.Mad:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mad);
                break;
            case ExecutableOpcodeType.Max:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Max);
                break;
            case ExecutableOpcodeType.Min:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Min);
                break;
            case ExecutableOpcodeType.Mov:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mov);
                break;
            case ExecutableOpcodeType.MovC:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.MovC);
                break;
            case ExecutableOpcodeType.Mul:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mul);
                break;
            case ExecutableOpcodeType.Ne:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ne);
                break;
            case ExecutableOpcodeType.Ret:
                yield return ExecutionResponse.Finished;
                break;
            case ExecutableOpcodeType.RoundNe:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNe);
                break;
            case ExecutableOpcodeType.RoundNi:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNi);
                break;
            case ExecutableOpcodeType.RoundPi:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundPi);
                break;
            case ExecutableOpcodeType.RoundZ:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundZ);
                break;
            case ExecutableOpcodeType.Rsq:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Rsq);
                break;
            case ExecutableOpcodeType.DerivRtx:
            case ExecutableOpcodeType.RtxCoarse:
                // Coarse X derivative: one delta (context 1 minus context 0) shared by the whole group of four.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                    var deltaX = Number4.Subtract(ref topRight, ref topLeft);
                    for (var j = i; j < i + 4; j++)
                        SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaX);
                }
                break;
            case ExecutableOpcodeType.RtxFine:
                // Fine X derivative: separate deltas for contexts 0/1 and contexts 2/3 of each group.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                    var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);
                    var topDeltaX = Number4.Subtract(ref topRight, ref topLeft);
                    var bottomDeltaX = Number4.Subtract(ref bottomRight, ref bottomLeft);
                    SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], topDeltaX);
                    SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], topDeltaX);
                    SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], bottomDeltaX);
                    SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], bottomDeltaX);
                }
                break;
            case ExecutableOpcodeType.DerivRty:
            case ExecutableOpcodeType.RtyCoarse:
                // Coarse Y derivative: one delta (context 2 minus context 0) shared by the whole group of four.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                    var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);
                    for (var j = i; j < i + 4; j++)
                        SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaY);
                }
                break;
            case ExecutableOpcodeType.RtyFine:
                // Fine Y derivative: separate deltas for contexts 0/2 and contexts 1/3 of each group.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                    var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);
                    var leftDeltaY = Number4.Subtract(ref bottomLeft, ref topLeft);
                    var rightDeltaY = Number4.Subtract(ref bottomRight, ref topRight);
                    SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], leftDeltaY);
                    SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], rightDeltaY);
                    SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], leftDeltaY);
                    SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], rightDeltaY);
                }
                break;
            case ExecutableOpcodeType.Sample:
                {
                    var srcResourceIndex = instruction.Operands[2].Indices[0].Value;
                    var srcResource = virtualMachine.Textures[srcResourceIndex];
                    var srcSampler = virtualMachine.Samplers[instruction.Operands[3].Indices[0].Value];
                    var textureSampler = virtualMachine.TextureSamplers[srcResourceIndex];
                    if (textureSampler == null || srcResource == null)
                    {
                        // Nothing bound: every context receives a default Number4.
                        var zero = new Number4();
                        foreach (var context in executionContexts)
                            SetRegisterValue(context, instruction.Operands[0], zero);
                    }
                    else
                    {
                        // Gradients are taken from context 0's neighbours and shared by all four samples of the group.
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);
                            var deltaX = Number4.Subtract(ref topRight, ref topLeft);
                            var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);
                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], textureSampler.SampleGrad(srcResource, srcSampler, ref topLeft, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], textureSampler.SampleGrad(srcResource, srcSampler, ref topRight, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], textureSampler.SampleGrad(srcResource, srcSampler, ref bottomLeft, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], textureSampler.SampleGrad(srcResource, srcSampler, ref bottomRight, ref deltaX, ref deltaY));
                        }
                    }
                    break;
                }
            case ExecutableOpcodeType.Sqrt:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Sqrt);
                break;
            case ExecutableOpcodeType.UtoF:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.UtoF);
                break;
            case ExecutableOpcodeType.Xor:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.Xor);
                break;
            default:
                throw new InvalidOperationException(instruction.OpcodeType + " is not yet supported.");
        }

        // Algorithm from "Dynamic Warp Formation: Exploiting Thread Scheduling for Efficient MIMD Control Flow
        // on SIMD Graphics Hardware" by Wilson Wai Lun Fung -
        // https://circle.ubc.ca/bitstream/handle/2429/2268/ubc_2008_fall_fung_wilson_wai_lun.pdf?sequence=1
        //
        // 3 possible cases:
        // - No Divergence (single next PC)
        //   => Update the next PC field of the top of stack (TOS) entry to
        //      the next PC of all active threads in this warp.
        // - Divergence (multiple next PC)
        //   => Modify the next PC field of the TOS entry to the reconvergence point.
        //      For each unique next PC of the warp, push a
        //      new entry onto the stack with next PC field being the unique
        //      next PC and the reconv. PC being the reconvergence point.
        //      The active mask of each entry denotes the threads branching
        //      to the next PC value of this entry.
        // - Reconvergence (next PC = reconv. PC of TOS)
        //   => Pop TOS entry from the stack.
        if (instruction.UpdateDivergenceStack(warp.DivergenceStack, activeMasks))
        {
            // Fix: re-read the top of the stack FIRST, then derive the active contexts from that
            // new entry. The previous order derived them from the stale entry, so the next
            // instruction ran with the mask of the entry that had just been replaced.
            topOfDivergenceStack = warp.DivergenceStack.Peek();
            activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);
        }
    }
}