/// <summary>
/// Interprets <paramref name="instructions"/> for a group of execution contexts that advance
/// together, using a per-warp divergence stack to decide which contexts execute each instruction.
/// </summary>
/// <remarks>
/// Background reading on SIMD control-flow divergence:
/// http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter34.html
/// http://people.maths.ox.ac.uk/gilesm/pp10/lec2_2x2.pdf
/// http://stackoverflow.com/questions/10119796/how-does-cuda-compiler-know-the-divergence-behaviour-of-warps
/// http://www.istc-cc.cmu.edu/publications/papers/2011/SIMD.pdf
/// http://hal.archives-ouvertes.fr/docs/00/62/26/54/PDF/collange_sympa2011_en.pdf
/// http://users.ece.cmu.edu/~omutlu/pub/large-gpu-warps_micro11.pdf
/// http://www.eecis.udel.edu/~cavazos/cisc879/papers/a3-han.pdf
///
/// This is an iterator: nothing runs until the result is enumerated. Yielding
/// <c>ExecutionResponse.Finished</c> for <c>Ret</c> does not by itself leave the loop; the
/// divergence stack is still updated if the caller keeps enumerating.
///
/// The derivative opcodes and <c>Sample</c> walk <paramref name="executionContexts"/> in groups
/// of four (top-left, top-right, bottom-left, bottom-right) and write to every context,
/// not just the currently active ones.
/// </remarks>
/// <param name="virtualMachine">Source of the textures, samplers and texture samplers read by <c>Sample</c>.</param>
/// <param name="executionContexts">All contexts belonging to the warp.</param>
/// <param name="instructions">Instruction stream, indexed by the divergence stack's next-PC value.</param>
/// <returns>
/// A lazy sequence containing one response per <c>Cut</c>/<c>CutStream</c>, <c>Emit</c>/<c>EmitStream</c>
/// and <c>Ret</c> instruction encountered.
/// </returns>
/// <exception cref="NotImplementedException">A <c>Discard</c> instruction is reached.</exception>
/// <exception cref="InvalidOperationException">An opcode without a case in the dispatch switch is reached.</exception>
public IEnumerable<ExecutionResponse> Execute(
    VirtualMachine virtualMachine, ExecutionContext[] executionContexts,
    ExecutableInstruction[] instructions)
{
    var warp = new Warp(executionContexts.Length);
    var topOfDivergenceStack = warp.DivergenceStack.Peek();
    var activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);

    while (topOfDivergenceStack.NextPC < instructions.Length)
    {
        var instruction = instructions[topOfDivergenceStack.NextPC];

        // Only conditional branches produce per-path masks; null for every other opcode.
        List<BitArray> activeMasks = null;

        switch (instruction.OpcodeType)
        {
            case ExecutableOpcodeType.Add:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Add);
                }
                break;
            case ExecutableOpcodeType.And:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.And);
                }
                break;
            case ExecutableOpcodeType.Branch:
                // Nothing to execute per thread; the stack update below handles the jump.
                break;
            case ExecutableOpcodeType.BranchC:
                // Mask 0 collects threads whose condition failed, mask 1 those whose condition passed.
                activeMasks = new List<BitArray>
                {
                    new BitArray(executionContexts.Length),
                    new BitArray(executionContexts.Length)
                };
                foreach (var thread in activeExecutionContexts)
                {
                    var src0 = GetOperandValue(thread, instruction.Operands[0], NumberType.UInt);
                    bool result = TestCondition(ref src0, instruction.TestBoolean);
                    activeMasks[0][thread.Index] = !result;
                    activeMasks[1][thread.Index] = result;
                }
                break;
            case ExecutableOpcodeType.Cut:
            case ExecutableOpcodeType.CutStream:
                yield return ExecutionResponse.Cut;
                break;
            case ExecutableOpcodeType.Discard:
                throw new NotImplementedException();
            case ExecutableOpcodeType.Div:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Div);
                }
                break;
            case ExecutableOpcodeType.Dp2:
                foreach (var thread in activeExecutionContexts)
                {
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp2);
                }
                break;
            case ExecutableOpcodeType.Dp3:
                foreach (var thread in activeExecutionContexts)
                {
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp3);
                }
                break;
            case ExecutableOpcodeType.Dp4:
                foreach (var thread in activeExecutionContexts)
                {
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp4);
                }
                break;
            case ExecutableOpcodeType.Emit:
            case ExecutableOpcodeType.EmitStream:
                yield return ExecutionResponse.Emit;
                break;
            case ExecutableOpcodeType.Eq:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Eq);
                }
                break;
            case ExecutableOpcodeType.Exp:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Exp);
                }
                break;
            case ExecutableOpcodeType.Frc:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Frc);
                }
                break;
            case ExecutableOpcodeType.FtoI:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoI);
                }
                break;
            case ExecutableOpcodeType.FtoU:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoU);
                }
                break;
            case ExecutableOpcodeType.Ge:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ge);
                }
                break;
            case ExecutableOpcodeType.IAdd:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IAdd);
                }
                break;
            case ExecutableOpcodeType.IEq:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IEq);
                }
                break;
            case ExecutableOpcodeType.IGe:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IGe);
                }
                break;
            case ExecutableOpcodeType.ILt:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.ILt);
                }
                break;
            case ExecutableOpcodeType.IMad:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMad);
                }
                break;
            case ExecutableOpcodeType.IMin:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMin);
                }
                break;
            case ExecutableOpcodeType.INe:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.INe);
                }
                break;
            case ExecutableOpcodeType.INeg:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.INeg);
                }
                break;
            case ExecutableOpcodeType.IShl:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShl);
                }
                break;
            case ExecutableOpcodeType.IShr:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShr);
                }
                break;
            case ExecutableOpcodeType.ItoF:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.ItoF);
                }
                break;
            case ExecutableOpcodeType.Log:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Log);
                }
                break;
            case ExecutableOpcodeType.Lt:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Lt);
                }
                break;
            case ExecutableOpcodeType.Mad:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mad);
                }
                break;
            case ExecutableOpcodeType.Max:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Max);
                }
                break;
            case ExecutableOpcodeType.Min:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Min);
                }
                break;
            case ExecutableOpcodeType.Mov:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mov);
                }
                break;
            case ExecutableOpcodeType.MovC:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.MovC);
                }
                break;
            case ExecutableOpcodeType.Mul:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mul);
                }
                break;
            case ExecutableOpcodeType.Ne:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ne);
                }
                break;
            case ExecutableOpcodeType.Ret:
                yield return ExecutionResponse.Finished;
                break;
            case ExecutableOpcodeType.RoundNe:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNe);
                }
                break;
            case ExecutableOpcodeType.RoundNi:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNi);
                }
                break;
            case ExecutableOpcodeType.RoundPi:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundPi);
                }
                break;
            case ExecutableOpcodeType.RoundZ:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundZ);
                }
                break;
            case ExecutableOpcodeType.Rsq:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Rsq);
                }
                break;
            case ExecutableOpcodeType.DerivRtx:
            case ExecutableOpcodeType.RtxCoarse:
                // One horizontal delta (top-right minus top-left) shared by all four contexts of the group.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);

                    var deltaX = Number4.Subtract(ref topRight, ref topLeft);

                    for (var j = i; j < i + 4; j++)
                    {
                        SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaX);
                    }
                }
                break;
            case ExecutableOpcodeType.RtxFine:
                // Separate horizontal deltas for the top row and the bottom row of the group.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                    var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                    var topDeltaX = Number4.Subtract(ref topRight, ref topLeft);
                    var bottomDeltaX = Number4.Subtract(ref bottomRight, ref bottomLeft);

                    SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], topDeltaX);
                    SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], topDeltaX);
                    SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], bottomDeltaX);
                    SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], bottomDeltaX);
                }
                break;
            case ExecutableOpcodeType.DerivRty:
            case ExecutableOpcodeType.RtyCoarse:
                // One vertical delta (bottom-left minus top-left) shared by all four contexts of the group.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);

                    var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                    for (var j = i; j < i + 4; j++)
                    {
                        SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaY);
                    }
                }
                break;
            case ExecutableOpcodeType.RtyFine:
                // Separate vertical deltas for the left column and the right column of the group.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                    var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                    var leftDeltaY = Number4.Subtract(ref bottomLeft, ref topLeft);
                    var rightDeltaY = Number4.Subtract(ref bottomRight, ref topRight);

                    SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], leftDeltaY);
                    SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], rightDeltaY);
                    SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], leftDeltaY);
                    SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], rightDeltaY);
                }
                break;
            case ExecutableOpcodeType.Sample:
                {
                    var srcResourceIndex = instruction.Operands[2].Indices[0].Value;
                    var srcResource = virtualMachine.Textures[srcResourceIndex];
                    var srcSampler = virtualMachine.Samplers[instruction.Operands[3].Indices[0].Value];
                    var textureSampler = virtualMachine.TextureSamplers[srcResourceIndex];

                    if (textureSampler == null || srcResource == null)
                    {
                        // Nothing to sample from: every context receives a default-constructed Number4.
                        var zero = new Number4();
                        foreach (var context in executionContexts)
                        {
                            SetRegisterValue(context, instruction.Operands[0], zero);
                        }
                    }
                    else
                    {
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                            // The same pair of deltas, measured from the top-left value, is passed for all four samples.
                            var deltaX = Number4.Subtract(ref topRight, ref topLeft);
                            var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0],
                                textureSampler.SampleGrad(srcResource, srcSampler, ref topLeft, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0],
                                textureSampler.SampleGrad(srcResource, srcSampler, ref topRight, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0],
                                textureSampler.SampleGrad(srcResource, srcSampler, ref bottomLeft, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0],
                                textureSampler.SampleGrad(srcResource, srcSampler, ref bottomRight, ref deltaX, ref deltaY));
                        }
                    }
                    break;
                }
            case ExecutableOpcodeType.Sqrt:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Sqrt);
                }
                break;
            case ExecutableOpcodeType.UtoF:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.UtoF);
                }
                break;
            case ExecutableOpcodeType.Xor:
                foreach (var thread in activeExecutionContexts)
                {
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.Xor);
                }
                break;
            default:
                throw new InvalidOperationException(instruction.OpcodeType + " is not yet supported.");
        }

        // Algorithm from "Dynamic Warp Formation: Exploiting Thread Scheduling for Efficient MIMD Control Flow
        // on SIMD Graphics Hardware" by Wilson Wai Lun Fung -
        // https://circle.ubc.ca/bitstream/handle/2429/2268/ubc_2008_fall_fung_wilson_wai_lun.pdf?sequence=1
        //
        // 3 possible cases:
        // - No Divergence (single next PC)
        //   => Update the next PC field of the top of stack (TOS) entry to
        //      the next PC of all active threads in this warp.
        // - Divergence (multiple next PC)
        //   => Modify the next PC field of the TOS entry to the reconvergence point.
        //      For each unique next PC of the warp, push a
        //      new entry onto the stack with next PC field being the unique
        //      next PC and the reconv. PC being the reconvergence point.
        //      The active mask of each entry denotes the threads branching
        //      to the next PC value of this entry.
        // - Reconvergence (next PC = reconv. PC of TOS)
        //   => Pop TOS entry from the stack.
        if (instruction.UpdateDivergenceStack(warp.DivergenceStack, activeMasks))
        {
            // Peek the new TOS *before* rebuilding the active set; otherwise the next
            // instruction would run with the thread set of the entry that was on top
            // before the update.
            topOfDivergenceStack = warp.DivergenceStack.Peek();
            activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);
        }
    }
}
/// <summary>
/// Runs the given instruction stream over a set of execution contexts in lock-step. A divergence
/// stack owned by a freshly created <c>Warp</c> determines which contexts execute each instruction.
/// </summary>
/// <remarks>
/// Background reading on SIMD control-flow divergence:
/// http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter34.html
/// http://people.maths.ox.ac.uk/gilesm/pp10/lec2_2x2.pdf
/// http://stackoverflow.com/questions/10119796/how-does-cuda-compiler-know-the-divergence-behaviour-of-warps
/// http://www.istc-cc.cmu.edu/publications/papers/2011/SIMD.pdf
/// http://hal.archives-ouvertes.fr/docs/00/62/26/54/PDF/collange_sympa2011_en.pdf
/// http://users.ece.cmu.edu/~omutlu/pub/large-gpu-warps_micro11.pdf
/// http://www.eecis.udel.edu/~cavazos/cisc879/papers/a3-han.pdf
///
/// Execution is deferred until the returned sequence is enumerated. The loop ends when the
/// next-PC of the top stack entry reaches <c>instructions.Length</c>; a <c>Ret</c> only yields
/// <c>ExecutionResponse.Finished</c> and does not exit the loop by itself.
///
/// Derivative opcodes and <c>Sample</c> process <paramref name="executionContexts"/> four at a
/// time (top-left, top-right, bottom-left, bottom-right) and write to all contexts,
/// regardless of which ones are currently active.
/// </remarks>
/// <param name="virtualMachine">Provides <c>Textures</c>, <c>Samplers</c> and <c>TextureSamplers</c> for <c>Sample</c>.</param>
/// <param name="executionContexts">Every context in the warp.</param>
/// <param name="instructions">Instructions to interpret, addressed by the stack entry's next-PC.</param>
/// <returns>
/// A lazily produced response for each <c>Cut</c>/<c>CutStream</c>, <c>Emit</c>/<c>EmitStream</c>
/// and <c>Ret</c> instruction executed.
/// </returns>
/// <exception cref="NotImplementedException">Thrown when a <c>Discard</c> instruction is executed.</exception>
/// <exception cref="InvalidOperationException">Thrown for any opcode the switch does not handle.</exception>
public IEnumerable<ExecutionResponse> Execute(
    VirtualMachine virtualMachine, ExecutionContext[] executionContexts,
    ExecutableInstruction[] instructions)
{
    var warp = new Warp(executionContexts.Length);
    var topOfDivergenceStack = warp.DivergenceStack.Peek();
    var activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);

    while (topOfDivergenceStack.NextPC < instructions.Length)
    {
        var instruction = instructions[topOfDivergenceStack.NextPC];

        // Stays null unless the instruction is a conditional branch.
        List<BitArray> activeMasks = null;

        switch (instruction.OpcodeType)
        {
            case ExecutableOpcodeType.Add:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Add);
                break;
            case ExecutableOpcodeType.And:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.And);
                break;
            case ExecutableOpcodeType.Branch:
                // No per-thread work; handled entirely by the divergence stack update below.
                break;
            case ExecutableOpcodeType.BranchC:
                // activeMasks[0]: threads whose test was false; activeMasks[1]: threads whose test was true.
                activeMasks = new List<BitArray>
                {
                    new BitArray(executionContexts.Length),
                    new BitArray(executionContexts.Length)
                };
                foreach (var thread in activeExecutionContexts)
                {
                    var src0 = GetOperandValue(thread, instruction.Operands[0], NumberType.UInt);
                    bool result = TestCondition(ref src0, instruction.TestBoolean);
                    activeMasks[0][thread.Index] = !result;
                    activeMasks[1][thread.Index] = result;
                }
                break;
            case ExecutableOpcodeType.Cut:
            case ExecutableOpcodeType.CutStream:
                yield return ExecutionResponse.Cut;
                break;
            case ExecutableOpcodeType.Discard:
                throw new NotImplementedException();
            case ExecutableOpcodeType.Div:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Div);
                break;
            case ExecutableOpcodeType.Dp2:
                foreach (var thread in activeExecutionContexts)
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp2);
                break;
            case ExecutableOpcodeType.Dp3:
                foreach (var thread in activeExecutionContexts)
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp3);
                break;
            case ExecutableOpcodeType.Dp4:
                foreach (var thread in activeExecutionContexts)
                    ExecuteScalar(thread, instruction, NumberType.Float, InstructionImplementations.Dp4);
                break;
            case ExecutableOpcodeType.Emit:
            case ExecutableOpcodeType.EmitStream:
                yield return ExecutionResponse.Emit;
                break;
            case ExecutableOpcodeType.Eq:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Eq);
                break;
            case ExecutableOpcodeType.Exp:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Exp);
                break;
            case ExecutableOpcodeType.Frc:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Frc);
                break;
            case ExecutableOpcodeType.FtoI:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoI);
                break;
            case ExecutableOpcodeType.FtoU:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.FtoU);
                break;
            case ExecutableOpcodeType.Ge:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ge);
                break;
            case ExecutableOpcodeType.IAdd:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IAdd);
                break;
            case ExecutableOpcodeType.IEq:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IEq);
                break;
            case ExecutableOpcodeType.IGe:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IGe);
                break;
            case ExecutableOpcodeType.ILt:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.ILt);
                break;
            case ExecutableOpcodeType.IMad:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMad);
                break;
            case ExecutableOpcodeType.IMin:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IMin);
                break;
            case ExecutableOpcodeType.INe:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.INe);
                break;
            case ExecutableOpcodeType.INeg:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.INeg);
                break;
            case ExecutableOpcodeType.IShl:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShl);
                break;
            case ExecutableOpcodeType.IShr:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.IShr);
                break;
            case ExecutableOpcodeType.ItoF:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Int, InstructionImplementations.ItoF);
                break;
            case ExecutableOpcodeType.Log:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Log);
                break;
            case ExecutableOpcodeType.Lt:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Lt);
                break;
            case ExecutableOpcodeType.Mad:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mad);
                break;
            case ExecutableOpcodeType.Max:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Max);
                break;
            case ExecutableOpcodeType.Min:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Min);
                break;
            case ExecutableOpcodeType.Mov:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mov);
                break;
            case ExecutableOpcodeType.MovC:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.MovC);
                break;
            case ExecutableOpcodeType.Mul:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Mul);
                break;
            case ExecutableOpcodeType.Ne:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Ne);
                break;
            case ExecutableOpcodeType.Ret:
                yield return ExecutionResponse.Finished;
                break;
            case ExecutableOpcodeType.RoundNe:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNe);
                break;
            case ExecutableOpcodeType.RoundNi:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundNi);
                break;
            case ExecutableOpcodeType.RoundPi:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundPi);
                break;
            case ExecutableOpcodeType.RoundZ:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.RoundZ);
                break;
            case ExecutableOpcodeType.Rsq:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Rsq);
                break;
            case ExecutableOpcodeType.DerivRtx:
            case ExecutableOpcodeType.RtxCoarse:
                // A single horizontal delta (top-right minus top-left) is written to the whole group.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);

                    var deltaX = Number4.Subtract(ref topRight, ref topLeft);

                    for (var j = i; j < i + 4; j++)
                        SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaX);
                }
                break;
            case ExecutableOpcodeType.RtxFine:
                // The top row and the bottom row each get their own horizontal delta.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                    var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                    var topDeltaX = Number4.Subtract(ref topRight, ref topLeft);
                    var bottomDeltaX = Number4.Subtract(ref bottomRight, ref bottomLeft);

                    SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], topDeltaX);
                    SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], topDeltaX);
                    SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], bottomDeltaX);
                    SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], bottomDeltaX);
                }
                break;
            case ExecutableOpcodeType.DerivRty:
            case ExecutableOpcodeType.RtyCoarse:
                // A single vertical delta (bottom-left minus top-left) is written to the whole group.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);

                    var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                    for (var j = i; j < i + 4; j++)
                        SetRegisterValue(executionContexts[j], instruction.Operands[0], deltaY);
                }
                break;
            case ExecutableOpcodeType.RtyFine:
                // The left column and the right column each get their own vertical delta.
                for (var i = 0; i < executionContexts.Length; i += 4)
                {
                    var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                    var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                    var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                    var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                    var leftDeltaY = Number4.Subtract(ref bottomLeft, ref topLeft);
                    var rightDeltaY = Number4.Subtract(ref bottomRight, ref topRight);

                    SetRegisterValue(executionContexts[i + 0], instruction.Operands[0], leftDeltaY);
                    SetRegisterValue(executionContexts[i + 1], instruction.Operands[0], rightDeltaY);
                    SetRegisterValue(executionContexts[i + 2], instruction.Operands[0], leftDeltaY);
                    SetRegisterValue(executionContexts[i + 3], instruction.Operands[0], rightDeltaY);
                }
                break;
            case ExecutableOpcodeType.Sample:
                {
                    var srcResourceIndex = instruction.Operands[2].Indices[0].Value;
                    var srcResource = virtualMachine.Textures[srcResourceIndex];
                    var srcSampler = virtualMachine.Samplers[instruction.Operands[3].Indices[0].Value];
                    var textureSampler = virtualMachine.TextureSamplers[srcResourceIndex];

                    if (textureSampler == null || srcResource == null)
                    {
                        // No texture or texture sampler available: write a default Number4 to every context.
                        var zero = new Number4();
                        foreach (var context in executionContexts)
                            SetRegisterValue(context, instruction.Operands[0], zero);
                    }
                    else
                    {
                        for (var i = 0; i < executionContexts.Length; i += 4)
                        {
                            var topLeft = GetOperandValue(executionContexts[i + 0], instruction.Operands[1], NumberType.Float);
                            var topRight = GetOperandValue(executionContexts[i + 1], instruction.Operands[1], NumberType.Float);
                            var bottomLeft = GetOperandValue(executionContexts[i + 2], instruction.Operands[1], NumberType.Float);
                            var bottomRight = GetOperandValue(executionContexts[i + 3], instruction.Operands[1], NumberType.Float);

                            // All four samples share the deltas measured from the top-left value.
                            var deltaX = Number4.Subtract(ref topRight, ref topLeft);
                            var deltaY = Number4.Subtract(ref bottomLeft, ref topLeft);

                            SetRegisterValue(executionContexts[i + 0], instruction.Operands[0],
                                textureSampler.SampleGrad(srcResource, srcSampler, ref topLeft, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 1], instruction.Operands[0],
                                textureSampler.SampleGrad(srcResource, srcSampler, ref topRight, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 2], instruction.Operands[0],
                                textureSampler.SampleGrad(srcResource, srcSampler, ref bottomLeft, ref deltaX, ref deltaY));
                            SetRegisterValue(executionContexts[i + 3], instruction.Operands[0],
                                textureSampler.SampleGrad(srcResource, srcSampler, ref bottomRight, ref deltaX, ref deltaY));
                        }
                    }
                    break;
                }
            case ExecutableOpcodeType.Sqrt:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.Float, InstructionImplementations.Sqrt);
                break;
            case ExecutableOpcodeType.UtoF:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.UtoF);
                break;
            case ExecutableOpcodeType.Xor:
                foreach (var thread in activeExecutionContexts)
                    Execute(thread, instruction, NumberType.UInt, InstructionImplementations.Xor);
                break;
            default:
                throw new InvalidOperationException(instruction.OpcodeType + " is not yet supported.");
        }

        // Algorithm from "Dynamic Warp Formation: Exploiting Thread Scheduling for Efficient MIMD Control Flow
        // on SIMD Graphics Hardware" by Wilson Wai Lun Fung -
        // https://circle.ubc.ca/bitstream/handle/2429/2268/ubc_2008_fall_fung_wilson_wai_lun.pdf?sequence=1
        //
        // 3 possible cases:
        // - No Divergence (single next PC)
        //   => Update the next PC field of the top of stack (TOS) entry to
        //      the next PC of all active threads in this warp.
        // - Divergence (multiple next PC)
        //   => Modify the next PC field of the TOS entry to the reconvergence point.
        //      For each unique next PC of the warp, push a
        //      new entry onto the stack with next PC field being the unique
        //      next PC and the reconv. PC being the reconvergence point.
        //      The active mask of each entry denotes the threads branching
        //      to the next PC value of this entry.
        // - Reconvergence (next PC = reconv. PC of TOS)
        //   => Pop TOS entry from the stack.
        if (instruction.UpdateDivergenceStack(warp.DivergenceStack, activeMasks))
        {
            // The stack's top entry changed: fetch it first, then rebuild the active set
            // from that new entry. Rebuilding from the old reference would leave the
            // previous entry's threads active for the next instruction.
            topOfDivergenceStack = warp.DivergenceStack.Peek();
            activeExecutionContexts = Warp.GetActiveExecutionContexts(executionContexts, topOfDivergenceStack);
        }
    }
}