/// <summary>
/// Creates a scan module: stores the supplied delegates and plan, derives the
/// launch parameters for the scan-reduce and downsweep stages from
/// <paramref name="plan"/>, and builds the helper objects both stages use.
/// </summary>
/// <param name="target">GPU module target forwarded to the base constructor and to the inner reduce module.</param>
/// <param name="init">Delegate producing the initial value; stored and passed to the multi-scan builders.</param>
/// <param name="scanOp">Binary scan operator; stored and passed to the multi-scan builders.</param>
/// <param name="transform">Unary transform; stored and passed to the inner reduce module.</param>
/// <param name="plan">Plan supplying thread, warp and value counts.</param>
public ScanModule(GPUModuleTarget target, Func<T> init, Func<T, T, T> scanOp, Func<T, T> transform, Plan plan)
    : base(target)
{
    _init = init;
    _scanOp = scanOp;
    _transform = transform;
    Plan = plan;

    // Scan-reduce stage: thread and warp counts come from the plan's reduction settings.
    _numThreadsScanReduce = plan.NumThreadsReduction;
    var reduceWarps = plan.NumWarpsReduction;
    _multiScan = MultiScan(init, scanOp, reduceWarps, Alea.CUDA.Utilities.Common.log2(reduceWarps));

    // Downsweep stage: warp count and per-thread / per-warp value counts come from the plan.
    var downsweepWarps = plan.NumWarps;
    _numValues = plan.NumValues;
    _valuesPerThread = plan.ValuesPerThread;
    _valuesPerWarp = plan.ValuesPerWarp;
    var logDownsweepWarps = Alea.CUDA.Utilities.Common.log2(downsweepWarps);

    // NOTE(review): the "+ 1" on the warp size looks like padding — confirm against the code that consumes _size.
    _size = downsweepWarps * _valuesPerThread * (Const.WARP_SIZE + 1);
    _multiScanExcl = MultiScanExcl(init, scanOp, downsweepWarps, logDownsweepWarps);

    // Inner reduce module built from the same operator, transform and plan.
    _reduceModule = new ReduceModule<T>(target, init, scanOp, transform, plan);
}
/// <summary>
/// Generic reduction. Builds a temporary <c>ReduceModule&lt;T&gt;</c>, applies it to
/// <paramref name="input"/> and releases the module before returning.
/// </summary>
/// <param name="target">GPU module target the temporary module is created for.</param>
/// <param name="init">Delegate producing the initial value of the reduction.</param>
/// <param name="reductionOp">Binary reduction operator.</param>
/// <param name="transf">Unary transform passed to the module.</param>
/// <param name="input">Values to reduce.</param>
/// <returns>The value returned by the module's <c>Apply</c> call.</returns>
public static T Reduce<T>(GPUModuleTarget target, Func<T> init, Func<T, T, T> reductionOp, Func<T, T> transf, T[] input)
{
    // Plan32 is chosen when T is 4 bytes wide; every other size uses Plan64.
    var plan = Intrinsic.__sizeof<T>() == 4 ? Plan.Plan32 : Plan.Plan64;

    var module = new ReduceModule<T>(target, init, reductionOp, transf, plan);
    try
    {
        return module.Apply(input);
    }
    finally
    {
        // The module is created only for this call, so release it here instead of
        // leaking it (assumes ReduceModule<T> exposes Dispose() like the other GPU modules).
        module.Dispose();
    }
}
/// <summary>
/// Generic scan. Builds a temporary <c>ScanModule&lt;T&gt;</c>, applies it to
/// <paramref name="input"/> and releases the module before returning.
/// </summary>
/// <param name="target">GPU module target the temporary module is created for.</param>
/// <param name="init">Delegate producing the initial value of the scan.</param>
/// <param name="scanOp">Binary scan operator.</param>
/// <param name="transf">Unary transform passed to the module.</param>
/// <param name="input">Values to scan.</param>
/// <param name="inclusive">Forwarded to <c>Apply</c>; selects inclusive versus exclusive scan.</param>
/// <returns>The array returned by the module's <c>Apply</c> call.</returns>
public static T[] Scan<T>(GPUModuleTarget target, Func<T> init, Func<T, T, T> scanOp, Func<T, T> transf, T[] input, bool inclusive)
{
    // Plan32 is chosen when T is 4 bytes wide; every other size uses Plan64.
    var plan = Intrinsic.__sizeof<T>() == 4 ? Plan.Plan32 : Plan.Plan64;

    var module = new ScanModule<T>(target, init, scanOp, transf, plan);
    try
    {
        return module.Apply(input, inclusive);
    }
    finally
    {
        // The module is created only for this call, so release it here instead of
        // leaking it (assumes ScanModule<T> exposes Dispose() like the other GPU modules).
        module.Dispose();
    }
}
/// <summary>
/// Creates a scalar-product module: stores the plan and the two binary
/// operators, builds the multi-reduce helper for the plan's warp count, and
/// creates an inner reduce module that uses <paramref name="add"/> with an
/// identity transform.
/// </summary>
/// <param name="target">GPU module target forwarded to the base constructor and the inner reduce module.</param>
/// <param name="plan">Plan supplying the warp count.</param>
/// <param name="init">Delegate producing the initial value of the reduction.</param>
/// <param name="add">Binary operator used for the reduction.</param>
/// <param name="mult">Binary operator stored for later use by the module.</param>
public ScalarProdModule(GPUModuleTarget target, Plan plan, Func<T> init, Func<T, T, T> add, Func<T, T, T> mult)
    : base(target)
{
    _plan = plan;
    _mult = mult;
    _add = add;

    var warpCount = plan.NumWarps;
    _multiReduce = ReduceModule<T>.MultiReduce(
        init,
        add,
        warpCount,
        Alea.CUDA.Utilities.Common.log2(warpCount));

    // The inner reduction leaves each element untouched (identity transform).
    _reduce = new ReduceModule<T>(target, init, add, value => value, plan);
}
/// <summary>
/// Creates a reduce module: stores the supplied delegates and plan, records
/// the plan's thread count, and builds the multi-reduce helper for the plan's
/// warp count.
/// </summary>
/// <param name="target">GPU module target forwarded to the base constructor.</param>
/// <param name="initFunc">Delegate producing the initial value of the reduction.</param>
/// <param name="reductionOp">Binary reduction operator.</param>
/// <param name="transform">Unary transform stored for later use by the module.</param>
/// <param name="plan">Plan supplying thread and warp counts.</param>
public ReduceModule(GPUModuleTarget target, Func<T> initFunc, Func<T, T, T> reductionOp, Func<T, T> transform, Plan plan)
    : base(target)
{
    _initFunc = initFunc;
    _reductionOp = reductionOp;
    _transform = transform;
    Plan = plan;
    _numThreads = plan.NumThreads;

    var warpCount = plan.NumWarps;
    _multiReduce = MultiReduce(initFunc, reductionOp, warpCount, Common.log2(warpCount));
}
/// <summary>
/// Creates the module for the given target; all construction work is
/// delegated to the base class.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public GpuDynamicSimulatorModule(GPUModuleTarget target)
    : base(target)
{
}
/// <summary>
/// Creates the module for the given target; all construction work is
/// delegated to the base class.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public SimpleD3D9(GPUModuleTarget target)
    : base(target)
{
}
/// <summary>
/// Creates a matrix-transpose module and records its tiling parameters.
/// </summary>
/// <param name="target">GPU module target forwarded to the base constructor.</param>
/// <param name="tileDim">Value stored in <c>TileDim</c>.</param>
/// <param name="blockRows">Value stored in <c>BlockRows</c>.</param>
public MatrixTransposeModule(GPUModuleTarget target, int tileDim, int blockRows)
    : base(target)
{
    TileDim = tileDim;
    BlockRows = blockRows;
}
//[/LockPositions]

/// <summary>
/// Creates the 800x600 simulation window and prepares everything the
/// simulation needs: simulation constants, the GPU worker, the queue of
/// simulators, the OpenGL buffers registered with CUDA, and the initial body
/// positions and velocities.
/// </summary>
/// <exception cref="InvalidOperationException">
/// An OpenGL buffer reports a size different from the one requested.
/// </exception>
public SimWindow()
    : base(800, 600, GraphicsMode.Default, "Gravitational n-body simulation")
{
    _numBodies = 256 * 64;
    const float clusterScale = 1.0f;
    const float velocityScale = 1.0f;
    _deltaTime = 0.001f;
    _softeningSquared = 0.00125f;
    _damping = 0.9995f;

    //[CreateWorker]
    _worker = Worker.CreateByFunc(Generate);
    //[/CreateWorker]

    _stopwatch = Stopwatch.StartNew();
    _fpsCalcLag = 128;
    _frameCounter = 0;

    //[CreateSimulatros]
    _simulators = new Queue<ISimulator>();
    var target = GPUModuleTarget.Worker(_worker);

    var simulatorGpuDynamicBlockSizeModule = new GpuDynamicSimulatorModule(target); // need dispose
    var simulatorGpuDynamicBlockSize64 = simulatorGpuDynamicBlockSizeModule.Create(64);
    var simulatorGpuDynamicBlockSize128 = simulatorGpuDynamicBlockSizeModule.Create(128);
    var simulatorGpuDynamicBlockSize256 = simulatorGpuDynamicBlockSizeModule.Create(256);
    var simulatorGpuDynamicBlockSize512 = simulatorGpuDynamicBlockSizeModule.Create(512);

    var simulatorGpuStaticBlockSizeModule64 = new GpuStaticSimulatorModule64(target); // need dispose
    var simulatorGpuStaticBlockSizeModule128 = new GpuStaticSimulatorModule128(target); // need dispose
    var simulatorGpuStaticBlockSizeModule256 = new GpuStaticSimulatorModule256(target); // need dispose
    var simulatorGpuStaticBlockSizeModule512 = new GpuStaticSimulatorModule512(target); // need dispose

    // First, enqueue one simulator with block size 256 so we can compare
    // performance with the C code. It is enqueued again below, on purpose.
    _simulators.Enqueue(simulatorGpuStaticBlockSizeModule256);

    // Enqueue several dynamic block size simulators.
    _simulators.Enqueue(simulatorGpuDynamicBlockSize64);
    _simulators.Enqueue(simulatorGpuDynamicBlockSize128);
    _simulators.Enqueue(simulatorGpuDynamicBlockSize256);
    _simulators.Enqueue(simulatorGpuDynamicBlockSize512);

    // Enqueue several static block size simulators.
    _simulators.Enqueue(simulatorGpuStaticBlockSizeModule64);
    _simulators.Enqueue(simulatorGpuStaticBlockSizeModule128);
    _simulators.Enqueue(simulatorGpuStaticBlockSizeModule256);
    _simulators.Enqueue(simulatorGpuStaticBlockSizeModule512);

    // We do not enqueue any CPU simulator as it is much too slow.
    //_simulators.Enqueue(new CpuSimulator(_worker, _numBodies));

    // Disposing the modules is deferred; the delegate captures every module created above.
    _disposeSimulators = () =>
    {
        simulatorGpuDynamicBlockSizeModule.Dispose();
        simulatorGpuStaticBlockSizeModule64.Dispose();
        simulatorGpuStaticBlockSizeModule128.Dispose();
        simulatorGpuStaticBlockSizeModule256.Dispose();
        simulatorGpuStaticBlockSizeModule512.Dispose();
    };

    _simulator = _simulators.Dequeue();
    //[/CreateSimulatros]

    //[CreateBuffers]
    // A freshly allocated array is already zero-filled, so no explicit clearing is needed.
    _buffers = new uint[2];
    GL.GenBuffers(_buffers.Length, _buffers);

    // One float4 per body.
    var bufferBytes = Microsoft.FSharp.Core.Operators.SizeOf<float4>() * _numBodies;

    foreach (var buffer in _buffers)
    {
        GL.BindBuffer(BufferTarget.ArrayBuffer, buffer);
        GL.BufferData(BufferTarget.ArrayBuffer, (IntPtr)bufferBytes, IntPtr.Zero, BufferUsageHint.DynamicDraw);

        // Read the size back to verify that the allocation really happened.
        var size = 0;
        unsafe
        {
            GL.GetBufferParameter(BufferTarget.ArrayBuffer, BufferParameterName.BufferSize, &size);
        }

        if (size != bufferBytes)
        {
            throw new InvalidOperationException("Pixel Buffer Object allocation failed!");
        }

        GL.BindBuffer(BufferTarget.ArrayBuffer, 0);

        // NOTE(review): each buffer is registered through both cuGLRegisterBufferObject (here)
        // and cuGraphicsGLRegisterBuffer (below); the former is listed as deprecated in the
        // CUDA driver API documentation — confirm whether both registrations are needed.
        CUDAInterop.cuSafeCall(CUDAInterop.cuGLRegisterBufferObject(buffer));
    }

    _resources = new IntPtr[_buffers.Length];
    for (var i = 0; i < _buffers.Length; i++)
    {
        var res = IntPtr.Zero;
        unsafe
        {
            CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsGLRegisterBuffer(&res, _buffers[i], 0u));
        }

        _resources[i] = res;
    }
    //[/CreateBuffers]

    //[FinalizeGL]
    _vel = _worker.Malloc<float4>(_numBodies);

    float4[] hpos, hvel;
    BodyInitializer.Initialize(new BodyInitializer3(), clusterScale, velocityScale, _numBodies, out hpos, out hvel);

    // Upload the initial velocities to the device allocation.
    _worker.Scatter(
        hvel,
        _vel.Ptr,
        Microsoft.FSharp.Core.FSharpOption<int>.None,
        Microsoft.FSharp.Core.FSharpOption<int>.None);

    // Upload the initial positions through the second pointer handed out by LockPos.
    LockPos(
        (pos0, pos1) =>
            _worker.Scatter(
                hpos,
                pos1,
                Microsoft.FSharp.Core.FSharpOption<int>.None,
                Microsoft.FSharp.Core.FSharpOption<int>.None));

    Help();
    Description();
    //[/FinalizeGL]
}
/// <summary>
/// Creates the module by passing the literal <c>512</c> as the second
/// argument of the base constructor (presumably the block size, as the class
/// name suggests — confirm against the base class).
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public GpuStaticSimulatorModule512(GPUModuleTarget target)
    : base(target, 512)
{
}
/// <summary>
/// Creates the module by passing the literal <c>128</c> as the second
/// argument of the base constructor (presumably the block size, as the class
/// name suggests — confirm against the base class).
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public GpuStaticSimulatorModule128(GPUModuleTarget target)
    : base(target, 128)
{
}
/// <summary>
/// Creates the module for the given target; all construction work is
/// delegated to the base class.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public GPUModule(GPUModuleTarget target)
    : base(target)
{
}
/// <summary>
/// Creates the module for the given target; all construction work is
/// delegated to the base class.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public TriDiagSolverModule(GPUModuleTarget target)
    : base(target)
{
}
/// <summary>
/// Creates the module by passing the literal <c>32</c> as the second argument
/// of the base constructor (presumably the block size — confirm against the
/// base class).
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public DefaultMatrixMultiplyModule(GPUModuleTarget target)
    : base(target, 32)
{
}
/// <summary>
/// Creates a matrix-multiply module and records its block size.
/// </summary>
/// <param name="target">GPU module target forwarded to the base constructor.</param>
/// <param name="blockSize">Value stored in <c>BlockSize</c>.</param>
public MatrixMultiplyModule(GPUModuleTarget target, int blockSize)
    : base(target)
{
    BlockSize = blockSize;
}
/// <summary>
/// Creates a simulator module with a fixed block size and builds the
/// description text that embeds that size.
/// </summary>
/// <param name="target">GPU module target forwarded to the base constructor.</param>
/// <param name="blockSize">Block size stored in the module and embedded in its description.</param>
public GpuStaticSimulatorModule(GPUModuleTarget target, int blockSize)
    : base(target)
{
    _blockSize = blockSize;

    // The description embeds the block size that was just stored.
    _description = string.Format("GPU.StaticBlockSize({0})", blockSize);
}
/// <summary>
/// Creates the module by passing the literal <c>64</c> as the second
/// argument of the base constructor (presumably the block size, as the class
/// name suggests — confirm against the base class).
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public GpuStaticSimulatorModule64(GPUModuleTarget target)
    : base(target, 64)
{
}
/// <summary>
/// Creates the module for the given target; all construction work is
/// delegated to the base class.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public InstanceUsageAOT(GPUModuleTarget target)
    : base(target)
{
}
/// <summary>
/// Creates the module by passing the literal <c>256</c> as the second
/// argument of the base constructor (presumably the block size, as the class
/// name suggests — confirm against the base class).
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public GpuStaticSimulatorModule256(GPUModuleTarget target)
    : base(target, 256)
{
}
/// <summary>
/// Creates the module with a binary operation that adds the sine of its first
/// argument to the cosine of its second, both computed through
/// <c>LibDevice</c>.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public SinCosModule(GPUModuleTarget target)
    : base(target, (lhs, rhs) => LibDevice.__nv_sin(lhs) + LibDevice.__nv_cos(rhs))
{
}
/// <summary>
/// Creates the module for the given target; all construction work is
/// delegated to the base class.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public PiEstimatorModule(GPUModuleTarget target)
    : base(target)
{
}
/// <summary>
/// Creates the module with <c>LibDevice.__nv_sin</c> as the operation handed
/// to the base constructor.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public SinModule(GPUModuleTarget target)
    : base(target, LibDevice.__nv_sin)
{
}
/// <summary>
/// Creates a transform module and stores the binary operation it will use.
/// </summary>
/// <param name="target">GPU module target forwarded to the base constructor.</param>
/// <param name="opFunc">Binary operation stored in <c>op</c>.</param>
public TransformModule(GPUModuleTarget target, Func<T, T, T> opFunc)
    : base(target)
{
    op = opFunc;
}
/// <summary>
/// Creates the module for the given target; all construction work is
/// delegated to the base class.
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public DefaultPiEstimatorModule(GPUModuleTarget target)
    : base(target)
{
}
/// <summary>
/// Creates the module by passing the literals <c>32</c> and <c>8</c> as the
/// second and third arguments of the base constructor (presumably tile
/// dimension and block rows — confirm against the base class).
/// </summary>
/// <param name="target">GPU module target forwarded unchanged to the base constructor.</param>
public MatrixTransposeF64(GPUModuleTarget target)
    : base(target, 32, 8)
{
}
/// <summary>
/// Entry point of the sample: creates the render form and the Direct3D 9
/// device, sets up the vertex buffer and its declaration, creates the GPU
/// worker and the updater module, then runs the render loop. Each frame the
/// updater is called on the registered vertex buffer before the points are
/// drawn. All resources are released when the loop ends, including when it
/// ends with an exception.
/// </summary>
public static void Main()
{
    var form = new RenderForm("SimpleD3D9 by C#")
    {
        ClientSize = new Size(1024, 768)
    };

    var device = new D3D9Device(
        new Direct3D(),
        CUDADevice.Default.ID,
        DeviceType.Hardware,
        form.Handle,
        CreateFlags.HardwareVertexProcessing,
        new PresentParameters(form.ClientSize.Width, form.ClientSize.Height));

    var vertices = new VertexBuffer(
        device,
        Utilities.SizeOf<Vector4>() * Total,
        Usage.WriteOnly,
        VertexFormat.None,
        Pool.Default);

    // Per-vertex layout: a Float3 position at byte offset 0 followed by a Ubyte4 color at byte offset 12.
    var vertexElems = new[]
    {
        new VertexElement(0, 0, DeclarationType.Float3, DeclarationMethod.Default, DeclarationUsage.Position, 0),
        new VertexElement(0, 12, DeclarationType.Ubyte4, DeclarationMethod.Default, DeclarationUsage.Color, 0),
        VertexElement.VertexDeclarationEnd
    };
    var vertexDecl = new VertexDeclaration(device, vertexElems);

    var worker = Worker.CreateByFunc(() => Generate(device));
    var updater = new SimpleD3D9(GPUModuleTarget.Worker(worker));

    var view = Matrix.LookAtLH(
        new Vector3(0.0f, 3.0f, -2.0f),  // the camera position
        new Vector3(0.0f, 0.0f, 0.0f),   // the look-at position
        new Vector3(0.0f, 1.0f, 0.0f));  // the up direction

    var proj = Matrix.PerspectiveFovLH(
        (float)(Math.PI / 4.0),  // the field of view in the y direction, in radians
        1.0f,                    // the aspect ratio
        1.0f,                    // the near plane distance
        100.0f);                 // the far plane distance

    device.SetTransform(TransformState.View, view);
    device.SetTransform(TransformState.Projection, proj);
    device.SetRenderState(RenderState.Lighting, false);

    var vbres = RegisterVerticesResource(vertices);

    try
    {
        var clock = System.Diagnostics.Stopwatch.StartNew();

        RenderLoop.Run(form, () =>
        {
            var time = (float)(clock.Elapsed.TotalMilliseconds) / 300.0f;
            updater.Update(vbres, time);

            // Now normal D3D9 rendering procedure.
            device.Clear(ClearFlags.Target | ClearFlags.ZBuffer, new ColorBGRA(0, 40, 100, 0), 1.0f, 0);
            device.BeginScene();

            device.VertexDeclaration = vertexDecl;
            device.SetStreamSource(0, vertices, 0, Utilities.SizeOf<Vector4>());

            // we use PointList as the graphics primitives
            device.DrawPrimitives(SharpDX.Direct3D9.PrimitiveType.PointList, 0, Total);

            device.EndScene();
            device.Present();
        });
    }
    finally
    {
        // Release everything in reverse order of creation, even when the render loop throws.
        UnregisterVerticesResource(vbres);
        updater.Dispose();
        worker.Dispose();
        vertexDecl.Dispose();
        vertices.Dispose();
        device.Dispose();
        form.Dispose();
    }
}