예제 #1
0
        /// <summary>
        /// Creates a scan module: stores the operators and plan, derives the warp/thread
        /// configuration for the scan-reduce and downsweep stages from <paramref name="plan"/>,
        /// and creates the reduce module used alongside the scan.
        /// </summary>
        /// <param name="target">Module target forwarded to the base constructor.</param>
        /// <param name="init">Function producing the initial value of the scan.</param>
        /// <param name="scanOp">Binary operator combining two values.</param>
        /// <param name="transform">Unary transform; stored here and also passed to the reduce module.</param>
        /// <param name="plan">Plan supplying thread, warp and value counts.</param>
        public ScanModule(GPUModuleTarget target, Func <T> init, Func <T, T, T> scanOp, Func <T, T> transform, Plan plan) : base(target)
        {
            _init      = init;
            _scanOp    = scanOp;
            _transform = transform;
            Plan       = plan;

            // Scan-reduce stage: uses the plan's reduction thread/warp counts.
            _numThreadsScanReduce = plan.NumThreadsReduction;
            var reduceWarps = plan.NumWarpsReduction;
            _multiScan = MultiScan(init, scanOp, reduceWarps, Alea.CUDA.Utilities.Common.log2(reduceWarps));

            // Downsweep stage: uses the plan's regular warp count and per-thread/per-warp value counts.
            var downsweepWarps = plan.NumWarps;
            _numValues       = plan.NumValues;
            _valuesPerThread = plan.ValuesPerThread;
            _valuesPerWarp   = plan.ValuesPerWarp;

            _size          = downsweepWarps * _valuesPerThread * (Const.WARP_SIZE + 1);
            _multiScanExcl = MultiScanExcl(init, scanOp, downsweepWarps, Alea.CUDA.Utilities.Common.log2(downsweepWarps));

            // The reduce module is built from the same target, operators and plan.
            _reduceModule = new ReduceModule <T>(target, init, scanOp, transform, plan);
        }
예제 #2
0
 /// <summary>
 /// Generic reduction: builds a temporary <c>ReduceModule</c> for <typeparamref name="T"/>,
 /// applies it to <paramref name="input"/>, and disposes the module before returning.
 /// </summary>
 /// <param name="target">Module target the temporary module is created on.</param>
 /// <param name="init">Function producing the initial value of the reduction.</param>
 /// <param name="reductionOp">Binary operator combining two values.</param>
 /// <param name="transf">Unary transform passed to the module.</param>
 /// <param name="input">Values to reduce.</param>
 /// <returns>The value returned by the module's <c>Apply</c>.</returns>
 public static T Reduce <T>(GPUModuleTarget target, Func <T> init, Func <T, T, T> reductionOp, Func <T, T> transf, T[] input)
 {
     // 4-byte element types use Plan32; every other size uses Plan64.
     var plan = Intrinsic.__sizeof <T>() == 4 ? Plan.Plan32 : Plan.Plan64;

     // The module is a disposable, single-use object here; release it deterministically
     // instead of leaking it once the result has been computed.
     using (var module = new ReduceModule <T>(target, init, reductionOp, transf, plan))
     {
         return module.Apply(input);
     }
 }
예제 #3
0
 /// <summary>
 /// Generic scan: builds a temporary <c>ScanModule</c> for <typeparamref name="T"/>,
 /// applies it to <paramref name="input"/>, and disposes the module before returning.
 /// </summary>
 /// <param name="target">Module target the temporary module is created on.</param>
 /// <param name="init">Function producing the initial value of the scan.</param>
 /// <param name="scanOp">Binary operator combining two values.</param>
 /// <param name="transf">Unary transform passed to the module.</param>
 /// <param name="input">Values to scan.</param>
 /// <param name="inclusive">Forwarded to the module's <c>Apply</c> to select inclusive or exclusive scan.</param>
 /// <returns>The array returned by the module's <c>Apply</c>.</returns>
 public static T[] Scan <T>(GPUModuleTarget target, Func <T> init, Func <T, T, T> scanOp, Func <T, T> transf, T[] input, bool inclusive)
 {
     // 4-byte element types use Plan32; every other size uses Plan64.
     var plan = Intrinsic.__sizeof <T>() == 4 ? Plan.Plan32 : Plan.Plan64;

     // The module is a disposable, single-use object here; release it deterministically
     // instead of leaking it once the result array has been produced.
     using (var module = new ScanModule <T>(target, init, scanOp, transf, plan))
     {
         return module.Apply(input, inclusive);
     }
 }
예제 #4
0
        /// <summary>
        /// Creates a scalar-product module: stores the plan and the add/multiply operators,
        /// builds the multi-reduce helper for the plan's warp count, and creates a reduce
        /// module that uses <paramref name="add"/> with an identity transform.
        /// </summary>
        /// <param name="target">Module target forwarded to the base constructor and to the reduce module.</param>
        /// <param name="plan">Plan supplying the warp count.</param>
        /// <param name="init">Function producing the initial value of the reduction.</param>
        /// <param name="add">Binary operator used for the reduction.</param>
        /// <param name="mult">Binary operator stored as the multiply operation.</param>
        public ScalarProdModule(GPUModuleTarget target, Plan plan, Func <T> init, Func <T, T, T> add, Func <T, T, T> mult) : base(target)
        {
            _plan = plan;
            _mult = mult;
            _add  = add;

            var warpCount = plan.NumWarps;
            _multiReduce = ReduceModule <T> .MultiReduce(
                init,
                add,
                warpCount,
                Alea.CUDA.Utilities.Common.log2(warpCount));

            // Identity transform: values are reduced as-is.
            _reduce = new ReduceModule <T>(target, init, add, value => value, plan);
        }
예제 #5
0
        /// <summary>
        /// Creates a reduce module: stores the operators and plan, takes the thread count
        /// from the plan, and builds the multi-reduce helper for the plan's warp count.
        /// </summary>
        /// <param name="target">Module target forwarded to the base constructor.</param>
        /// <param name="initFunc">Function producing the initial value of the reduction.</param>
        /// <param name="reductionOp">Binary operator combining two values.</param>
        /// <param name="transform">Unary transform stored on the module.</param>
        /// <param name="plan">Plan supplying thread and warp counts.</param>
        public ReduceModule(
            GPUModuleTarget target,
            Func <T> initFunc,
            Func <T, T, T> reductionOp,
            Func <T, T> transform,
            Plan plan)
            : base(target)
        {
            _initFunc    = initFunc;
            _reductionOp = reductionOp;
            _transform   = transform;
            Plan         = plan;
            _numThreads  = plan.NumThreads;

            var warpCount = plan.NumWarps;
            _multiReduce = MultiReduce(initFunc, reductionOp, warpCount, Common.log2(warpCount));
        }
 /// <summary>Creates the module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public GpuDynamicSimulatorModule(GPUModuleTarget target)
     : base(target)
 {
 }
예제 #7
0
 /// <summary>Creates the module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public SimpleD3D9(GPUModuleTarget target)
     : base(target)
 {
 }
 /// <summary>
 /// Creates the module and records the tile dimension and block-row count.
 /// </summary>
 /// <param name="target">Module target forwarded to the base constructor.</param>
 /// <param name="tileDim">Value stored in <c>TileDim</c>.</param>
 /// <param name="blockRows">Value stored in <c>BlockRows</c>.</param>
 public MatrixTransposeModule(GPUModuleTarget target, int tileDim, int blockRows) : base(target)
 {
     this.TileDim   = tileDim;
     this.BlockRows = blockRows;
 }
예제 #9
0
 /// <summary>Creates the module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public SimpleD3D9(GPUModuleTarget target)
     : base(target)
 {
 }
예제 #10
0
        //[/LockPositions]

        /// <summary>
        /// Creates the 800x600 simulation window and performs all one-time setup:
        /// simulation parameters, the worker, the queue of GPU simulators, two GL array
        /// buffers registered with CUDA, and the upload of the initial body velocities
        /// and positions. Finishes by calling <c>Help()</c> and <c>Description()</c>.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when a GL buffer reports a size different from the requested one.
        /// </exception>
        public SimWindow() : base(800, 600, GraphicsMode.Default, "Gravitational n-body simulation")
        {
            // Number of simulated bodies and the scales passed to the body initializer below.
            _numBodies = 256 * 64;
            const float clusterScale  = 1.0f;
            const float velocityScale = 1.0f;

            // Simulation parameters stored on the window.
            _deltaTime        = 0.001f;
            _softeningSquared = 0.00125f;
            _damping          = 0.9995f;
            //[CreateWorker]
            _worker = Worker.CreateByFunc(Generate);
            //[/CreateWorker]

            // Frame-rate bookkeeping.
            _stopwatch    = Stopwatch.StartNew();
            _fpsCalcLag   = 128;
            _frameCounter = 0;

            //[CreateSimulatros]
            _simulators = new Queue <ISimulator>();
            var target = GPUModuleTarget.Worker(_worker);

            // One dynamic-block-size module yields four simulators (64/128/256/512).
            var simulatorGpuDynamicBlockSizeModule = new GpuDynamicSimulatorModule(target);         // need dispose
            var simulatorGpuDynamicBlockSize64     = simulatorGpuDynamicBlockSizeModule.Create(64);
            var simulatorGpuDynamicBlockSize128    = simulatorGpuDynamicBlockSizeModule.Create(128);
            var simulatorGpuDynamicBlockSize256    = simulatorGpuDynamicBlockSizeModule.Create(256);
            var simulatorGpuDynamicBlockSize512    = simulatorGpuDynamicBlockSizeModule.Create(512);

            // Static-block-size modules are enqueued directly as simulators below.
            var simulatorGpuStaticBlockSizeModule64  = new GpuStaticSimulatorModule64(target);      // need dispose
            var simulatorGpuStaticBlockSizeModule128 = new GpuStaticSimulatorModule128(target);     // need dispose
            var simulatorGpuStaticBlockSizeModule256 = new GpuStaticSimulatorModule256(target);     // need dispose
            var simulatorGpuStaticBlockSizeModule512 = new GpuStaticSimulatorModule512(target);     // need dispose

            // First, enqueue one simulator which is 256 blocksize so we can compare with C code for performance.
            // Note: the same instance is enqueued a second time with the other static simulators below.
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule256);

            // Enqueue several dynamic block size simulators.
            _simulators.Enqueue(simulatorGpuDynamicBlockSize64);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize128);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize256);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize512);

            // Enqueue several static block size simulators.
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule64);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule128);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule256);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule512);

            // We do not enqueue any cpu simulator as it is much too slow.
            //_simulators.Enqueue(new CpuSimulator(_worker, _numBodies));

            // Captures the five modules created above so they can be disposed later;
            // the action is only stored here, not invoked.
            _disposeSimulators = () =>
            {
                simulatorGpuDynamicBlockSizeModule.Dispose();
                simulatorGpuStaticBlockSizeModule64.Dispose();
                simulatorGpuStaticBlockSizeModule128.Dispose();
                simulatorGpuStaticBlockSizeModule256.Dispose();
                simulatorGpuStaticBlockSizeModule512.Dispose();
            };

            // Start with the first simulator in the queue (the static 256 one enqueued first).
            _simulator = _simulators.Dequeue();
            //[/CreateSimulatros]

            //[CreateBuffers]
            _buffers = new uint[2];
            // NOTE(review): a freshly allocated uint[] is already zero-filled, so this loop looks redundant.
            for (var i = 0; i < _buffers.Length; i++)
            {
                _buffers[i] = 0;
            }
            GL.GenBuffers(_buffers.Length, _buffers);
            foreach (var buffer in _buffers)
            {
                // Allocate storage for one float4 per body, with no initial data.
                GL.BindBuffer(BufferTarget.ArrayBuffer, buffer);
                GL.BufferData(BufferTarget.ArrayBuffer,
                              (IntPtr)(Microsoft.FSharp.Core.Operators.SizeOf <float4>() * _numBodies),
                              IntPtr.Zero, BufferUsageHint.DynamicDraw);
                // Read the size back and verify the buffer has the requested size.
                var size = 0;
                unsafe
                {
                    GL.GetBufferParameter(BufferTarget.ArrayBuffer, BufferParameterName.BufferSize, &size);
                }
                if (size != Microsoft.FSharp.Core.Operators.SizeOf <float4>() * _numBodies)
                {
                    throw new Exception("Pixel Buffer Object allocation failed!");
                }
                GL.BindBuffer(BufferTarget.ArrayBuffer, 0);
                // NOTE(review): each buffer is registered here and again via cuGraphicsGLRegisterBuffer
                // in the loop below — confirm both registrations are required.
                CUDAInterop.cuSafeCall(CUDAInterop.cuGLRegisterBufferObject(buffer));
            }

            // One CUDA graphics resource handle per GL buffer.
            _resources = new IntPtr[_buffers.Length];
            for (var i = 0; i < _buffers.Length; i++)
            {
                var res = IntPtr.Zero;
                unsafe
                {
                    CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsGLRegisterBuffer(&res, _buffers[i], 0u));
                }
                _resources[i] = res;
            }
            //[/CreateBuffers]

            //[FinalizeGL]
            // Velocity storage allocated through the worker, one float4 per body.
            _vel = _worker.Malloc <float4>(_numBodies);

            // Generate initial host-side positions and velocities.
            float4[] hpos, hvel;
            BodyInitializer.Initialize(new BodyInitializer3(), clusterScale, velocityScale, _numBodies,
                                       out hpos, out hvel);
            // Copy the initial velocities to the allocation above.
            _worker.Scatter(hvel, _vel.Ptr, Microsoft.FSharp.Core.FSharpOption <int> .None,
                            Microsoft.FSharp.Core.FSharpOption <int> .None);
            // Copy the initial positions to the second pointer handed out by LockPos; pos0 is unused here.
            LockPos(
                (pos0, pos1) =>
                _worker.Scatter(hpos, pos1, Microsoft.FSharp.Core.FSharpOption <int> .None,
                                Microsoft.FSharp.Core.FSharpOption <int> .None));

            Help();
            Description();
            //[/FinalizeGL]
        }
 /// <summary>Creates the simulator module, passing a fixed block size of 512 to the base constructor.</summary>
 public GpuStaticSimulatorModule512(GPUModuleTarget target) : base(target, 512)
 {
 }
 /// <summary>Creates the simulator module, passing a fixed block size of 128 to the base constructor.</summary>
 public GpuStaticSimulatorModule128(GPUModuleTarget target) : base(target, 128)
 {
 }
예제 #13
0
 /// <summary>Creates the module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public GPUModule(GPUModuleTarget target)
     : base(target)
 {
 }
예제 #14
0
 /// <summary>Creates the solver module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public TriDiagSolverModule(GPUModuleTarget target) : base(target)
 {
 }
 /// <summary>Creates the module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public GpuDynamicSimulatorModule(GPUModuleTarget target)
     : base(target)
 {
 }
예제 #16
0
 /// <summary>Creates the module, passing a fixed value of 32 as the second base-constructor argument.</summary>
 public DefaultMatrixMultiplyModule(GPUModuleTarget target) : base(target, 32)
 {
 }
예제 #17
0
 /// <summary>Creates the module and records the block size.</summary>
 /// <param name="target">Module target forwarded to the base constructor.</param>
 /// <param name="blockSize">Value stored in <c>BlockSize</c>.</param>
 public MatrixMultiplyModule(GPUModuleTarget target, int blockSize) : base(target)
 {
     this.BlockSize = blockSize;
 }
 /// <summary>
 /// Creates the simulator module, storing the block size and a description string
 /// of the form <c>GPU.StaticBlockSize(n)</c>.
 /// </summary>
 /// <param name="target">Module target forwarded to the base constructor.</param>
 /// <param name="blockSize">Block size stored on the module and embedded in the description.</param>
 public GpuStaticSimulatorModule(GPUModuleTarget target, int blockSize)
     : base(target)
 {
     _blockSize   = blockSize;
     _description = string.Format("GPU.StaticBlockSize({0})", blockSize);
 }
 /// <summary>Creates the simulator module, passing a fixed block size of 64 to the base constructor.</summary>
 public GpuStaticSimulatorModule64(GPUModuleTarget target) : base(target, 64)
 {
 }
예제 #20
0
 /// <summary>Creates the module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public InstanceUsageAOT(GPUModuleTarget target) : base(target)
 {
 }
 /// <summary>Creates the simulator module, passing a fixed block size of 256 to the base constructor.</summary>
 public GpuStaticSimulatorModule256(GPUModuleTarget target) : base(target, 256)
 {
 }
 /// <summary>
 /// Creates the module with a binary operation that returns
 /// <c>sin(first) + cos(second)</c>, using the LibDevice intrinsics.
 /// </summary>
 /// <param name="target">Module target forwarded to the base constructor.</param>
 public SinCosModule(GPUModuleTarget target) : base(
     target,
     (x, y) => LibDevice.__nv_sin(x) + LibDevice.__nv_cos(y))
 {
 }
예제 #23
0
        /// <summary>Creates the estimator module; <paramref name="target"/> is forwarded to the base constructor.</summary>
        public PiEstimatorModule(GPUModuleTarget target) : base(target)
        {
        }
예제 #24
0
 /// <summary>Creates the module with the LibDevice <c>__nv_sin</c> intrinsic as its operation.</summary>
 /// <param name="target">Module target forwarded to the base constructor.</param>
 public SinModule(GPUModuleTarget target) : base(target, LibDevice.__nv_sin)
 {
 }
 /// <summary>Creates the module and stores the binary operation it applies.</summary>
 /// <param name="target">Module target forwarded to the base constructor.</param>
 /// <param name="opFunc">Binary operation stored in <c>op</c>.</param>
 public TransformModule(GPUModuleTarget target, Func <T, T, T> opFunc) : base(target)
 {
     this.op = opFunc;
 }
 /// <summary>
 /// Creates the simulator module, storing the block size and a description string
 /// of the form <c>GPU.StaticBlockSize(n)</c>.
 /// </summary>
 /// <param name="target">Module target forwarded to the base constructor.</param>
 /// <param name="blockSize">Block size stored on the module and embedded in the description.</param>
 public GpuStaticSimulatorModule(GPUModuleTarget target, int blockSize)
     : base(target)
 {
     _blockSize   = blockSize;
     _description = string.Format("GPU.StaticBlockSize({0})", blockSize);
 }
 /// <summary>Creates the simulator module, passing a fixed block size of 64 to the base constructor.</summary>
 public GpuStaticSimulatorModule64(GPUModuleTarget target) : base(target, 64)
 {
 }
 /// <summary>Creates the simulator module, passing a fixed block size of 128 to the base constructor.</summary>
 public GpuStaticSimulatorModule128(GPUModuleTarget target) : base(target, 128)
 {
 }
예제 #29
0
 /// <summary>Creates the estimator module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public DefaultPiEstimatorModule(GPUModuleTarget target) : base(target)
 {
 }
 /// <summary>Creates the simulator module, passing a fixed block size of 256 to the base constructor.</summary>
 public GpuStaticSimulatorModule256(GPUModuleTarget target) : base(target, 256)
 {
 }
 /// <summary>Creates the module, passing fixed values 32 and 8 as the second and third base-constructor arguments.</summary>
 public MatrixTransposeF64(GPUModuleTarget target) : base(target, 32, 8)
 {
 }
 /// <summary>Creates the simulator module, passing a fixed block size of 512 to the base constructor.</summary>
 public GpuStaticSimulatorModule512(GPUModuleTarget target) : base(target, 512)
 {
 }
예제 #33
0
        /// <summary>
        /// Entry point: opens a 1024x768 render form, creates the Direct3D 9 device and vertex
        /// buffer, creates the worker and the <c>SimpleD3D9</c> module, then runs the render loop
        /// in which the module updates the vertex buffer each frame before it is drawn as a
        /// point list. All disposable resources are released when the loop ends or when any
        /// step throws.
        /// </summary>
        public static void Main()
        {
            // Plain managed description of the vertex layout: position at offset 0, color at offset 12.
            var vertexElems = new []
            {
                new VertexElement(0, 0, DeclarationType.Float3, DeclarationMethod.Default, DeclarationUsage.Position, 0),
                new VertexElement(0, 12, DeclarationType.Ubyte4, DeclarationMethod.Default, DeclarationUsage.Color, 0),
                VertexElement.VertexDeclarationEnd
            };

            // Stacked using statements dispose in reverse order of acquisition:
            // updater, worker, vertexDecl, vertices, device, direct3D, form.
            using (var form = new RenderForm("SimpleD3D9 by C#") { ClientSize = new Size(1024, 768) })
            using (var direct3D = new Direct3D())
            using (var device = new D3D9Device(
                       direct3D,
                       CUDADevice.Default.ID,
                       DeviceType.Hardware,
                       form.Handle,
                       CreateFlags.HardwareVertexProcessing,
                       new PresentParameters(form.ClientSize.Width, form.ClientSize.Height)))
            using (var vertices = new VertexBuffer(device, Utilities.SizeOf <Vector4>() * Total, Usage.WriteOnly,
                                                   VertexFormat.None, Pool.Default))
            using (var vertexDecl = new VertexDeclaration(device, vertexElems))
            using (var worker = Worker.CreateByFunc(() => Generate(device)))
            using (var updater = new SimpleD3D9(GPUModuleTarget.Worker(worker)))
            {
                var view = Matrix.LookAtLH(
                    new Vector3(0.0f, 3.0f, -2.0f), // the camera position
                    new Vector3(0.0f, 0.0f, 0.0f),  // the look-at position
                    new Vector3(0.0f, 1.0f, 0.0f)); // the up direction

                var proj = Matrix.PerspectiveFovLH(
                    (float)(Math.PI / 4.0), // the field of view (y direction, per the SharpDX API)
                    1.0f,                   // the aspect ratio
                    1.0f,                   // the near plane distance
                    100.0f);                // the far plane distance

                device.SetTransform(TransformState.View, view);
                device.SetTransform(TransformState.Projection, proj);
                device.SetRenderState(RenderState.Lighting, false);

                var vbres = RegisterVerticesResource(vertices);
                try
                {
                    var clock = System.Diagnostics.Stopwatch.StartNew();

                    RenderLoop.Run(form, () =>
                    {
                        // Let the GPU module rewrite the vertex buffer for the current time.
                        var time = (float)(clock.Elapsed.TotalMilliseconds) / 300.0f;
                        updater.Update(vbres, time);

                        // Now normal D3D9 rendering procedure.
                        device.Clear(ClearFlags.Target | ClearFlags.ZBuffer, new ColorBGRA(0, 40, 100, 0), 1.0f, 0);
                        device.BeginScene();

                        device.VertexDeclaration = vertexDecl;
                        device.SetStreamSource(0, vertices, 0, Utilities.SizeOf <Vector4>());
                        // we use PointList as the graphics primitives
                        device.DrawPrimitives(SharpDX.Direct3D9.PrimitiveType.PointList, 0, Total);

                        device.EndScene();
                        device.Present();
                    });
                }
                finally
                {
                    // Always unregister before the module, worker and vertex buffer are disposed.
                    UnregisterVerticesResource(vbres);
                }
            }
        }
예제 #34
0
 /// <summary>Creates the solver module; <paramref name="target"/> is forwarded to the base constructor.</summary>
 public TriDiagSolverModule(GPUModuleTarget target) : base(target)
 {
 }