示例#1
0
        //[CudnnMnistFCF]
        /// <summary>
        /// Forward pass of a fully connected layer for a single sample. The
        /// destination buffer is first filled with the layer bias, then the
        /// transposed-weights-times-input product is accumulated on top of it.
        /// </summary>
        /// <param name="ip">Layer supplying <c>Outputs</c>, the bias buffer <c>BiasD</c> and the weight buffer <c>DataD</c>.</param>
        /// <param name="nchw">Input dimensions; only <c>N == 1</c> is handled.</param>
        /// <param name="srcData">Device buffer holding the C*H*W input values.</param>
        /// <param name="dstData">Device buffer resized to <c>ip.Outputs</c> elements and filled with the result.</param>
        /// <exception cref="NotImplementedException">Thrown when <c>nchw.N</c> is not 1.</exception>
        public void FullyConnectedForward(Layer ip, nchw_t nchw, DeviceMemory <float> srcData, ref DeviceMemory <float> dstData)
        {
            if (nchw.N != 1)
            {
                // Batched input is not handled; NotImplementedException still derives
                // from Exception, so existing catch blocks keep working.
                throw new NotImplementedException("Not Implemented");
            }
            var dimX = nchw.C * nchw.H * nchw.W;
            var dimY = ip.Outputs;

            Resize(ref dstData, dimY);

            const float alpha = 1.0f;
            // beta is 1 so that gemv adds onto the bias already copied into dstData.
            const float beta  = 1.0f;

            // A ref parameter cannot be captured by a lambda, so take a local copy first.
            var output = dstData;

            // cuMemcpyDtoD is a raw CUDA driver call, so it has to run inside worker.Eval.
            // Its status is routed through cuSafeCall so a failed copy is not silently ignored.
            _worker.EvalAction(
                () => CUDAInterop.cuSafeCall(
                    CUDAInterop.cuMemcpyDtoD(output.Ptr.Handle, ip.BiasD.Handle, (IntPtr)(dimY * sizeof(float)))));

            // This cuBLAS call does not need worker.Eval: the cuBLAS wrapper is a thin layer
            // over the raw API and already performs worker.Eval itself.
            _cublas.Sgemv(CUBLASInterop.cublasOperation_t.CUBLAS_OP_T, dimX, dimY, alpha, ip.DataD.Ptr, dimX,
                          srcData.Ptr, 1, beta, dstData.Ptr, 1);

            // Record the output shape (dimY x 1 x 1).
            // NOTE(review): nchw is passed by value; if nchw_t is a struct these
            // assignments never reach the caller - confirm this is intended.
            nchw.H = 1;
            nchw.W = 1;
            nchw.C = dimY;
        }
示例#2
0
        /// <summary>
        /// Registers a Direct3D 9 vertex buffer with CUDA and returns the
        /// graphics resource handle written by the driver.
        /// </summary>
        /// <param name="vertices">Vertex buffer whose native pointer is registered.</param>
        /// <returns>The handle filled in by <c>cuGraphicsD3D9RegisterResource</c>.</returns>
        unsafe static IntPtr RegisterVerticesResource(VertexBuffer vertices)
        {
            IntPtr resourceHandle = IntPtr.Zero;
            var status = CUDAInterop.cuGraphicsD3D9RegisterResource(&resourceHandle, vertices.NativePointer, 0);

            // cuSafeCall turns the driver status into the project's error handling.
            CUDAInterop.cuSafeCall(status);
            return resourceHandle;
        }
示例#3
0
        //[/GLdescription]

        //[LockPositions]
        /// <summary>
        /// Maps both registered position buffers into CUDA address space, invokes
        /// <paramref name="f"/> with device pointers to them, and unmaps them again.
        /// </summary>
        /// <param name="f">
        /// Callback receiving the pointer for <c>_resources[0]</c> (mapped read-only)
        /// and the pointer for <c>_resources[1]</c> (mapped write-discard).
        /// </param>
        void LockPos(Del f)
        {
            CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsResourceSetMapFlags(_resources[0],
                                                                             (uint)CUDAInterop.CUgraphicsMapResourceFlags_enum.CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY));
            CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsResourceSetMapFlags(_resources[1],
                                                                             (uint)CUDAInterop.CUgraphicsMapResourceFlags_enum.CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD));
            CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsMapResourcesEx(2u, _resources, IntPtr.Zero));

            // Everything after a successful map is guarded, so that a failure while
            // querying the mapped pointers (not only inside the callback) still
            // unmaps the resources.
            try
            {
                var bytes   = IntPtr.Zero;   // receives the mapped size; not used afterwards
                var handle0 = IntPtr.Zero;
                var handle1 = IntPtr.Zero;

                unsafe
                {
                    CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsResourceGetMappedPointer(&handle0, &bytes,
                                                                                          _resources[0]));
                    CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsResourceGetMappedPointer(&handle1, &bytes,
                                                                                          _resources[1]));
                }

                var pos0 = new deviceptr <float4>(handle0);
                var pos1 = new deviceptr <float4>(handle1);

                f(pos0, pos1);
            }
            finally
            {
                CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsUnmapResourcesEx(2u, _resources, IntPtr.Zero));
            }
        }
示例#4
0
        /// <summary>
        /// Creates a CUDA context bound to the given Direct3D 9 device.
        /// </summary>
        /// <param name="d3d9Device">Direct3D 9 device whose native pointer is handed to the driver.</param>
        /// <returns>
        /// The <c>CUDADevice.DeviceDict</c> entry for the device id reported by the
        /// driver, paired with the new context handle.
        /// </returns>
        unsafe static Tuple <CUDADevice, IntPtr> Generate(D3D9Device d3d9Device)
        {
            IntPtr contextHandle = IntPtr.Zero;
            int    deviceId      = -1;

            var status = CUDAInterop.cuD3D9CtxCreate(&contextHandle, &deviceId, 0u, d3d9Device.NativePointer);
            CUDAInterop.cuSafeCall(status);

            var device = CUDADevice.DeviceDict[deviceId];
            return new Tuple <CUDADevice, IntPtr>(device, contextHandle);
        }
示例#5
0
        //[/startSimWindow]

        //[createGLContextGenerator]
        /// <summary>
        /// Creates an OpenGL-interoperable CUDA context on the default device.
        /// </summary>
        /// <returns>The default device paired with the new context handle.</returns>
        unsafe Tuple <Device, IntPtr> Generate()
        {
            IntPtr contextHandle = IntPtr.Zero;
            var    defaultDevice = Device.Default;

            var status = CUDAInterop.cuGLCtxCreate(&contextHandle, 0u, Device.Default.ID);
            CUDAInterop.cuSafeCall(status);

            return new Tuple <Device, IntPtr>(defaultDevice, contextHandle);
        }
示例#6
0
        /// <summary>
        /// Registers an OpenGL buffer object with CUDA, maps it and keeps a typed
        /// device pointer to its contents.
        /// </summary>
        /// <param name="buffer">OpenGL buffer object name to register and map.</param>
        /// <param name="bounds">Bounds stored alongside the buffer.</param>
        public GpuSpaceBufferContext(uint buffer, DiscreteBounds bounds)
        {
            // Check the driver status instead of discarding it, so a failed
            // registration is reported here rather than surfacing later at map time.
            CUDAInterop.cuSafeCall(CUDAInterop.cuGLRegisterBufferObject(buffer));

            // _buffer must be assigned before GetDevicePointer(), which reads it.
            _buffer        = buffer;
            _devicePointer = new deviceptr <VoxelFace>(GetDevicePointer());
            _bounds        = bounds;
        }
示例#7
0
        /// <summary>
        /// Maps the registered buffer object <c>_buffer</c> and returns the
        /// address written by <c>cuGLMapBufferObject</c>.
        /// </summary>
        /// <returns>The mapped address; the reported size is discarded.</returns>
        private IntPtr GetDevicePointer()
        {
            IntPtr mappedAddress = IntPtr.Zero;
            IntPtr mappedSize    = IntPtr.Zero;

            unsafe
            {
                var status = CUDAInterop.cuGLMapBufferObject(&mappedAddress, &mappedSize, _buffer);
                CUDAInterop.cuSafeCall(status);
            }

            return mappedAddress;
        }
示例#8
0
        /// <summary>
        /// Maps the vertex-buffer resource into CUDA space, runs the kernel on it
        /// and unmaps it again.
        /// </summary>
        /// <param name="vbRes">Graphics resource handle of the vertex buffer.</param>
        /// <param name="time">Time value forwarded to the kernel.</param>
        public unsafe void Update(IntPtr vbRes, float time)
        {
            // 1. map resource to cuda space, means lock to cuda space
            //    (a local copy is needed because the API takes the handle by address)
            var vbRes1 = vbRes;

            CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsMapResources(1, &vbRes1, IntPtr.Zero));

            // From here on the resource is mapped; the finally block guarantees it is
            // unmapped even if the pointer query or the kernel launch throws.
            try
            {
                // 2. get memory pointer from mapped resource
                var vbPtr  = IntPtr.Zero;
                var vbSize = IntPtr.Zero;

                CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsResourceGetMappedPointer(&vbPtr, &vbSize, vbRes1));

                // 3. create device pointer, and run the kernel
                var pos = new deviceptr <float4>(vbPtr);

                GPULaunch(Kernel, LaunchParam, pos, time);
            }
            finally
            {
                // 4. unmap resource, means unlock, so that DirectX can then use it again
                CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsUnmapResources(1u, &vbRes1, IntPtr.Zero));
            }
        }
示例#9
0
 //[overrideFuctions]
 /// <summary>
 /// Releases the CUDA/OpenGL interop registrations and the GL buffers, then,
 /// when <paramref name="disposing"/> is true, the velocity memory, the
 /// simulator modules and the worker, before delegating to the base class.
 /// </summary>
 /// <param name="disposing">True to also dispose the managed members.</param>
 protected override void Dispose(bool disposing)
 {
     // Drop the graphics-resource registrations first ...
     for (var r = 0; r < _resources.Length; r++)
     {
         var status = CUDAInterop.cuGraphicsUnregisterResource(_resources[r]);
         CUDAInterop.cuSafeCall(status);
     }

     // ... then the buffer-object registrations ...
     for (var b = 0; b < _buffers.Length; b++)
     {
         var status = CUDAInterop.cuGLUnregisterBufferObject(_buffers[b]);
         CUDAInterop.cuSafeCall(status);
     }

     // ... and finally delete the GL buffers themselves, if there are any.
     var bufferCount = _buffers.Length;
     if (bufferCount > 0)
     {
         GL.DeleteBuffers(bufferCount, _buffers);
     }

     if (disposing)
     {
         _vel.Dispose();
         _disposeSimulators();
         _worker.Dispose();
     }

     base.Dispose(disposing);
 }
示例#10
0
 /// <summary>
 /// Unregisters a graphics resource from CUDA.
 /// </summary>
 /// <param name="res">Graphics resource handle to unregister.</param>
 static void UnregisterVerticesResource(IntPtr res)
 {
     var status = CUDAInterop.cuGraphicsUnregisterResource(res);

     CUDAInterop.cuSafeCall(status);
 }
示例#11
0
        //[/LockPositions]

        /// <summary>
        /// Builds the 800x600 simulation window: sets the simulation constants,
        /// creates the worker through <c>Generate</c>, builds the queue of
        /// simulators, allocates two GL buffers and registers them with CUDA,
        /// and uploads the initial body positions and velocities.
        /// </summary>
        public SimWindow() : base(800, 600, GraphicsMode.Default, "Gravitational n-body simulation")
        {
            // 256 * 64 = 16384 bodies.
            _numBodies = 256 * 64;
            const float clusterScale  = 1.0f;
            const float velocityScale = 1.0f;

            _deltaTime        = 0.001f;
            _softeningSquared = 0.00125f;
            _damping          = 0.9995f;
            //[CreateWorker]
            _worker = Worker.CreateByFunc(Generate);
            //[/CreateWorker]

            _stopwatch    = Stopwatch.StartNew();
            _fpsCalcLag   = 128;
            _frameCounter = 0;

            //[CreateSimulatros]
            _simulators = new Queue <ISimulator>();
            var target = GPUModuleTarget.Worker(_worker);

            // One dynamic module produces a simulator per requested block size.
            var simulatorGpuDynamicBlockSizeModule = new GpuDynamicSimulatorModule(target);         // need dispose
            var simulatorGpuDynamicBlockSize64     = simulatorGpuDynamicBlockSizeModule.Create(64);
            var simulatorGpuDynamicBlockSize128    = simulatorGpuDynamicBlockSizeModule.Create(128);
            var simulatorGpuDynamicBlockSize256    = simulatorGpuDynamicBlockSizeModule.Create(256);
            var simulatorGpuDynamicBlockSize512    = simulatorGpuDynamicBlockSizeModule.Create(512);

            // The static modules are enqueued directly as simulators below.
            var simulatorGpuStaticBlockSizeModule64  = new GpuStaticSimulatorModule64(target);      // need dispose
            var simulatorGpuStaticBlockSizeModule128 = new GpuStaticSimulatorModule128(target);     // need dispose
            var simulatorGpuStaticBlockSizeModule256 = new GpuStaticSimulatorModule256(target);     // need dispose
            var simulatorGpuStaticBlockSizeModule512 = new GpuStaticSimulatorModule512(target);     // need dispose

            // First, enqueue one simulator which is 256 blocksize so we can compare with C code for performance.
            // (The same instance is enqueued again further down; this first entry is
            // the one dequeued into _simulator at the end of this section.)
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule256);

            // Enqueue several dynamic block size simulators.
            _simulators.Enqueue(simulatorGpuDynamicBlockSize64);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize128);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize256);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize512);

            // Enqueue several static block size simulators.
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule64);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule128);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule256);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule512);

            // We do not enqueue any cpu simulator as it is much too slow.
            //_simulators.Enqueue(new CpuSimulator(_worker, _numBodies));

            // Captures the five modules created above so they can be disposed later
            // through the _disposeSimulators delegate.
            _disposeSimulators = () =>
            {
                simulatorGpuDynamicBlockSizeModule.Dispose();
                simulatorGpuStaticBlockSizeModule64.Dispose();
                simulatorGpuStaticBlockSizeModule128.Dispose();
                simulatorGpuStaticBlockSizeModule256.Dispose();
                simulatorGpuStaticBlockSizeModule512.Dispose();
            };

            // Start with the first queued simulator (the static 256 one).
            _simulator = _simulators.Dequeue();
            //[/CreateSimulatros]

            //[CreateBuffers]
            _buffers = new uint[2];
            // Explicit zeroing; a freshly allocated array already holds zeros.
            for (var i = 0; i < _buffers.Length; i++)
            {
                _buffers[i] = 0;
            }
            GL.GenBuffers(_buffers.Length, _buffers);
            foreach (var buffer in _buffers)
            {
                // Allocate room for one float4 per body, without initial data.
                GL.BindBuffer(BufferTarget.ArrayBuffer, buffer);
                GL.BufferData(BufferTarget.ArrayBuffer,
                              (IntPtr)(Microsoft.FSharp.Core.Operators.SizeOf <float4>() * _numBodies),
                              IntPtr.Zero, BufferUsageHint.DynamicDraw);
                // Read the size back to verify that the allocation really happened.
                var size = 0;
                unsafe
                {
                    GL.GetBufferParameter(BufferTarget.ArrayBuffer, BufferParameterName.BufferSize, &size);
                }
                if (size != Microsoft.FSharp.Core.Operators.SizeOf <float4>() * _numBodies)
                {
                    // NOTE(review): the message says "Pixel Buffer Object" although the
                    // buffer is bound as an ArrayBuffer here.
                    throw new Exception("Pixel Buffer Object allocation failed!");
                }
                GL.BindBuffer(BufferTarget.ArrayBuffer, 0);
                CUDAInterop.cuSafeCall(CUDAInterop.cuGLRegisterBufferObject(buffer));
            }

            // Each buffer is additionally registered as a graphics resource; these
            // handles are what LockPos maps and unmaps.
            // NOTE(review): every buffer is thus registered through both
            // cuGLRegisterBufferObject and cuGraphicsGLRegisterBuffer - confirm both are needed.
            _resources = new IntPtr[_buffers.Length];
            for (var i = 0; i < _buffers.Length; i++)
            {
                var res = IntPtr.Zero;
                unsafe
                {
                    CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsGLRegisterBuffer(&res, _buffers[i], 0u));
                }
                _resources[i] = res;
            }
            //[/CreateBuffers]

            //[FinalizeGL]
            _vel = _worker.Malloc <float4>(_numBodies);

            // Generate the initial host-side positions and velocities.
            float4[] hpos, hvel;
            BodyInitializer.Initialize(new BodyInitializer3(), clusterScale, velocityScale, _numBodies,
                                       out hpos, out hvel);
            // Upload the velocities into the worker-allocated buffer.
            _worker.Scatter(hvel, _vel.Ptr, Microsoft.FSharp.Core.FSharpOption <int> .None,
                            Microsoft.FSharp.Core.FSharpOption <int> .None);
            // Upload the positions into the second mapped buffer (pos1); pos0 is not
            // touched by this callback.
            LockPos(
                (pos0, pos1) =>
                _worker.Scatter(hpos, pos1, Microsoft.FSharp.Core.FSharpOption <int> .None,
                                Microsoft.FSharp.Core.FSharpOption <int> .None));

            Help();
            Description();
            //[/FinalizeGL]
        }
示例#12
0
 /// <summary>
 /// Unmaps and then unregisters the buffer object held in <c>_buffer</c>.
 /// The status codes returned by both driver calls are not inspected.
 /// </summary>
 public void Dispose()
 {
     var glBuffer = _buffer;

     // Order matters: the buffer is unmapped before its registration is dropped.
     CUDAInterop.cuGLUnmapBufferObject(glBuffer);
     CUDAInterop.cuGLUnregisterBufferObject(glBuffer);
 }