コード例 #1
0
        private void KernelRun(CudaStream stream, CUdeviceptr outputPtr, CUdeviceptr input1Ptr, CUdeviceptr input2Ptr, int size, bool async = false)
        {
            CUdeviceptr bufferPtr = m_buffer.GetDevicePtr(m_nGPU);

            outputPtr = outputPtr + outOffset * m_outTypeSize;

            if (this.size > 0)
            {
                size = this.size;
            }

            int segmentedFlag   = Convert.ToInt32(segmented);
            int distributedFlag = Convert.ToInt32(distributed);

            if (async)
            {
                m_kernels[threadCount].RunAsync(stream, outputPtr, outOffset, input1Ptr, input2Ptr, bufferPtr, size, segmentedFlag, distributedFlag);
            }
            else
            {
                m_kernels[threadCount].Run(outputPtr, outOffset, input1Ptr, input2Ptr, bufferPtr, size, segmentedFlag, distributedFlag);
            }

            ResetParameters();
        }
コード例 #2
0
            /// <summary>
            /// Draws a single element of a guess at the provided position
            /// </summary>
            /// <param name="s">[async] stream that the kernels will execute in</param>
            /// <param name="guess">element of a guess to draw</param>
            /// <param name="position">the positiion (0,1,2,...) of the element inside the guess vector</param>
            /// <param name="xGroupOffset">X pixel offset of the first element of the guess vector</param>
            /// <param name="yGroupOffset">Y pixel offset of the first element of the guess vector</param>
            private void DrawGuessAtPosition(CudaStream s, float guess, int position, int xGroupOffset, int yGroupOffset)
            {
                MyCudaTexture textureCircleRim  = Owner.m_textureCircleRim;
                MyCudaTexture textureCircleMask = Owner.m_textureCircleMask;

                int   xOffset    = xGroupOffset + position * (GFX_GUESS_SPACER + textureCircleRim.SizeInPixels.x);
                int   yOffset    = yGroupOffset;
                Color guessColor = Owner.GetGuessColor((int)Math.Round(guess));

                // color
                int yDiv = 10;

                Debug.Assert(textureCircleMask.SizeInPixels.y % yDiv == 0);
                m_MaskedColorKernel.SetupExecution(new dim3(textureCircleMask.SizeInPixels.x, yDiv, 1),
                                                   new dim3(textureCircleMask.SizeInPixels.y / yDiv, TARGET_VALUES_PER_PIXEL));
                m_MaskedColorKernel.RunAsync(s, Owner.VisualOutput, Owner.VisualWidth, Owner.VisualHeight, xOffset, yOffset,
                                             textureCircleMask.BitmapPtr, textureCircleMask.SizeInPixels.x, textureCircleMask.SizeInPixels.y,
                                             guessColor.R / 255.0f, guessColor.G / 255.0f, guessColor.B / 255.0f);

                // circle rim
                Debug.Assert(textureCircleRim.SizeInPixels.y % yDiv == 0);
                m_RgbaTextureKernel.SetupExecution(new dim3(textureCircleRim.SizeInPixels.x, yDiv, 1),
                                                   new dim3(textureCircleRim.SizeInPixels.y / yDiv, TARGET_VALUES_PER_PIXEL));
                m_RgbaTextureKernel.RunAsync(s, Owner.VisualOutput, Owner.VisualWidth, Owner.VisualHeight, xOffset, yOffset,
                                             textureCircleRim.BitmapPtr, textureCircleRim.SizeInPixels.x, textureCircleRim.SizeInPixels.y);
            }
コード例 #3
0
        public CudaError LaunchKernelWithStreamBinding(
            CudaStream stream,
            CudaKernel kernel,
            int gridDimX,
            int gridDimY,
            int gridDimZ,
            int blockDimX,
            int blockDimY,
            int blockDimZ,
            int sharedMemSizeInBytes,
            IntPtr args,
            IntPtr kernelArgs)
        {
            var binding = stream.BindScoped();

            var result = LaunchKernel(
                kernel.FunctionPtr,
                gridDimX,
                gridDimY,
                gridDimZ,
                blockDimX,
                blockDimY,
                blockDimZ,
                sharedMemSizeInBytes,
                stream.StreamPtr,
                args,
                kernelArgs);

            binding.Recover();
            return(result);
        }
コード例 #4
0
            public override void Init(int nGPU)
            {
                switch (Mode)
                {
                case HashMapperMode.Simple:
                    break;

                case HashMapperMode.LocalitySensitive:
                    MyMemoryManager.Instance.ClearGlobalVariable(Owner.GlobalVariableName, Owner.GPU);

                    // Only values are the modulo and and integer divisor (placing into bins)
                    Owner.Temp.SafeCopyToHost(0, 2);
                    Owner.Temp.Host[0] = 2f;
                    Owner.Temp.Host[1] = 2f / InternalBinCount;
                    Owner.Temp.SafeCopyToDevice(0, 2);

                    break;

                default:
                    throw new ArgumentOutOfRangeException();
                }
                _combineVectorsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"common\CombineVectorsKernel", "CombineTwoVectorsKernelVarSize");
                _combineVectorsKernel.SetupExecution(Owner.InputSize);

                _hashKernel = MyKernelFactory.Instance.Kernel(nGPU, @"VSA\Mappers", "GetIndices_ImplicitSeed");
                _hashKernel.SetupExecution(Owner.Output.Count);

                _noHashKernel = MyKernelFactory.Instance.Kernel(nGPU, @"VSA\Mappers", "GetIndices_NoHashing");
                _noHashKernel.SetupExecution(Owner.Output.Count);

                m_stream = new CudaStream();
            }
コード例 #5
0
 private CudaStreamProvider()
 {
     for (var i = 0; i < StreamsPerThread; i++)
     {
         m_streamPool[i] = new CudaStream();
     }
 }
コード例 #6
0
ファイル: TestCuda.cs プロジェクト: wo80/csparse-interop
        private static void RunCudaTest(SparseMatrix A, SparseMatrix B)
        {
            // NOTE: for Hermitian matrices, the storage has to be transposed, which is
            //       done by the solver (unless 3rd argument transpose = false).

            using (var stream = new CudaStream())
                using (var solver = new CudaCholesky(stream, B))
                {
                    TestRandomSymmetric(solver, B, "CUDA Cholesky");

                    ReportGpuTime(solver.FactorizationTime);
                }

            using (var stream = new CudaStream())
                using (var solver = new CudaQR(stream, A))
                {
                    TestRandom(solver, A, "CUDA QR");

                    ReportGpuTime(solver.FactorizationTime);
                }

            using (var stream = new CudaStream())
                using (var solver = new CudaQR(stream, B))
                {
                    TestRandomSymmetric(solver, B, "CUDA QR");

                    ReportGpuTime(solver.FactorizationTime);
                }
        }
コード例 #7
0
            /// <summary>
            /// Transforms the <paramref name="output"/> vector into a vector of indices with properties specified by the parameters.
            /// </summary>
            /// <param name="input">The vector to transform.</param>
            /// <param name="output">The memory to contain the results.</param>
            /// <param name="misc">A vector containing the range to modulate to as the first value (typically 2f because dot product ranges from [-1,1])
            /// and the bin size in this modulated space (typically <paramref name="misc"/>[0] / internalBinCount) as the second value.</param>
            /// <param name="offsets">The random offsets for each <paramref name="output"/> value (typically uniform random numbers in [0, <paramref name="misc"/>[0].</param>
            /// <param name="vectorSize">The length of the <paramref name="output"/> vector.</param>
            /// <param name="outputBinCount">The range into which the internal bins will be scattered.</param>
            /// <param name="seed">The seed used for the scattering the internal bins.</param>
            /// <param name="combineVectorsKernel">The kernel used for addition, modulo and integer division.</param>
            /// <param name="hashKernel">The kernel used for scattering the internal bins.</param>
            /// <param name="noHashKernel"></param>
            /// <param name="doHashMapping"></param>
            /// <param name="internalBinCount"></param>
            /// <param name="stream"></param>
            public static void GetIndices(
                CUdeviceptr input, CUdeviceptr output, CUdeviceptr misc, CUdeviceptr?offsets,
                int vectorSize, int outputBinCount, int seed,
                MyCudaKernel combineVectorsKernel, MyCudaKernel hashKernel, MyCudaKernel noHashKernel,
                bool doHashMapping, int internalBinCount,
                CudaStream stream)
            {
                Debug.Assert(vectorSize > 0, "Invalid vector size");
                Debug.Assert(outputBinCount > 1, "Requires at least 2 output bins");
                Debug.Assert(combineVectorsKernel != null && hashKernel != null, "Missing kernels");


                // Values are in [-1, 1] if they were normalized

                if (offsets != null)
                {
                    // Offset to [-1, 3]
                    combineVectorsKernel.RunAsync(stream, input, offsets.Value, output, (int)MyJoin.MyJoinOperation.Addition, vectorSize, vectorSize);
                }

                // Modulate to [0, 2]
                combineVectorsKernel.RunAsync(stream, output, misc, output, (int)MyJoin.MyJoinOperation.Modulo, vectorSize, 1);

                // Transform to integers in [0, InternalBinCount - 1]
                combineVectorsKernel.RunAsync(stream, output, misc + sizeof(float), output, (int)MyJoin.MyJoinOperation.Division_int, vectorSize, 1);

                if (doHashMapping)
                {
                    hashKernel.RunAsync(stream, output, output, vectorSize, vectorSize, outputBinCount, seed);
                }
                else
                {
                    noHashKernel.RunAsync(stream, output, output, vectorSize, internalBinCount);
                }
            }
コード例 #8
0
        // TODO(Premek): Add more copy functions to be consistent with Async variants. (And cleanup the API in general.)

        #region Async copy device to device

        public virtual void CopyFromMemoryBlockAsync(MyMemoryBlock <T> source, int srcOffset, int destOffset, int count,
                                                     CudaStream stream = null)
        {
            Device[Owner.GPU].AsyncCopyToDevice(
                source.GetDevice(Owner.GPU), srcOffset * ESize, destOffset * ESize, count * ESize,
                MyKernelFactory.GetCuStreamOrDefault(stream));
        }
コード例 #9
0
        /// <summary>
        /// Retrieves the compressed stream from the encoder state that was previously used in one of the encoder functions.<para/>
        /// Note: Synchronizes on stream. For async use IntPtr!
        /// </summary>
        /// <param name="stream">CUDA stream where all the required device operations will be placed.</param>
        /// <returns>Byte array containing the data.</returns>
        public byte[] RetrieveBitstream(CudaStream stream)
        {
            SizeT size = new SizeT();

            res = NvJpegNativeMethods.nvjpegEncodeRetrieveBitstream(_nvJpeg.Handle, _state, IntPtr.Zero, ref size, stream.Stream);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegEncodeRetrieveBitstream", res));
            if (res != nvjpegStatus.Success)
            {
                throw new NvJpegException(res);
            }

            byte[]   data   = new byte[size];
            GCHandle handle = GCHandle.Alloc(data, GCHandleType.Pinned);

            try
            {
                IntPtr ptr = handle.AddrOfPinnedObject();
                res = NvJpegNativeMethods.nvjpegEncodeRetrieveBitstream(_nvJpeg.Handle, _state, ptr, ref size, stream.Stream);
                Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegEncodeRetrieveBitstream", res));
                stream.Synchronize();
            }
            finally
            {
                handle.Free();
            }
            if (res != nvjpegStatus.Success)
            {
                throw new NvJpegException(res);
            }

            return(data);
        }
コード例 #10
0
            /// <summary>
            /// Works just like DrawGuessAtPosition, only for evaluations.
            /// </summary>
            private void DrawEvaluationAtPosition(CudaStream s, MyMastermindWorld.EvaluationKind evaluation, int position,
                                                  int xGroupOffset, int yGroupOffset, int evaluationsPerRow)
            {
                MyCudaTexture texture;

                switch (evaluation)
                {
                case MyMastermindWorld.EvaluationKind.Bull:
                    texture = Owner.m_textureBull;
                    break;

                case MyMastermindWorld.EvaluationKind.Cow:
                    texture = Owner.m_textureCow;
                    break;

                case MyMastermindWorld.EvaluationKind.Miss:
                default:
                    texture = Owner.m_textureMiss;
                    break;
                }

                int xOffset = xGroupOffset + (position % evaluationsPerRow) * (GFX_EVALUATION_SPACER + texture.SizeInPixels.x);
                int yOffset = yGroupOffset + (position / evaluationsPerRow) * (GFX_EVALUATION_SPACER + texture.SizeInPixels.y);

                int yDiv = 10;

                Debug.Assert(texture.SizeInPixels.y % yDiv == 0);
                m_RgbaTextureKernel.SetupExecution(new dim3(texture.SizeInPixels.x, yDiv, 1),
                                                   new dim3(texture.SizeInPixels.y / yDiv, TARGET_VALUES_PER_PIXEL));
                m_RgbaTextureKernel.RunAsync(s, Owner.VisualOutput, Owner.VisualWidth, Owner.VisualHeight, xOffset, yOffset,
                                             texture.BitmapPtr, texture.SizeInPixels.x, texture.SizeInPixels.y);
            }
コード例 #11
0
ファイル: CudaAPI.cs プロジェクト: saimarpaka/ILGPU
        internal CudaError LaunchKernelWithStreamBinding(
            CudaStream stream,
            CudaKernel kernel,
            RuntimeKernelConfig config,
            IntPtr args,
            IntPtr kernelArgs)
        {
            var binding = stream.BindScoped();

            var result = LaunchKernel(
                kernel.FunctionPtr,
                config.GridDim.X,
                config.GridDim.Y,
                config.GridDim.Z,
                config.GroupDim.X,
                config.GroupDim.Y,
                config.GroupDim.Z,
                config.SharedMemoryConfig.DynamicArraySize,
                stream.StreamPtr,
                args,
                kernelArgs);

            binding.Recover();
            return(result);
        }
コード例 #12
0
        /// <summary> Runs the kernel in asynchronous mode. </summary>
        /// <param name="stream">If the stream is null, the default per-thread stream is used.</param>
        /// <param name="args">MyMemoryBlock arguments are automatically converted to device pointers.</param>
        public void RunAsync(CudaStream stream, params object[] args)
        {
            CheckExecutionSetup();
            ConvertMemoryBlocksToDevicePtrs(args);

            m_kernel.RunAsync(MyKernelFactory.GetCuStreamOrDefault(stream), args);
        }
コード例 #13
0
 public virtual void CopyToMemoryBlockAsync(MyMemoryBlock <T> destination, int srcOffset, int destOffset, int count,
                                            CudaStream stream = null)
 {
     destination.GetDevice(Owner.GPU).AsyncCopyToDevice(
         Device[Owner.GPU], srcOffset * ESize, destOffset * ESize, count * ESize,
         MyKernelFactory.GetCuStreamOrDefault(stream));
 }
コード例 #14
0
        private static void RunCudaTest(SparseMatrix A, SparseMatrix B)
        {
            // NOTE: for real symmetric matrices, there's no need to transpose the storage, so
            //       we call the solver constructor with 3rd argument transpose = false.

            using (var stream = new CudaStream())
                using (var solver = new CudaCholesky(stream, B, false))
                {
                    TestRandomSymmetric(solver, B, "CUDA Cholesky");

                    ReportGpuTime(solver.FactorizationTime);
                }

            using (var stream = new CudaStream())
                using (var solver = new CudaQR(stream, A))
                {
                    TestRandom(solver, A, "CUDA QR");

                    ReportGpuTime(solver.FactorizationTime);
                }

            using (var stream = new CudaStream())
                using (var solver = new CudaQR(stream, B, false))
                {
                    TestRandomSymmetric(solver, B, "CUDA QR");

                    ReportGpuTime(solver.FactorizationTime);
                }
        }
コード例 #15
0
        public MyFourierBinder(MyWorkingNode owner, int inputSize, MyMemoryBlock <float> tempBlock)
            : base(owner, inputSize, tempBlock)
        {
            m_stream = new CudaStream();

            m_fft = new CudaFFTPlan1D(inputSize, cufftType.R2C, 1);
            m_fft.SetStream(m_stream.Stream);
            m_ifft = new CudaFFTPlan1D(inputSize, cufftType.C2R, 1);
            m_ifft.SetStream(m_stream.Stream);

            m_mulkernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "MulComplexElementWise");
            m_mulkernel.SetupExecution(inputSize + 1);

            m_involutionKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "InvolveVector");
            m_involutionKernel.SetupExecution(inputSize - 1);

            m_inversionKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Transforms\InvertValuesKernel", "InvertLengthComplexKernel");
            m_inversionKernel.SetupExecution(inputSize);

            m_dotKernel = MyKernelFactory.Instance.KernelProduct <float>(owner, owner.GPU, ProductMode.f_DotProduct_f);

            m_normalKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
            m_normalKernel.SetupExecution(inputSize);

            m_firstFFTOffset  = 0;
            m_secondFFTOffset = (inputSize + 1) * 2;
            m_tempOffset      = (inputSize + 1) * 4;

            Denominator = inputSize;
        }
コード例 #16
0
        public MyPermutationBinder(MyWorkingNode owner, int inputSize, MyMemoryBlock <float> tempBlock)
            : base(owner, inputSize, tempBlock)
        {
            m_stream = new CudaStream();

            m_binaryPermKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
            m_binaryPermKernel.SetupExecution(inputSize);
        }
コード例 #17
0
ファイル: GpuContext.cs プロジェクト: r-alex98/ConvNetLib
 private GpuContext()
 {
     CudaContext   = new CudaContext(Global.CudaDeviceId);
     BlasContext   = new CudaBlas();
     KernelManager = new KernelManager(this);
     Methods       = new TensorMethods(this);
     Stream        = new CudaStream();
 }
コード例 #18
0
 /// <summary>
 /// Register a cuda context for reference via <see cref="GetContextForDeviceId"/> (used for context restoration).
 /// </summary>
 /// <param name="context"></param>
 internal static void RegisterContext(CudaContext context, CudaStream stream)
 {
     if (!RegisteredContexts.Contains(context))
     {
         RegisteredContexts.Add(context);
         RegisteredStreamsByContext.Add(context, stream);
     }
 }
コード例 #19
0
 // finishing async operations on the device
 public void DecodeJpegDevice(ref nvjpegImage destination, CudaStream cudaStream)
 {
     res = NvJpegNativeMethods.nvjpegDecodeJpegDevice(_nvJpeg.Handle, _decoder.Decoder, _state, ref destination, cudaStream.Stream);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegDecodeJpegDevice", res));
     if (res != nvjpegStatus.Success)
     {
         throw new NvJpegException(res);
     }
 }
コード例 #20
0
ファイル: TensorOpGpu.cs プロジェクト: uzbekdev1/Neuro
        static TensorOpGpu()
        {
            _CudaContext    = new CudaContext(0, true);
            _CudaBlasHandle = new CudaBlas();
            _CudaStream     = new CudaStream();
            _CudnnContext   = new CudaDNNContext();

            _KernelLoader = new KernelLoader();
        }
コード例 #21
0
 // copies huffman tables from parsed stream. should require same scans structure
 public void CopyHuffmanTables(EncoderParams encoderParams, JpegStream jpeg, CudaStream stream)
 {
     res = NvJpegNativeMethods.nvjpegEncoderParamsCopyHuffmanTables(_state, encoderParams.Params, jpeg.Stream, stream.Stream);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegEncoderParamsCopyHuffmanTables", res));
     if (res != nvjpegStatus.Success)
     {
         throw new NvJpegException(res);
     }
 }
コード例 #22
0
 public void SetOptimizedHuffman(int optimized, CudaStream stream)
 {
     res = NvJpegNativeMethods.nvjpegEncoderParamsSetOptimizedHuffman(_params, optimized, stream.Stream);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegEncoderParamsSetOptimizedHuffman", res));
     if (res != nvjpegStatus.Success)
     {
         throw new NvJpegException(res);
     }
 }
コード例 #23
0
 public void SetSamplingFactors(nvjpegChromaSubsampling chroma_subsampling, CudaStream stream)
 {
     res = NvJpegNativeMethods.nvjpegEncoderParamsSetSamplingFactors(_params, chroma_subsampling, stream.Stream);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegEncoderParamsSetSamplingFactors", res));
     if (res != nvjpegStatus.Success)
     {
         throw new NvJpegException(res);
     }
 }
コード例 #24
0
 /// <summary>
 /// This function sets the stream to be used by the cudnn library to execute its routines.
 /// </summary>
 /// <param name="stream">the stream to be used by the library.</param>
 public void SetStream(CudaStream stream)
 {
     res = CudaDNNNativeMethods.cudnnSetStream(_handle, stream.Stream);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnSetStream", res));
     if (res != cudnnStatus.Success)
     {
         throw new CudaDNNException(res);
     }
 }
コード例 #25
0
 public void SetEncoding(nvjpegJpegEncoding etype, CudaStream stream)
 {
     res = NvJpegNativeMethods.nvjpegEncoderParamsSetEncoding(_params, etype, stream.Stream);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegEncoderParamsSetEncoding", res));
     if (res != nvjpegStatus.Success)
     {
         throw new NvJpegException(res);
     }
 }
コード例 #26
0
 /// <summary>
 /// </summary>
 internal EncoderState(NvJpeg nvJpeg, CudaStream stream)
 {
     _nvJpeg = nvJpeg;
     _state  = new nvjpegEncoderState();
     res     = NvJpegNativeMethods.nvjpegEncoderStateCreate(nvJpeg.Handle, ref _state, stream.Stream);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegEncoderStateCreate", res));
     if (res != nvjpegStatus.Success)
     {
         throw new NvJpegException(res);
     }
 }
コード例 #27
0
        /// <summary>
        /// Retrieves the compressed stream from the encoder state that was previously used in one of the encoder functions.
        /// </summary>
        /// <param name="ptr">Pointer to the buffer in the host memory where the compressed stream will be stored. Can be NULL</param>
        /// <param name="length">input buffer size.</param>
        /// <param name="stream">CUDA stream where all the required device operations will be placed.</param>
        /// <returns>On return the NVJPEG library will give the actual compressed stream size in this value.</returns>
        public SizeT RetrieveBitstream(IntPtr ptr, SizeT length, CudaStream stream)
        {
            res = NvJpegNativeMethods.nvjpegEncodeRetrieveBitstream(_nvJpeg.Handle, _state, ptr, ref length, stream.Stream);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nvjpegEncodeRetrieveBitstream", res));
            if (res != nvjpegStatus.Success)
            {
                throw new NvJpegException(res);
            }

            return(length);
        }
コード例 #28
0
        public void InitializeData(int count)
        {
            this.count = count;
            IntPtr hostPointer = IntPtr.Zero;
            var    res         = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref hostPointer, count * sizeof(ResultPoint));

            if (res != CUResult.Success)
            {
                throw new CudaException(res);
            }
            hostBuffer   = (ResultPoint *)hostPointer;
            deviceBuffer = new CudaDeviceVariable <ResultPoint>(count);
            for (int i = 0; i < count; i++)
            {
                hostBuffer[i].X = (uint)i;
                hostBuffer[i].Y = (uint)i;
            }
            defaultStream = new CudaStream();
            res           = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(
                deviceBuffer.DevicePointer,
                hostPointer,
                deviceBuffer.SizeInBytes,
                defaultStream.Stream);
            if (res != CUResult.Success)
            {
                throw new CudaException(res);
            }

            IntPtr secondHostPointer = IntPtr.Zero;

            res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref secondHostPointer, count * sizeof(ResultPoint));
            if (res != CUResult.Success)
            {
                throw new CudaException(res);
            }
            secondHostBuffer   = (ResultPoint *)secondHostPointer;
            secondDeviceBuffer = new CudaDeviceVariable <ResultPoint>(count);
            for (int i = 0; i < count; i++)
            {
                secondHostBuffer[i].X = (uint)i;
                secondHostBuffer[i].Y = (uint)i;
            }
            defaultStream = new CudaStream();
            res           = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(
                secondDeviceBuffer.DevicePointer,
                secondHostPointer,
                secondDeviceBuffer.SizeInBytes,
                defaultStream.Stream);
            if (res != CUResult.Success)
            {
                throw new CudaException(res);
            }
        }
コード例 #29
0
        private CudnnContext(CudnnHandle handle, CudaStream stream)
        {
            if (handle.Pointer == IntPtr.Zero)
            {
                throw new ArgumentException("handle");
            }

            Contract.EndContractBlock();

            this.handle = handle;
            this.stream = stream;
        }
コード例 #30
0
        public static CudnnContext Create(CudaStream stream = null)
        {
            CudnnHandle handle = default(CudnnHandle);

            Invoke(() => CudnnNativeMethods.cudnnCreate(out handle));
            Contract.Assume(handle.Pointer != IntPtr.Zero);

            if (stream != null)
            {
                Invoke(() => CudnnNativeMethods.cudnnSetStream(handle, stream.Stream));
            }
            return(new CudnnContext(handle, stream));
        }
コード例 #31
0
		/// <summary>
		/// Create new dense solve instance using stream stream
		/// </summary>
		/// <param name="stream"></param>
        public CudaSolveDense(CudaStream stream)
			: this()
        {
			SetStream(stream);
        }
コード例 #32
0
		/// <summary>
		/// Create new sparse solve instance using stream stream
		/// </summary>
		/// <param name="stream"></param>
		public CudaSolveSparse(CudaStream stream)
			: this()
		{
			SetStream(stream);
		}
コード例 #33
0
 /// <summary>
 /// This function sets the stream to be used by the cudnn library to execute its routines.
 /// </summary>
 /// <param name="stream">the stream to be used by the library.</param>
 public void SetStream(CudaStream stream)
 {
     res = CudaDNNNativeMethods.cudnnSetStream(_handle, stream.Stream);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnSetStream", res));
     if (res != cudnnStatus.Success) throw new CudaDNNException(res);
 }