public static void cuEventRecord(CUevent hEvent, CUstream hStream)
{
    Wrap(() =>
    {
        try
        {
            var error = nativeEventRecord(hEvent, hStream);
            if (error != CUresult.CUDA_SUCCESS)
            {
                throw new CudaException(error);
            }
        }
        catch (CudaException)
        {
            throw;
        }
        catch (DllNotFoundException dnfe)
        {
            throw new CudaException(CudaError.NoDriver, dnfe);
        }
        catch (Exception e)
        {
            throw new CudaException(CudaError.Unknown, e);
        }
    });
}
/// <summary>
/// Records an event. If <c>stream</c> is non-zero, the event is recorded after all preceding operations in the stream have
/// completed; otherwise, it is recorded after all preceding operations in the CUDA context have completed. Since this
/// operation is asynchronous, <see cref="Query"/> and/or <see cref="Synchronize"/> must be used to determine when the event
/// has actually been recorded. <para/>
/// If <see cref="Record()"/> has previously been called and the event has not been recorded yet, this function throws
/// <see cref="CUResult.ErrorInvalidValue"/>.
/// </summary>
public void Record()
{
    CUstream _stream = new CUstream();
    res = DriverAPINativeMethods.Events.cuEventRecord(_event, _stream);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuEventRecord", res));
    if (res != CUResult.Success)
        throw new CudaException(res);
}
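// Usage sketch (added for illustration, not taken from the library source above):
// ordering and timing asynchronous work with the ManagedCuda-style wrappers this
// section draws on. CudaEvent, its Record(CUstream)/Synchronize() methods and the
// static ElapsedTime helper are assumptions based on that API.
CudaStream stream = new CudaStream();
CudaEvent start = new CudaEvent();
CudaEvent stop = new CudaEvent();

start.Record(stream.Stream);                    // enqueued after all preceding work in 'stream'
// ... enqueue asynchronous kernels / copies on 'stream' here ...
stop.Record(stream.Stream);                     // enqueued after that work

stop.Synchronize();                             // block until 'stop' has actually been recorded
float ms = CudaEvent.ElapsedTime(start, stop);  // GPU time elapsed between the two events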
/// <summary>
/// Creates a new 1D FFT plan (new API)
/// </summary>
/// <param name="handle">cufftHandle object</param>
/// <param name="nx">The transform size (e.g., 256 for a 256-point FFT)</param>
/// <param name="type">The transform data type (e.g., C2C for complex to complex)</param>
/// <param name="batch">Number of transforms of size nx</param>
/// <param name="stream">A valid CUDA stream created with cudaStreamCreate() (or 0 for the default stream)</param>
/// <param name="mode">The <see cref="Compatibility"/> option to be used</param>
/// <param name="size">Returns the size of the work area required for this plan, in bytes</param>
/// <param name="autoAllocate">If false, indicates that the caller intends to allocate and manage
/// work areas for plans that have been generated.</param>
public CudaFFTPlan1D(cufftHandle handle, int nx, cufftType type, int batch, CUstream stream, Compatibility mode, ref SizeT size, bool autoAllocate)
    : this(handle, nx, type, batch, ref size)
{
    SetStream(stream);
    SetCompatibilityMode(mode);
    SetAutoAllocation(autoAllocate);
}
public CUstream CreateStream(StreamFlags flags)
{
    CUstream phStream = new CUstream();
    this.LastError = CUDADriver.cuStreamCreate(ref phStream, (uint)flags);
    return phStream;
}
public static extern nvjpegStatus nvjpegDecodeBatched(
    nvjpegHandle handle,
    nvjpegJpegState jpeg_handle,
    IntPtr[] data,
    SizeT[] lengths,
    nvjpegImage[] destinations,
    CUstream stream);
/// <summary>
/// Creates a new 2D FFT plan (new API)
/// </summary>
/// <param name="handle">cufftHandle object</param>
/// <param name="nx">The transform size in the X dimension (number of rows)</param>
/// <param name="ny">The transform size in the Y dimension (number of columns)</param>
/// <param name="type">The transform data type (e.g., C2R for complex to real)</param>
/// <param name="stream">A valid CUDA stream created with cudaStreamCreate() (or 0 for the default stream)</param>
/// <param name="mode">The <see cref="Compatibility"/> option to be used</param>
/// <param name="autoAllocate">If false, indicates that the caller intends to allocate and manage
/// work areas for plans that have been generated.</param>
public CudaFFTPlan2D(cufftHandle handle, int nx, int ny, cufftType type, CUstream stream, Compatibility mode, bool autoAllocate)
    : this(handle, nx, ny, type)
{
    SetStream(stream);
    SetCompatibilityMode(mode);
    SetAutoAllocation(autoAllocate);
}
public CUdeviceptr CopyHostToDeviceAsync(IntPtr buffer, uint size, CUstream stream)
{
    CUdeviceptr devPtr = this.Allocate(size);
    this.CopyHostToDeviceAsync(devPtr, buffer, size, stream);
    return devPtr;
}
public static extern nvjpegStatus nvjpegDecodeJpeg(
    nvjpegHandle handle,
    nvjpegJpegDecoder decoder,
    nvjpegJpegState decoder_state,
    nvjpegJpegStream jpeg_bitstream,
    ref nvjpegImage destination,
    nvjpegDecodeParams decode_params,
    CUstream stream);
/// <summary>
/// Set the current stream for CURAND kernel launches. All library functions
/// will use this stream until it is set again.
/// </summary>
/// <param name="stream">The stream to use for subsequent CURAND kernel launches</param>
public void SetStream(CUstream stream)
{
    _status = CudaRandNativeMethods.curandSetStream(_generator, stream);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "curandSetStream", _status));
    if (_status != CurandStatus.Success)
    {
        throw new CudaRandException(_status);
    }
}
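// Usage sketch (assumption): driving CURAND generation on a user-created stream via
// the ManagedCuda.CudaRand wrapper. CudaRandDevice, GeneratorType and GenerateUniform
// are assumed from that wrapper; only SetStream itself appears above.
CudaStream stream = new CudaStream();
CudaRandDevice rand = new CudaRandDevice(GeneratorType.PseudoDefault);
rand.SetPseudoRandomGeneratorSeed(42UL);
rand.SetStream(stream.Stream);                   // all following launches are queued on 'stream'
CudaDeviceVariable<float> d_data = new CudaDeviceVariable<float>(1024);
rand.GenerateUniform(d_data);                    // asynchronous with respect to the host
stream.Synchronize();                            // wait before reading d_data back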
/// <summary>
/// Associates a CUDA stream with a CUFFT plan. All kernel launches
/// made during plan execution are now done through the associated
/// stream, enabling overlap with activity in other streams (for example,
/// data copying). The association remains until the plan is destroyed or
/// the stream is changed with another call to SetStream().
/// </summary>
/// <param name="stream">A valid CUDA stream created with cudaStreamCreate() (or 0 for the default stream)</param>
public void SetStream(CUstream stream)
{
    res = CudaFFTNativeMethods.cufftSetStream(_handle, stream);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cufftSetStream", res));
    if (res != cufftResult.Success)
    {
        throw new CudaFFTException(res);
    }
}
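// Usage sketch (assumption): associating a stream with a 1D C2C plan through the
// ManagedCuda.CudaFFT wrapper. The CudaFFTPlan1D constructor and Exec overload used
// here are assumptions; only SetStream itself appears in the snippets above.
CudaStream stream = new CudaStream();
CudaDeviceVariable<float2> data = new CudaDeviceVariable<float2>(256);
CudaFFTPlan1D plan = new CudaFFTPlan1D(256, cufftType.C2C, 1);
plan.SetStream(stream.Stream);                              // Exec's kernels now launch on 'stream'
plan.Exec(data.DevicePointer, TransformDirection.Forward);  // in-place forward transform
stream.Synchronize();
plan.Dispose();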
public static extern nvjpegStatus nvjpegEncodeImage(
    nvjpegHandle handle,
    nvjpegEncoderState encoder_state,
    nvjpegEncoderParams encoder_params,
    ref nvjpegImage source,
    nvjpegInputFormat input_format,
    int image_width,
    int image_height,
    CUstream stream);
public void launchCopyKernel(physx.PxGpuCopyDescPtr desc, uint count, CUstream stream)
{
    physx.PxGpuDispatcherPtr pvk_in_this = this;
    physx.PxGpuCopyDescPtr pvk_in_desc = desc;
    uint pvk_in_count = count;
    CUstream pvk_in_stream = stream;
    void_PxGpuDispatcherPtr_launchCopyKernelPtr_PxGpuCopyDescPtr_uint_CUstream_(pvk_in_this, pvk_in_desc, pvk_in_count, pvk_in_stream);
}
public override void Init()
{
    cuda = new CUDA(0, true);
    var cuCtx = cuda.CreateContext(0, CUCtxFlags.MapHost);
    cuda.SetCurrentContext(cuCtx);

    cuModule = cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, cudaModuleName));
    cuFunc = cuda.GetModuleFunction(cudaEvaluatorKernelName);
    cuFuncSign = cuda.GetModuleFunction(cudaSignKernelName);

    //reserved memory based on dimension of support vector
    //svVector = new float[TrainedModel.SupportElements[0].Count];
    stream = cuda.CreateStream();
    //memSvSize = (uint)(TrainedModel.SupportElements[0].Count * sizeof(float));
    memSvSize = (uint)(TrainedModel.SupportElements[0].Dim * sizeof(float));

    //allocates memory for buffers
    svVecIntPtrs[0] = cuda.AllocateHost(memSvSize);
    svVecIntPtrs[1] = cuda.AllocateHost(memSvSize);
    mainVecPtr = cuda.CopyHostToDeviceAsync(svVecIntPtrs[0], memSvSize, stream);

    cuSVTexRef = cuda.GetModuleTexture(cuModule, "svTexRef");
    cuda.SetTextureFlags(cuSVTexRef, 0);
    cuda.SetTextureAddress(cuSVTexRef, mainVecPtr, memSvSize);

    //copy labels and alphas
    float[] svLabels = new float[TrainedModel.SupportElements.Length];
    float[] svAlphas = new float[TrainedModel.SupportElements.Length];
    Parallel.For(0, TrainedModel.SupportElementsIndexes.Length, i =>
    {
        int idx = TrainedModel.SupportElementsIndexes[i];
        svLabels[i] = TrainedModel.Y[i];
        //svLabels[i] = TrainningProblem.Labels[idx];
        svAlphas[i] = TrainedModel.Alpha[idx];
    });

    labelsPtr = cuda.CopyHostToDevice(svLabels);
    alphasPtr = cuda.CopyHostToDevice(svAlphas);

    IsInitialized = true;
}
public static extern nvjpegStatus nvjpegEncodeYUV(
    nvjpegHandle handle,
    nvjpegEncoderState encoder_state,
    nvjpegEncoderParams encoder_params,
    ref nvjpegImage source,
    nvjpegChromaSubsampling chroma_subsampling,
    int image_width,
    int image_height,
    CUstream stream);
public override void SetStream(int streamId)
{
    if (streamId < 0)
    {
        throw new ArgumentOutOfRangeException("streamId");
    }
    CUstream cus = (CUstream)_gpu.GetStream(streamId);
    SafeCall(_driver.SetStream(_gen, cus));
}
public void DestroyStream(CUstream stream)
{
    if (_version >= 4000)
    {
        this.LastError = CUDADriver.cuStreamDestroy_v2(stream);
    }
    else
    {
        this.LastError = CUDADriver.cuStreamDestroy(stream);
    }
}
/// <summary>
/// Creates a new stream with the given priority and flags.<para/>
/// This API alters the scheduler priority of work in the stream. Work in a higher priority stream
/// may preempt work already executing in a low priority stream.<para/>
/// <c>priority</c> follows a convention where lower numbers represent higher priorities.<para/>
/// '0' represents default priority.
/// </summary>
/// <param name="priority">Stream priority. Lower numbers represent higher priorities.</param>
/// <param name="flags">Parameters for stream creation (must be <see cref="CUStreamFlags.None"/>)</param>
public CudaStream(int priority, CUStreamFlags flags)
{
    _stream = new CUstream();
    res = DriverAPINativeMethods.Streams.cuStreamCreateWithPriority(ref _stream, flags, priority);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuStreamCreateWithPriority", res));
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }
    _isOwner = true;
}
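// Usage sketch: creating a higher-priority stream next to a default-priority one with
// the constructor above. Lower numbers mean higher priority; '-1' assumes the device
// exposes at least one level above default (query the context's priority range to be sure).
CudaStream normal = new CudaStream(0, CUStreamFlags.None);   // default priority
CudaStream urgent = new CudaStream(-1, CUStreamFlags.None);  // higher priority
// Kernels launched on 'urgent' may preempt kernels already executing on 'normal'.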
/// <summary>
/// Asynchronous copy device to host
/// </summary>
/// <param name="deviceVar">Source device variable</param>
/// <param name="stream">Stream to enqueue the copy on</param>
public void AsyncCopyFromDevice(CudaDeviceVariable<T> deviceVar, CUstream stream)
{
    if (disposed) throw new ObjectDisposedException(this.ToString());
    res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, deviceVar.DevicePointer, SizeInBytes, stream);
    Debug.Write("");//Line(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
    if (res != CUResult.Success) throw new CudaException(res);
}
/// <summary>
/// This function gets the stream to be used by the cudnn library to execute its routines.
/// </summary>
public CudaStream GetStream()
{
    CUstream stream = new CUstream();
    res = CudaDNNNativeMethods.cudnnGetStream(_handle, ref stream);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetStream", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
    return new CudaStream(stream);
}
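// Usage sketch (assumption): pointing cuDNN at a user stream and reading it back.
// CudaDNNContext and its SetStream(CudaStream) method are assumed to be the wrapper
// this GetStream method belongs to; only GetStream itself appears above.
CudaDNNContext dnn = new CudaDNNContext();
CudaStream stream = new CudaStream();
dnn.SetStream(stream);                   // subsequent cuDNN routines execute on 'stream'
CudaStream current = dnn.GetStream();    // wraps the same CUstream handle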
/// <summary>
/// Asynchronous copy 1D array to host
/// </summary>
/// <param name="deviceArray">Source CUDA array</param>
/// <param name="stream">Stream to enqueue the copy on</param>
/// <param name="offset">Offset into the source array, in bytes</param>
public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
{
    if (disposed) throw new ObjectDisposedException(this.ToString());
    res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
    Debug.Write("");//Line(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
    if (res != CUResult.Success) throw new CudaException(res);
}
/// <summary>
/// Asynchronous copy host to device
/// </summary>
/// <param name="devicePtr">Destination device pointer</param>
/// <param name="stream">Stream to enqueue the copy on</param>
public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
{
    if (disposed) throw new ObjectDisposedException(this.ToString());
    res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
    if (res != CUResult.Success) throw new CudaException(res);
}
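// Usage sketch (assumption): overlapping transfers using page-locked host memory.
// CudaPageLockedHostMemory<T> is assumed to be the class the async-copy methods above
// belong to; the two copy signatures themselves are taken from this section.
CudaStream stream = new CudaStream();
CudaDeviceVariable<float> d_buf = new CudaDeviceVariable<float>(1 << 20);
CudaPageLockedHostMemory<float> h_buf = new CudaPageLockedHostMemory<float>(1 << 20);

h_buf.AsyncCopyToDevice(d_buf.DevicePointer, stream.Stream);  // host -> device, queued on 'stream'
// ... launch kernels that consume d_buf on the same stream ...
h_buf.AsyncCopyFromDevice(d_buf, stream.Stream);              // device -> host, queued behind them
stream.Synchronize();                                         // host data is valid only after this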
/// <summary>
/// Unmaps the graphics resource.<para/>
/// Once unmapped, the resource may not be accessed by CUDA until it is mapped again.<para/>
/// This function provides the synchronization guarantee that any CUDA work issued in <c>stream</c> before <see cref="UnMap()"/>
/// will complete before any subsequently issued graphics work begins.<para/>
/// If the resource is not presently mapped for access by CUDA, a <see cref="CUResult.ErrorNotMapped"/> exception is thrown.
/// </summary>
/// <param name="stream">Stream to synchronize the unmap operation with</param>
public void UnMap(CUstream stream)
{
    if (disposed) throw new ObjectDisposedException(this.ToString());
    res = DriverAPINativeMethods.GraphicsInterop.cuGraphicsUnmapResources(1, ref _cudaResource, stream);
    Debug.Write("");//Line(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuGraphicsUnmapResources", res));
    if (res != CUResult.Success) throw new CudaException(res);
    _IsMapped = false;
}
/// <summary>
/// Sets the stream.
/// </summary>
/// <param name="plan">The plan to set the stream for.</param>
/// <param name="streamId">The stream id.</param>
public override void SetStream(FFTPlan plan, int streamId)
{
    if (streamId < 0)
    {
        throw new ArgumentOutOfRangeException("streamId");
    }
    CUstream cus = (CUstream)_gpu.GetStream(streamId);
    FFTPlanEx planEx = Plans[plan];
    // Forward the driver stream handle to the runtime stream struct; without this
    // assignment the plan would always execute on the default stream.
    cudaStream cs = new cudaStream();
    cs.Value = cus.Pointer.ToInt32();
    CUFFTResult res = _driver.cufftSetStream(planEx.CudaFFTHandle, cs);
    if (res != CUFFTResult.Success)
    {
        throw new CudafyMathException(CudafyMathException.csCUDA_EXCEPTION_X, res);
    }
}
/// <summary>
/// Unmaps all graphics resources.<para/>
/// Once unmapped, the resources may not be accessed by CUDA until they are mapped again.<para/>
/// This function provides the synchronization guarantee that any CUDA work issued in <c>stream</c> before <see cref="UnmapAllResources()"/>
/// will complete before any subsequently issued graphics work begins.<para/>
/// If any of the resources are not presently mapped for access by CUDA, a <see cref="CUResult.ErrorNotMapped"/> exception is thrown.
/// </summary>
/// <param name="stream">Stream to synchronize the unmap operation with</param>
public void UnmapAllResources(CUstream stream)
{
    if (disposed) throw new ObjectDisposedException(this.ToString());
    CUResult res;
    res = DriverAPINativeMethods.GraphicsInterop.cuGraphicsUnmapResources((uint)_CUResources.Count, _CUResources.ToArray(), stream);
    Debug.Write("");//Line(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuGraphicsUnmapResources", res));
    if (res != CUResult.Success) throw new CudaException(res);
    foreach (var elem in _resources)
    {
        elem.SetIsUnmapped();
    }
}
public void RunAsync(CudaStream stream, params object[] args)
{
    for (int i = 0; i < args.Length; i++)
    {
        if (args[i] is MyAbstractMemoryBlock)
        {
            args[i] = (args[i] as MyAbstractMemoryBlock).GetDevicePtr(m_GPU);
            if (((CUdeviceptr)args[i]).Pointer == 0)
            {
                // TODO(Premek): this is now handled in observers, should be also handled in the simulation
                throw new InvalidOperationException("Memory block resolved to null device ptr (not allocated on device?).");
            }
        }
    }

    CUstream cuStream = CUstream.NullStream;
    if (stream != null)
    {
        cuStream = stream.Stream;
    }

    m_kernel.RunAsync(cuStream, args);
}
/// <summary>
/// Asynchronous copy 2D array to host
/// </summary>
/// <param name="deviceArray">Source CUDA array</param>
/// <param name="stream">Stream to enqueue the copy on</param>
public void AsyncCopyFromArray2D(CUarray deviceArray, CUstream stream)
{
    if (disposed) throw new ObjectDisposedException(this.ToString());
    CUDAMemCpy2D cpyProps = new CUDAMemCpy2D();
    cpyProps.srcArray = deviceArray;
    cpyProps.srcMemoryType = CUMemoryType.Array;
    cpyProps.dstHost = _intPtr;
    cpyProps.dstMemoryType = CUMemoryType.Host;
    cpyProps.dstPitch = _pitchInBytes;
    cpyProps.WidthInBytes = _width * _typeSize;
    cpyProps.Height = _height;
    res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpy2DAsync_v2(ref cpyProps, stream);
    Debug.Write("");//Line(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpy2DAsync", res));
    if (res != CUResult.Success) throw new CudaException(res);
}
/// <summary>
/// Creates a new wrapper for an existing stream. The wrapper does not own the stream
/// and will not destroy it when disposed.
/// </summary>
public CudaStream(CUstream stream)
{
    _stream = stream;
    _isOwner = false;
}
public curandStatus SetStream(RandGenerator generator, CUstream stream) { return(curandSetStream(generator, stream)); }
private static extern curandStatus curandSetStream(RandGenerator generator, CUstream stream);
public static extern cufftResult cufftSetStream([In] cufftHandle plan, [In] CUstream stream);