/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView( /// AcceleratorStream, ArrayView{T}, LongIndex1)"/> protected internal unsafe override void CopyFromView( AcceleratorStream stream, ArrayView <T> source, LongIndex1 targetOffset) { var binding = Accelerator.BindScoped(); var sourceAddress = new IntPtr(source.LoadEffectiveAddress()); var targetAddress = new IntPtr(ComputeEffectiveAddress(targetOffset)); var lengthInBytes = new IntPtr(source.LengthInBytes); switch (source.AcceleratorType) { case AcceleratorType.CPU: case AcceleratorType.Cuda: CudaException.ThrowIfFailed( CurrentAPI.MemcpyAsync( targetAddress, sourceAddress, lengthInBytes, stream)); break; default: throw new NotSupportedException( RuntimeErrorMessages.NotSupportedTargetAccelerator); } binding.Recover(); }
/// <summary cref="MemoryBuffer{T, TIndex}.CopyToView(AcceleratorStream, ArrayView{T}, Index)"/> protected internal unsafe override void CopyToView( AcceleratorStream stream, ArrayView <T> target, Index sourceOffset) { var binding = Accelerator.BindScoped(); var targetBuffer = target.Source; var sourceAddress = new IntPtr(ComputeEffectiveAddress(sourceOffset)); var targetAddress = new IntPtr(target.LoadEffectiveAddress()); switch (targetBuffer.AcceleratorType) { case AcceleratorType.CPU: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost( targetAddress, sourceAddress, new IntPtr(target.LengthInBytes), stream)); break; case AcceleratorType.Cuda: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToDevice( targetAddress, sourceAddress, new IntPtr(target.LengthInBytes), stream)); break; default: throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator); } binding.Recover(); }
/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(AcceleratorStream, ArrayView{T}, Index)"/> protected internal unsafe override void CopyFromView( AcceleratorStream stream, ArrayView <T> source, Index targetOffset) { var clStream = (CLStream)stream; switch (source.AcceleratorType) { case AcceleratorType.CPU: CLException.ThrowIfFailed( CLAPI.WriteBuffer( clStream.CommandQueue, NativePtr, false, new IntPtr(targetOffset * ElementSize), new IntPtr(source.LengthInBytes), new IntPtr(source.LoadEffectiveAddress()))); break; case AcceleratorType.OpenCL: CLException.ThrowIfFailed( CLAPI.CopyBuffer( clStream.CommandQueue, source.Source.NativePtr, NativePtr, new IntPtr(source.Index * ElementSize), new IntPtr(targetOffset * ElementSize), new IntPtr(source.LengthInBytes))); break; default: throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator); } }
/// <summary>
/// Performs a reduction using a reduction logic.
/// </summary>
/// <typeparam name="T">The underlying type of the reduction.</typeparam>
/// <typeparam name="TShuffleDown">The type of the shuffle logic.</typeparam>
/// <typeparam name="TReduction">The type of the reduction logic.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="input">The input elements to reduce.</param>
/// <param name="output">The output view to store the reduced value.</param>
/// <param name="shuffleDown">The shuffle logic.</param>
/// <param name="reduction">The reduction logic.</param>
/// <remarks>Uses the internal cache to realize a temporary output buffer.</remarks>
public static void Reduce<T, TShuffleDown, TReduction>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> input,
    ArrayView<T> output,
    TShuffleDown shuffleDown,
    TReduction reduction)
    where T : struct
    where TShuffleDown : struct, IShuffleDown<T>
    where TReduction : struct, IReduction<T>
{
    if (!input.IsValid)
    {
        throw new ArgumentNullException(nameof(input));
    }
    if (input.Length < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(input));
    }

    var tempStorageSize = accelerator.ComputeReductionTempStorageSize(input.Length);
    var temp = accelerator.MemoryCache.Allocate<T>(tempStorageSize);
    accelerator.CreateReduction<T, TShuffleDown, TReduction>()(
        stream,
        input,
        output,
        temp,
        shuffleDown,
        reduction);
}
/// <summary>
/// Calculates the histogram on the given view (without overflow checking).
/// </summary>
/// <typeparam name="T">The input view element type.</typeparam>
/// <typeparam name="TIndex">The input view index type.</typeparam>
/// <typeparam name="TBinType">The histogram bin type.</typeparam>
/// <typeparam name="TIncrementor">
/// The operation to increment the value of the bin.
/// </typeparam>
/// <typeparam name="TLocator">
/// The operation to compute the bin location.
/// </typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="view">The input view.</param>
/// <param name="histogram">The histogram view to update.</param>
public static void HistogramUnchecked<
    T,
    TIndex,
    TBinType,
    TIncrementor,
    TLocator>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T, TIndex> view,
    ArrayView<TBinType> histogram)
    where T : unmanaged
    where TIndex : unmanaged, IIndex, IGenericIndex<TIndex>
    where TBinType : unmanaged
    where TIncrementor : struct, IIncrementOperation<TBinType>
    where TLocator : struct, IComputeMultiBinOperation<T, TBinType, TIncrementor>
{
    accelerator.CreateHistogramUnchecked<
        T,
        TIndex,
        TBinType,
        TIncrementor,
        TLocator>()(
        stream,
        view,
        histogram);
}
/// <summary>
/// Performs a reduction using a reduction logic.
/// </summary>
/// <typeparam name="T">The underlying type of the reduction.</typeparam>
/// <typeparam name="TShuffleDown">The type of the shuffle logic.</typeparam>
/// <typeparam name="TReduction">The type of the reduction logic.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="input">The input elements to reduce.</param>
/// <param name="shuffleDown">The shuffle logic.</param>
/// <param name="reduction">The reduction logic.</param>
/// <remarks>Uses the internal cache to realize a temporary output buffer.</remarks>
/// <returns>The reduced value.</returns>
public static T Reduce<T, TShuffleDown, TReduction>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> input,
    TShuffleDown shuffleDown,
    TReduction reduction)
    where T : struct
    where TShuffleDown : struct, IShuffleDown<T>
    where TReduction : struct, IReduction<T>
{
    if (!input.IsValid)
    {
        throw new ArgumentNullException(nameof(input));
    }
    if (input.Length < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(input));
    }

    var tempStorageSize = accelerator.ComputeReductionTempStorageSize(input.Length);
    var storage = accelerator.MemoryCache.Allocate<T>(tempStorageSize + 1);
    var output = storage.GetSubView(0, 1);
    var temp = storage.GetSubView(1);
    accelerator.Reduce(
        stream,
        input,
        output,
        temp,
        shuffleDown,
        reduction);
    stream.Synchronize();
    accelerator.MemoryCache.CopyTo(out T result, 0);
    return result;
}
/// <summary>
/// Transforms elements in the source view into elements in the target view using
/// the given transformer.
/// </summary>
/// <typeparam name="TSource">
/// The source type of the elements to transform.
/// </typeparam>
/// <typeparam name="TSourceStride">The 1D stride of the source view.</typeparam>
/// <typeparam name="TTarget">
/// The target type of the elements that have been transformed.
/// </typeparam>
/// <typeparam name="TTargetStride">The 1D stride of the target view.</typeparam>
/// <typeparam name="TTransformer">
/// The transformer to transform elements from the source type to the target type.
/// </typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="source">The source elements to transform</param>
/// <param name="target">
/// The target elements that will contain the transformed values.
/// </param>
/// <param name="transformer">The used transformer.</param>
public static void Transform<
    TSource,
    TSourceStride,
    TTarget,
    TTargetStride,
    TTransformer>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView1D<TSource, TSourceStride> source,
    ArrayView1D<TTarget, TTargetStride> target,
    TTransformer transformer)
    where TSource : unmanaged
    where TSourceStride : struct, IStride1D
    where TTarget : unmanaged
    where TTargetStride : struct, IStride1D
    where TTransformer : struct, ITransformer<TSource, TTarget> =>
    accelerator.CreateTransformer<
        TSource,
        TSourceStride,
        TTarget,
        TTargetStride,
        TTransformer>()(
        stream,
        source,
        target,
        transformer);
/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromViewInternal(ArrayView{T, Index}, AcceleratorType, TIndex, AcceleratorStream)"/> protected internal override unsafe void CopyFromViewInternal( ArrayView <T, Index> source, AcceleratorType acceleratorType, TIndex targetOffset, AcceleratorStream stream) { switch (acceleratorType) { case AcceleratorType.CPU: Buffer.MemoryCopy( source.Pointer.ToPointer(), GetSubView(targetOffset).Pointer.ToPointer(), source.LengthInBytes, source.LengthInBytes); break; case AcceleratorType.Cuda: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost( GetSubView(targetOffset).Pointer, source.Pointer, new IntPtr(source.LengthInBytes), stream)); break; default: throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator); } }
public ComputeSession(string sessionID, Accelerator accelerator, ComputeHost host)
{
    SessionID = sessionID;
    _stream = accelerator.CreateStream();
    _host = host;
}
/// <summary>
/// Performs a reduction using a reduction logic.
/// </summary>
/// <typeparam name="T">The underlying type of the reduction.</typeparam>
/// <typeparam name="TReduction">The type of the reduction logic.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="input">The input elements to reduce.</param>
/// <remarks>
/// Uses the internal cache to realize a temporary output buffer.
/// </remarks>
/// <returns>The reduced value.</returns>
public static Task<T> ReduceAsync<T, TReduction>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> input)
    where T : unmanaged
    where TReduction : struct, IScanReduceOperation<T> =>
    Task.Run(() => accelerator.Reduce<T, TReduction>(stream, input));
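// A minimal usage sketch for ReduceAsync, assuming `accelerator` is an already created
// Accelerator, AddInt32 comes from ILGPU.Algorithms.ScanReduceOperations, and inputA /
// inputB are hypothetical, already filled integer buffers. Each task wraps a blocking
// Reduce call on a thread-pool thread (see above), so running the reductions on two
// independent streams lets them overlap instead of serializing on a single stream.
// This sketch must run inside an async method.
using var stream1 = accelerator.CreateStream();
using var stream2 = accelerator.CreateStream();

var task1 = accelerator.ReduceAsync<int, AddInt32>(stream1, inputA.View);
var task2 = accelerator.ReduceAsync<int, AddInt32>(stream2, inputB.View);

// Awaiting both yields the two sums once their streams have finished.
int[] sums = await Task.WhenAll(task1, task2);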
/// <summary cref="MemoryBuffer{T, TIndex}.CopyToViewInternal(ArrayView{T, Index}, AcceleratorType, TIndex, AcceleratorStream)"/> protected internal override void CopyToViewInternal( ArrayView <T, Index> target, AcceleratorType acceleratorType, TIndex sourceOffset, AcceleratorStream stream) { switch (acceleratorType) { case AcceleratorType.CPU: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost( target.Pointer, GetSubView(sourceOffset).Pointer, new IntPtr(target.LengthInBytes), stream)); break; case AcceleratorType.Cuda: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToDevice( target.Pointer, GetSubView(sourceOffset).Pointer, new IntPtr(target.LengthInBytes), stream)); break; default: throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator); } }
/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(AcceleratorStream, ArrayView{T}, Index)"/> protected internal unsafe override void CopyFromView( AcceleratorStream stream, ArrayView <T> source, Index targetOffset) { var binding = stream.BindScoped(); var targetAddress = ComputeEffectiveAddress(targetOffset); switch (source.AcceleratorType) { case AcceleratorType.CPU: Unsafe.CopyBlock( targetAddress, source.LoadEffectiveAddress(), (uint)source.LengthInBytes); break; case AcceleratorType.Cuda: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost( new IntPtr(targetAddress), new IntPtr(source.LoadEffectiveAddress()), new IntPtr(source.LengthInBytes), stream)); break; default: throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator); } binding.Recover(); }
/// <summary>
/// Accesses a specific slice of this vector: either a column ('c') or a row ('r').
/// </summary>
/// <param name="row_col_index">The index of the row or column to extract.</param>
/// <param name="row_col">'r' for a row slice, 'c' for a column slice.</param>
/// <returns>The extracted row or column as a new Vector.</returns>
public Vector _AccessSlice(int row_col_index, char row_col)
{
    if (this.Columns == 1)
    {
        throw new Exception("Input Vector cannot be 1D");
    }

    int[] ChangeSelectLength;
    int OutPutVectorLength;

    switch (row_col)
    {
        case 'r':
            //ChangeSelectLength = new int[5] { 0, 1, row_col_index, 0, vector.Columns };
            //OutPutVectorLength = vector.Columns;
            return this._AccessRow(this, row_col_index);
        case 'c':
            ChangeSelectLength = new int[5] { 1, 0, 0, row_col_index, this.Columns };
            OutPutVectorLength = this.Value.Length / this.Columns;
            break;
        default:
            throw new Exception(
                "Invalid slice char selector, choose 'r' for row or 'c' for column");
    }

    //this is bad and I should feel bad
    Accelerator gpu = this.gpu.accelerator;
    AcceleratorStream Stream = gpu.CreateStream();

    var kernelWithStream = gpu.LoadAutoGroupedKernel<
        Index1, ArrayView<float>, ArrayView<float>, ArrayView<int>>(AccessSliceKernal);

    var buffer = gpu.Allocate<float>(OutPutVectorLength);
    var buffer2 = gpu.Allocate<float>(this.Value.Length);
    var buffer3 = gpu.Allocate<int>(5);

    buffer.MemSetToZero(Stream);
    buffer2.MemSetToZero(Stream);
    buffer3.MemSetToZero(Stream);

    buffer2.CopyFrom(Stream, this.Value, 0, 0, this.Value.Length);
    buffer3.CopyFrom(Stream, ChangeSelectLength, 0, 0, ChangeSelectLength.Length);

    kernelWithStream(Stream, OutPutVectorLength, buffer.View, buffer2.View, buffer3.View);

    Stream.Synchronize();

    float[] Output = buffer.GetAsArray(Stream);

    buffer.Dispose();
    buffer2.Dispose();
    buffer3.Dispose();
    Stream.Dispose();

    return new Vector(this.gpu, Output);
}
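// The kernel referenced above (AccessSliceKernal) is not included in this snippet; the
// version below is a hypothetical reconstruction based on the ChangeSelectLength layout
// { columnFlag, rowFlag, rowIndex, colIndex, columns } that _AccessSlice builds.
static void AccessSliceKernal(
    Index1 i,
    ArrayView<float> output,
    ArrayView<float> input,
    ArrayView<int> select)
{
    // Column slice: element i is row i of the fixed column select[3].
    // Row slice:    element i is column i of the fixed row select[2].
    output[i] = input[
        i * select[4] * select[0] +   // stride down the rows for a column slice
        i * select[1] +               // walk along the row for a row slice
        select[2] * select[4] +       // fixed row offset
        select[3]];                   // fixed column offset
}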
/// <summary cref="MemoryBuffer.MemSetToZero(AcceleratorStream)"/> public override void MemSetToZero(AcceleratorStream stream) => CLException.ThrowIfFailed( CLAPI.FillBuffer <byte>( ((CLStream)stream).CommandQueue, NativePtr, 0, IntPtr.Zero, new IntPtr(LengthInBytes)));
/// <inheritdoc/>
protected internal override unsafe void MemSetInternal(
    AcceleratorStream stream,
    byte value,
    long offsetInBytes,
    long lengthInBytes)
{
    stream.Synchronize();
    ref byte targetAddress = ref Unsafe.AsRef<byte>(NativePtr.ToPointer());
    // The snippet is truncated at this point in the source; the fill below is an
    // assumed completion that writes `value` into the requested byte range, assuming
    // the length fits into a 32-bit unsigned count.
    Unsafe.InitBlockUnaligned(
        ref Unsafe.AddByteOffset(ref targetAddress, new IntPtr(offsetInBytes)),
        value,
        (uint)lengthInBytes);
}
/// <summary cref="MemoryBuffer.MemSetToZero(AcceleratorStream)"/> public override void MemSetToZero(AcceleratorStream stream) { var binding = Accelerator.BindScoped(); CudaAPI.Current.Memset(NativePtr, 0, new IntPtr(LengthInBytes), stream); binding.Recover(); }
/// <summary>
/// Performs an initialization on the given view.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="view">The element view.</param>
/// <param name="value">The target value.</param>
public static void Initialize<T>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> view,
    T value)
    where T : unmanaged =>
    accelerator.CreateInitializer<T, Stride1D.Dense>()(
        stream,
        view,
        value);
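// A minimal usage sketch for the Initialize helper above, assuming `accelerator` is an
// already created Accelerator and that the Allocate1D buffer API is available in the
// targeted ILGPU version.
using var stream = accelerator.CreateStream();
using var buffer = accelerator.Allocate1D<int>(1024);

// Fills the whole view with the constant 42; the helper compiles and launches an
// initializer kernel under the hood.
accelerator.Initialize(stream, buffer.View, 42);
stream.Synchronize();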
/// <summary>
/// Performs a reduction using a reduction logic.
/// </summary>
/// <typeparam name="T">The underlying type of the reduction.</typeparam>
/// <typeparam name="TReduction">The type of the reduction logic.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="input">The input elements to reduce.</param>
/// <param name="output">The output view to store the reduced value.</param>
public static void Reduce<T, TReduction>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> input,
    ArrayView<T> output)
    where T : unmanaged
    where TReduction : struct, IScanReduceOperation<T> =>
    accelerator.CreateReduction<T, Stride1D.Dense, TReduction>()(
        stream,
        input,
        output);
/// <summary>
/// Performs an initialization on the given view.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <typeparam name="TStride">The 1D stride of the target view.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="view">The element view.</param>
/// <param name="value">The target value.</param>
public static void Initialize<T, TStride>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView1D<T, TStride> view,
    T value)
    where T : unmanaged
    where TStride : struct, IStride1D =>
    accelerator.CreateInitializer<T, TStride>()(
        stream,
        view,
        value);
/// <summary>
/// Computes a new sequence of values from 0 to view.Length - 1 and writes
/// the computed values to the given view.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <typeparam name="TSequencer">The type of the sequencer to use.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="view">The target view.</param>
/// <param name="sequencer">The used sequencer.</param>
public static void Sequence<T, TSequencer>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> view,
    TSequencer sequencer)
    where T : unmanaged
    where TSequencer : struct, ISequencer<T> =>
    accelerator.CreateSequencer<T, Stride1D.Dense, TSequencer>()(
        stream,
        view,
        sequencer);
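// A minimal usage sketch for the Sequence helper above, assuming `accelerator` is an
// already created Accelerator and that ILGPU.Algorithms ships a prebuilt Int32Sequencer
// (ILGPU.Algorithms.Sequencers); any struct implementing ISequencer<int> works the same
// way. Explicit type arguments are used because T cannot be inferred from the sequencer.
using var stream = accelerator.CreateStream();
using var buffer = accelerator.Allocate1D<int>(16);

// Writes 0, 1, 2, ..., 15 into the buffer.
accelerator.Sequence<int, Int32Sequencer>(stream, buffer.View, new Int32Sequencer());
stream.Synchronize();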
/// <summary>
/// Performs an initialization on the given view.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="view">The element view.</param>
/// <param name="value">The target value.</param>
public static void Initialize<T>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> view,
    T value)
    where T : struct
{
    accelerator.CreateInitializer<T>()(
        stream,
        view,
        value);
}
/// <summary>
/// Performs a reduction using a reduction logic.
/// </summary>
/// <typeparam name="T">The underlying type of the reduction.</typeparam>
/// <typeparam name="TReduction">The type of the reduction logic.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="input">The input elements to reduce.</param>
/// <remarks>
/// Uses the internal cache to realize a temporary output buffer.
/// </remarks>
/// <returns>The reduced value.</returns>
public static T Reduce<T, TReduction>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> input)
    where T : unmanaged
    where TReduction : struct, IScanReduceOperation<T>
{
    var output = accelerator.MemoryCache.Allocate<T>(1);
    accelerator.Reduce<T, TReduction>(stream, input, output);
    output.CopyToCPU(stream, out T result, 1);
    return result;
}
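// A minimal usage sketch for the value-returning Reduce helper above, assuming
// `accelerator` is an already created Accelerator and that ILGPU.Algorithms provides
// AddInt32 (ILGPU.Algorithms.ScanReduceOperations); any IScanReduceOperation<int>
// implementation can be substituted.
using var stream = accelerator.CreateStream();
using var input = accelerator.Allocate1D<int>(1024);
accelerator.Initialize(stream, input.View, 1);

// Sums the 1024 ones; the single-element output buffer comes from the accelerator's
// internal memory cache, as noted in the remarks above.
int sum = accelerator.Reduce<int, AddInt32>(stream, input.View);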
/// <summary>
/// Computes a new sequence of batched values of length sequenceBatchLength, and
/// writes the computed values to the given view. Afterwards, the target view will
/// contain the following values:
/// - [0, sequenceBatchLength - 1] = 0,
/// - [sequenceBatchLength, sequenceBatchLength * 2 - 1] = 1,
/// - ...
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <typeparam name="TSequencer">The type of the sequencer to use.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="view">The target view.</param>
/// <param name="sequenceBatchLength">The length of a single batch.</param>
/// <param name="sequencer">The used sequencer.</param>
public static void BatchedSequence<T, TSequencer>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> view,
    LongIndex1D sequenceBatchLength,
    TSequencer sequencer)
    where T : unmanaged
    where TSequencer : struct, ISequencer<T> =>
    accelerator.CreateBatchedSequencer<T, Stride1D.Dense, TSequencer>()(
        stream,
        view,
        sequenceBatchLength,
        sequencer);
/// <summary>
/// Transforms elements in the source view into elements in the target view using
/// the given transformer.
/// </summary>
/// <typeparam name="T">The type of the elements to transform.</typeparam>
/// <typeparam name="TTransformer">
/// The transformer to transform elements from the source type to the target type.
/// </typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="source">The source elements to transform</param>
/// <param name="target">
/// The target elements that will contain the transformed values.
/// </param>
/// <param name="transformer">The used transformer.</param>
public static void Transform<T, TTransformer>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> source,
    ArrayView<T> target,
    TTransformer transformer)
    where T : unmanaged
    where TTransformer : struct, ITransformer<T, T> =>
    accelerator.CreateTransformer<T, TTransformer>()(
        stream,
        source,
        target,
        transformer);
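// A minimal usage sketch for Transform, assuming `accelerator` is an already created
// Accelerator and that ITransformer<TSource, TTarget> exposes a single
// Transform(TSource) method (verify against the ILGPU.Algorithms version in use);
// SquareTransformer is a made-up example type.
readonly struct SquareTransformer : ITransformer<float, float>
{
    // Maps each source element to its square.
    public float Transform(float value) => value * value;
}

// Usage:
using var stream = accelerator.CreateStream();
using var source = accelerator.Allocate1D<float>(256);
using var target = accelerator.Allocate1D<float>(256);
accelerator.Initialize(stream, source.View, 3.0f);
accelerator.Transform<float, SquareTransformer>(
    stream, source.View, target.View, new SquareTransformer());
stream.Synchronize(); // every element of target now holds 9.0f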
/// <summary cref="MemoryBuffer.MemSetToZero(AcceleratorStream)"/> public override void MemSetToZero(AcceleratorStream stream) { var binding = Accelerator.BindScoped(); CudaException.ThrowIfFailed( CurrentAPI.Memset( NativePtr, 0, new IntPtr(LengthInBytes), stream)); binding.Recover(); }
/// <summary>
/// Computes a new sequence of values from 0 to view.Length - 1 and writes
/// the computed values to the given view.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <typeparam name="TSequencer">The type of the sequencer to use.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="view">The target view.</param>
/// <param name="sequencer">The used sequencer.</param>
public static void Sequence<T, TSequencer>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView<T> view,
    TSequencer sequencer)
    where T : struct
    where TSequencer : struct, ISequencer<T>
{
    accelerator.CreateSequencer<T, TSequencer>()(
        stream,
        view,
        sequencer);
}
public CudaError MemcpyAsync(
    IntPtr destination,
    IntPtr source,
    IntPtr length,
    AcceleratorStream stream)
{
    var cudaStream = stream as CudaStream;
    return cuMemcpyAsync(
        destination,
        source,
        length,
        cudaStream?.StreamPtr ?? IntPtr.Zero);
}
public CudaError MemcpyDeviceToDevice(
    IntPtr destinationDevice,
    IntPtr sourceDevice,
    IntPtr length,
    AcceleratorStream stream)
{
    var cudaStream = stream as CudaStream;
    return MemcpyDeviceToDevice(
        destinationDevice,
        sourceDevice,
        length,
        cudaStream?.StreamPtr ?? IntPtr.Zero);
}
public CudaError Memset(
    IntPtr destinationDevice,
    byte value,
    IntPtr length,
    AcceleratorStream stream)
{
    var cudaStream = stream as CudaStream;
    return Memset(
        destinationDevice,
        value,
        length,
        cudaStream?.StreamPtr ?? IntPtr.Zero);
}
/// <summary>
/// Computes a new repeated sequence of values from 0 to sequenceLength - 1, from 0
/// to sequenceLength - 1, ... and writes the computed values to the given view.
/// Afterwards, the target view will contain the following values:
/// - [0, sequenceLength - 1] = [0, sequenceLength - 1]
/// - [sequenceLength, sequenceLength * 2 - 1] = [0, sequenceLength - 1]
/// - ...
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <typeparam name="TStride">The 1D stride of the view.</typeparam>
/// <typeparam name="TSequencer">The type of the sequencer to use.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="view">The target view.</param>
/// <param name="sequenceLength">The length of a single sequence.</param>
/// <param name="sequencer">The used sequencer.</param>
public static void RepeatedSequence<T, TStride, TSequencer>(
    this Accelerator accelerator,
    AcceleratorStream stream,
    ArrayView1D<T, TStride> view,
    LongIndex1D sequenceLength,
    TSequencer sequencer)
    where T : unmanaged
    where TStride : struct, IStride1D
    where TSequencer : struct, ISequencer<T> =>
    accelerator.CreateRepeatedSequencer<T, TStride, TSequencer>()(
        stream,
        view,
        sequenceLength,
        sequencer);
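// A short sketch contrasting BatchedSequence and RepeatedSequence, assuming
// `accelerator` is an already created Accelerator and Int32Sequencer (as in the
// Sequence sketch above) simply returns its sequence index.
using var stream = accelerator.CreateStream();
using var buffer = accelerator.Allocate1D<int>(6);

// RepeatedSequence with sequenceLength = 3 produces: 0, 1, 2, 0, 1, 2
accelerator.RepeatedSequence<int, Stride1D.Dense, Int32Sequencer>(
    stream, buffer.View, 3L, new Int32Sequencer());

// BatchedSequence with sequenceBatchLength = 3 produces: 0, 0, 0, 1, 1, 1
accelerator.BatchedSequence<int, Int32Sequencer>(
    stream, buffer.View, 3L, new Int32Sequencer());
stream.Synchronize();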