/// <summary>
/// Copies the memory referenced by <paramref name="source"/> into this buffer at
/// the address computed for <paramref name="targetOffset"/>. Overrides
/// <see cref="MemoryBuffer{T, TIndex}.CopyFromView(
/// AcceleratorStream, ArrayView{T}, LongIndex1)"/>.
/// </summary>
/// <param name="stream">The stream that is passed to the memcpy call.</param>
/// <param name="source">
/// The view to copy from; only CPU and Cuda views are accepted.
/// </param>
/// <param name="targetOffset">The offset into this buffer to copy to.</param>
/// <exception cref="NotSupportedException">
/// The source view belongs to an accelerator type other than CPU or Cuda.
/// </exception>
protected internal unsafe override void CopyFromView(
    AcceleratorStream stream,
    ArrayView<T> source,
    LongIndex1 targetOffset)
{
    var binding = Accelerator.BindScoped();
    try
    {
        var sourceAddress = new IntPtr(source.LoadEffectiveAddress());
        var targetAddress = new IntPtr(ComputeEffectiveAddress(targetOffset));
        var lengthInBytes = new IntPtr(source.LengthInBytes);

        switch (source.AcceleratorType)
        {
            // CPU and Cuda sources share the same memcpy call.
            case AcceleratorType.CPU:
            case AcceleratorType.Cuda:
                CudaException.ThrowIfFailed(
                    CurrentAPI.MemcpyAsync(
                        targetAddress,
                        sourceAddress,
                        lengthInBytes,
                        stream));
                break;
            default:
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
        }
    }
    finally
    {
        // Recover the scoped binding on every exit path, including a failed
        // copy or an unsupported source accelerator.
        binding.Recover();
    }
}
/// <summary>
/// Copies <c>target.LengthInBytes</c> bytes from this buffer, starting at the
/// address computed for <paramref name="sourceOffset"/>, into
/// <paramref name="target"/>. Overrides
/// <see cref="MemoryBuffer{T, TIndex}.CopyToView(
/// AcceleratorStream, ArrayView{T}, LongIndex1)"/>.
/// </summary>
/// <param name="stream">
/// The stream that is bound for the duration of the copy and passed to the Cuda
/// memcpy call.
/// </param>
/// <param name="target">
/// The view to copy to; only CPU and Cuda views are accepted.
/// </param>
/// <param name="sourceOffset">The offset into this buffer to copy from.</param>
/// <exception cref="NotSupportedException">
/// The target view belongs to an accelerator type other than CPU or Cuda.
/// </exception>
protected internal unsafe override void CopyToView(
    AcceleratorStream stream,
    ArrayView<T> target,
    LongIndex1 sourceOffset)
{
    var binding = stream.BindScoped();
    try
    {
        var sourceAddress = ComputeEffectiveAddress(sourceOffset);
        var targetAddress = target.LoadEffectiveAddress();

        switch (target.AcceleratorType)
        {
            case AcceleratorType.CPU:
                // The target length is used both as the destination capacity
                // and as the number of bytes to copy.
                Buffer.MemoryCopy(
                    sourceAddress,
                    targetAddress,
                    target.LengthInBytes,
                    target.LengthInBytes);
                break;
            case AcceleratorType.Cuda:
                CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyHostToDevice(
                    new IntPtr(targetAddress),
                    new IntPtr(sourceAddress),
                    new IntPtr(target.LengthInBytes),
                    stream));
                break;
            default:
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
        }
    }
    finally
    {
        // Recover the scoped binding on every exit path, including a failed
        // copy or an unsupported target accelerator.
        binding.Recover();
    }
}
/// <summary>
/// Determines how many temp-storage elements a radix sort over
/// <paramref name="dataLength"/> elements requires.
/// </summary>
/// <typeparam name="T">The underlying type of the sort operation.</typeparam>
/// <typeparam name="TRadixSortOperation">
/// The type of the radix-sort operation.
/// </typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="dataLength">The number of data elements to sort.</param>
/// <returns>
/// The required number of temp-storage elements in 32 bit ints.
/// </returns>
public static Index1 ComputeRadixSortTempStorageSize<T, TRadixSortOperation>(
    this Accelerator accelerator,
    Index1 dataLength)
    where T : unmanaged
    where TRadixSortOperation : struct, IRadixSortOperation<T>
{
    const int unrollFactor = 4;

    // Temp storage of the scan operation, narrowed to an int index.
    LongIndex1 scanStorageLong =
        accelerator.ComputeScanTempStorageSize<T>(dataLength);
    IndexTypeExtensions.AssertIntIndexRange(scanStorageLong);
    Index1 scanStorage = scanStorageLong.ToIntIndex();

    // Number of groups: the maximum thread count on the CPU accelerator,
    // otherwise derived from the grid-stride loop extent.
    int groupCount;
    if (accelerator.AcceleratorType != AcceleratorType.CPU)
    {
        var (gridDim, _) = accelerator.ComputeGridStrideLoopExtent(
            dataLength,
            out int iterationsPerGroup);
        groupCount = gridDim * iterationsPerGroup;
    }
    else
    {
        groupCount = accelerator.MaxNumThreads;
    }

    // Size of the data expressed in ints, narrowed to an int.
    long dataInIntsLong = Interop.ComputeRelativeSizeOf<int, T>(dataLength);
    IndexTypeExtensions.AssertIntIndexRange(dataInIntsLong);
    int dataInInts = (int)dataInIntsLong;

    return groupCount * unrollFactor * 2 + dataInInts + scanStorage;
}
/// <summary>
/// Computes the required number of temp-storage elements for a unique
/// operation on elements of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The element type of the unique operation.</typeparam>
/// <param name="accelerator">The accelerator (not read by this method).</param>
/// <param name="dataLength">
/// The number of data elements (not read by this method).
/// </param>
/// <returns>
/// The required number of temp-storage elements in 32 bit ints; always 1.
/// </returns>
public static LongIndex1 ComputeUniqueTempStorageSize<T>(
    this Accelerator accelerator,
    LongIndex1 dataLength)
    where T : unmanaged
{
    // 1 int for SequentialGroupExecutor.
    const int sequentialGroupExecutorInts = 1;
    return sequentialGroupExecutorInts;
}
/// <summary>
/// Kernel that copies the element at <paramref name="index"/> from
/// <paramref name="source"/> to <paramref name="data"/>, accessing both views
/// through a <c>LongIndex1</c> index.
/// </summary>
/// <param name="index">The current kernel index.</param>
/// <param name="data">The view that is written.</param>
/// <param name="source">The view that is read.</param>
internal static void ArrayViewLongLeaIndexKernel(
    Index1 index,
    ArrayView<int> data,
    ArrayView<int> source)
{
    // Convert once so that both element accesses use the long index type.
    var wideIndex = (LongIndex1)index;
    data[wideIndex] = source[wideIndex];
}
/// <summary>
/// Copies data from the associated accelerator into CPU memory using the
/// accelerator's default stream.
/// </summary>
/// <param name="acceleratorMemoryOffset">The source memory offset.</param>
/// <param name="cpuMemoryOffset">The target memory offset.</param>
/// <param name="extent">The extent (number of elements).</param>
public void CopyFromAccelerator(
    TIndex acceleratorMemoryOffset,
    TIndex cpuMemoryOffset,
    LongIndex1 extent)
{
    // Forward to the stream-based overload with the default stream.
    var defaultStream = Accelerator.DefaultStream;
    CopyFromAccelerator(
        defaultStream,
        acceleratorMemoryOffset,
        cpuMemoryOffset,
        extent);
}
/// <summary>
/// Replaces the element of <c>Data</c> at <paramref name="linearIndex"/> with
/// the result of <c>ToLong</c> applied to its current value (cast to
/// <see cref="ulong"/>). Indices at or beyond <c>Data.Length</c> are ignored.
/// </summary>
/// <param name="linearIndex">The linear element index to process.</param>
public readonly void Execute(LongIndex1 linearIndex)
{
    // Nothing to do for indices outside of the data view.
    if (linearIndex >= Data.Length)
    {
        return;
    }

    var currentValue = (ulong)Data[linearIndex];
    Data[linearIndex] = ToLong(currentValue);
}
/// <summary>
/// Computes the required number of temp-storage elements for a scan operation
/// on elements of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The element type of the scan operation.</typeparam>
/// <param name="accelerator">The accelerator.</param>
/// <param name="dataLength">
/// The number of data elements to scan (not read by the visible code).
/// </param>
/// <returns>The required number of temp-storage elements in 32 bit ints.</returns>
/// <remarks>
/// The size is selected by the accelerator type only: 1 for CPU accelerators, the
/// result of <c>ComputeNumIntElementsForSinglePassScan</c> for Cuda accelerators,
/// and otherwise the relative size (in ints) of <c>MaxNumGroupsExtent.Item1</c>
/// elements of type <typeparamref name="T"/>.
/// </remarks>
public static LongIndex1 ComputeScanTempStorageSize <T>( this Accelerator accelerator, LongIndex1 dataLength) where T : unmanaged { return(accelerator.AcceleratorType switch { AcceleratorType.CPU => 1, AcceleratorType.Cuda => ComputeNumIntElementsForSinglePassScan <T>(), _ => Interop.ComputeRelativeSizeOf <int, T>(accelerator.MaxNumGroupsExtent.Item1), });
/// <summary>
/// Copies the memory referenced by <paramref name="source"/> into this buffer at
/// element offset <paramref name="targetOffset"/>. Overrides
/// <see cref="MemoryBuffer{T, TIndex}.CopyFromView(
/// AcceleratorStream, ArrayView{T}, LongIndex1)"/>.
/// </summary>
/// <param name="stream">
/// The stream to use; it is cast to <c>CLStream</c> to obtain the command queue.
/// </param>
/// <param name="source">
/// The view to copy from; only CPU and OpenCL views are accepted.
/// </param>
/// <param name="targetOffset">
/// The offset into this buffer in elements; it is scaled by <c>ElementSize</c>.
/// </param>
/// <exception cref="NotSupportedException">
/// The source view belongs to an accelerator type other than CPU or OpenCL.
/// </exception>
protected internal unsafe override void CopyFromView(
    AcceleratorStream stream,
    ArrayView<T> source,
    LongIndex1 targetOffset)
{
    var binding = Accelerator.BindScoped();
    try
    {
        var clStream = (CLStream)stream;

        switch (source.AcceleratorType)
        {
            case AcceleratorType.CPU:
                // Host memory source: write from the view's address into
                // this buffer.
                CLException.ThrowIfFailed(
                    CurrentAPI.WriteBuffer(
                        clStream.CommandQueue,
                        NativePtr,
                        false,
                        new IntPtr(targetOffset * ElementSize),
                        new IntPtr(source.LengthInBytes),
                        new IntPtr(source.LoadEffectiveAddress())));
                break;
            case AcceleratorType.OpenCL:
                // Buffer-to-buffer copy; both offsets are scaled by the
                // element size.
                CLException.ThrowIfFailed(
                    CurrentAPI.CopyBuffer(
                        clStream.CommandQueue,
                        source.Source.NativePtr,
                        NativePtr,
                        new IntPtr(source.Index * ElementSize),
                        new IntPtr(targetOffset * ElementSize),
                        new IntPtr(source.LengthInBytes)));
                break;
            default:
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
        }
    }
    finally
    {
        // Recover the scoped binding on every exit path, including a failed
        // stream cast, a failed copy or an unsupported source accelerator.
        binding.Recover();
    }
}
/// <summary>
/// Copies <c>target.LengthInBytes</c> bytes from this buffer, starting at
/// element offset <paramref name="sourceOffset"/>, into
/// <paramref name="target"/>. Overrides
/// <see cref="MemoryBuffer{T, TIndex}.CopyToView(
/// AcceleratorStream, ArrayView{T}, LongIndex1)"/>.
/// </summary>
/// <param name="stream">The stream that is passed to the API calls.</param>
/// <param name="target">
/// The view to copy to; only CPU and OpenCL views are accepted.
/// </param>
/// <param name="sourceOffset">
/// The offset into this buffer in elements; it is scaled by <c>ElementSize</c>.
/// </param>
/// <exception cref="NotSupportedException">
/// The target view belongs to an accelerator type other than CPU or OpenCL.
/// </exception>
protected internal unsafe override void CopyToView(
    AcceleratorStream stream,
    ArrayView<T> target,
    LongIndex1 sourceOffset)
{
    var binding = Accelerator.BindScoped();
    try
    {
        switch (target.AcceleratorType)
        {
            case AcceleratorType.CPU:
                // Host memory target: read from this buffer into the view's
                // address.
                CLException.ThrowIfFailed(
                    CurrentAPI.ReadBuffer(
                        stream,
                        NativePtr,
                        false,
                        new IntPtr(sourceOffset * ElementSize),
                        new IntPtr(target.LengthInBytes),
                        new IntPtr(target.LoadEffectiveAddress())));
                break;
            case AcceleratorType.OpenCL:
                // Buffer-to-buffer copy; both offsets are scaled by the
                // element size.
                CLException.ThrowIfFailed(
                    CurrentAPI.CopyBuffer(
                        stream,
                        NativePtr,
                        target.Source.NativePtr,
                        new IntPtr(sourceOffset * ElementSize),
                        new IntPtr(target.Index * ElementSize),
                        new IntPtr(target.LengthInBytes)));
                break;
            default:
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
        }
    }
    finally
    {
        // Recover the scoped binding on every exit path, including a failed
        // copy or an unsupported target accelerator.
        binding.Recover();
    }
}
/// <summary>
/// Copies data from the associated accelerator into CPU memory.
/// </summary>
/// <param name="stream">The stream to use.</param>
/// <param name="acceleratorMemoryOffset">The source memory offset.</param>
/// <param name="cpuMemoryOffset">The target memory offset.</param>
/// <param name="extent">The extent (number of elements).</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// One of the offsets is out of bounds, or <paramref name="extent"/> is smaller
/// than 1, larger than the buffer length, or exceeds the range that remains
/// behind either offset.
/// </exception>
public void CopyFromAccelerator(
    AcceleratorStream stream,
    TIndex acceleratorMemoryOffset,
    TIndex cpuMemoryOffset,
    LongIndex1 extent)
{
    // Argument validation.
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }
    if (!cpuMemoryOffset.InBounds(CPUView.Extent))
    {
        throw new ArgumentOutOfRangeException(nameof(cpuMemoryOffset));
    }
    if (!acceleratorMemoryOffset.InBounds(Extent))
    {
        throw new ArgumentOutOfRangeException(nameof(acceleratorMemoryOffset));
    }
    if (extent < 1 || extent > Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // Linearize both offsets and make sure that the requested range fits on
    // the accelerator side as well as on the CPU side.
    var sourceStart = acceleratorMemoryOffset.ComputeLinearIndex(Extent);
    var targetStart = cpuMemoryOffset.ComputeLinearIndex(CPUView.Extent);
    if (sourceStart + extent > Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (targetStart + extent > CPUView.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    var targetView = CPUView.GetSubView(cpuMemoryOffset, extent);
    CopyToView(stream, targetView, sourceStart);
}
/// <summary>
/// Creates a variable view that wraps the sub view of length 1 starting at
/// <paramref name="element"/>.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="view">The source view.</param>
/// <param name="element">The index of the element to wrap.</param>
/// <returns>The variable view for the selected element.</returns>
public static VariableView<T> GetVariableView<T>(
    this ArrayView<T> view,
    LongIndex1 element)
    where T : unmanaged
{
    var singleElementView = view.GetSubView(element, 1L);
    return new VariableView<T>(singleElementView);
}