/// <summary>
/// Validates the given launch arguments, assembles the reflection argument
/// array and invokes the launcher.
/// </summary>
/// <typeparam name="T">The index type of the launch dimension.</typeparam>
/// <param name="dimension">The launch dimension.</param>
/// <param name="stream">The accelerator stream to launch on.</param>
/// <param name="args">The uniform kernel arguments.</param>
/// <param name="dynSharedMemArraySizes">
/// The sizes of the dynamically sized shared-memory variables. A null array is
/// treated like an empty one.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> or <paramref name="args"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The number of shared-memory sizes or the number of uniform arguments does
/// not match the entry point of the compiled kernel.
/// </exception>
private void InvokeLauncher<T>(
    T dimension,
    AcceleratorStream stream,
    object[] args,
    int[] dynSharedMemArraySizes)
    where T : IIndex
{
    // Reject null references up front; args.Length is read below and would
    // otherwise fail with a NullReferenceException.
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }
    if (args == null)
    {
        throw new ArgumentNullException(nameof(args));
    }

    var numSharedMemVars = dynSharedMemArraySizes?.Length ?? 0;
    var entryPoint = CompiledKernel.EntryPoint;
    if (entryPoint.NumDynamicallySizedSharedMemoryVariables != numSharedMemVars)
    {
        throw new ArgumentException(
            RuntimeErrorMessages.InvalidNumberOfDynamicallySharedMemoryVariableArgs);
    }
    if (entryPoint.NumUniformVariables != args.Length)
    {
        throw new ArgumentException(
            RuntimeErrorMessages.InvalidNumberOfUniformArgs);
    }

    // Layout: [fixed launcher parameters | uniform args | shared-memory sizes]
    var reflectionArgs =
        new object[KernelParameterOffset + args.Length + numSharedMemVars];
    reflectionArgs[KernelInstanceParamIdx] = this;
    reflectionArgs[KernelStreamParamIdx] = stream;
    reflectionArgs[KernelParamDimensionIdx] = dimension;
    args.CopyTo(reflectionArgs, KernelParameterOffset);
    if (numSharedMemVars > 0)
    {
        dynSharedMemArraySizes.CopyTo(
            reflectionArgs,
            KernelParameterOffset + args.Length);
    }

    Launcher.Invoke(null, reflectionArgs);
}
/// <summary>
/// Copies a region of this buffer into the given 3D array.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="target">The target array.</param>
/// <param name="sourceOffset">The offset of the region within this buffer.</param>
/// <param name="targetOffset">The offset of the region within the target.</param>
/// <param name="extent">The number of elements to copy in each dimension.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// An offset is negative or outside its container, or the region described by
/// <paramref name="extent"/> does not fit into the buffer or the target.
/// </exception>
/// <remarks>
/// Note that the output array will contain the data as a transposed array to
/// match the source layout.
/// </remarks>
public void CopyTo(
    AcceleratorStream stream,
    T[,,] target,
    LongIndex3 sourceOffset,
    LongIndex3 targetOffset,
    LongIndex3 extent)
{
    if (target == null)
    {
        throw new ArgumentNullException(nameof(target));
    }

    long targetLengthX = target.GetLongLength(0);
    long targetLengthY = target.GetLongLength(1);
    long targetLengthZ = target.GetLongLength(2);

    // Source offset: non-negative and inside the buffer.
    if (sourceOffset.X < 0 || sourceOffset.Y < 0 || sourceOffset.Z < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }
    if (sourceOffset.X >= Extent.X ||
        sourceOffset.Y >= Extent.Y ||
        sourceOffset.Z >= Extent.Z)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    // Target offset: non-negative and inside the target array.
    if (targetOffset.X < 0 || targetOffset.Y < 0 || targetOffset.Z < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }
    if (targetOffset.X >= targetLengthX ||
        targetOffset.Y >= targetLengthY ||
        targetOffset.Z >= targetLengthZ)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }

    // Extent: non-negative and fitting into both the buffer and the target.
    if (extent.X < 0 || extent.Y < 0 || extent.Z < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (sourceOffset.X + extent.X > Extent.X ||
        sourceOffset.Y + extent.Y > Extent.Y ||
        sourceOffset.Z + extent.Z > Extent.Z)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (targetOffset.X + extent.X > targetLengthX ||
        targetOffset.Y + extent.Y > targetLengthY ||
        targetOffset.Z + extent.Z > targetLengthZ)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // Fetch the region into a linear staging array and scatter it afterwards.
    var staging = new T[extent.Size];
    buffer.CopyTo(stream, staging, sourceOffset, 0, extent);

    for (long x = 0; x < extent.X; ++x)
    {
        for (long y = 0; y < extent.Y; ++y)
        {
            for (long z = 0; z < extent.Z; ++z)
            {
                var linearIdx = new LongIndex3(x, y, z).ComputeLinearIndex(extent);
                target[
                    x + targetOffset.X,
                    y + targetOffset.Y,
                    z + targetOffset.Z] = staging[linearIdx];
            }
        }
    }
}
/// <summary>
/// Copies data from the associated accelerator into CPU memory.
/// </summary>
/// <param name="stream">The stream to use.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> is null.
/// </exception>
public void CopyFromAccelerator(AcceleratorStream stream) =>
    CopyToView(
        stream ?? throw new ArgumentNullException(nameof(stream)),
        CPUView.BaseView,
        0);
/// <summary>
/// Copies data from CPU memory to the associated accelerator.
/// </summary>
/// <param name="stream">The stream to use.</param>
/// <param name="acceleratorMemoryOffset">The target memory offset.</param>
/// <remarks>
/// Forwards to the four-argument overload with a default CPU memory offset and
/// the length of the CPU view as extent.
/// </remarks>
public void CopyToAccelerator(
    AcceleratorStream stream,
    TIndex acceleratorMemoryOffset)
{
    CopyToAccelerator(
        stream,
        default,
        acceleratorMemoryOffset,
        CPUView.Length);
}
/// <summary>
/// Copies data from CPU memory to the associated accelerator.
/// </summary>
/// <param name="stream">The stream to use.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> is null.
/// </exception>
public void CopyToAccelerator(AcceleratorStream stream) =>
    CopyFromView(
        stream ?? throw new ArgumentNullException(nameof(stream)),
        CPUView.AsLinearView(),
        0);
/// <summary>
/// Copies data from the associated accelerator into CPU memory.
/// </summary>
/// <param name="stream">The stream to use.</param>
/// <param name="cpuMemoryOffset">The target memory offset.</param>
/// <remarks>
/// Forwards to the four-argument overload with a default accelerator memory
/// offset and the length of this instance as extent.
/// </remarks>
public void CopyFromAccelerator(
    AcceleratorStream stream,
    TIndex cpuMemoryOffset)
{
    CopyFromAccelerator(
        stream,
        default,
        cpuMemoryOffset,
        Length);
}
/// <summary>
/// Copies a region of the given jagged 3D array into this buffer.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="source">The source array.</param>
/// <param name="sourceOffset">The offset of the region within the source.</param>
/// <param name="targetOffset">The target offset within this buffer.</param>
/// <param name="extent">The number of elements to copy in each dimension.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="extent"/> has a negative component or exceeds the outer
/// source length, or <paramref name="sourceOffset"/> has a negative component
/// or the requested outer range does not fit into the source.
/// </exception>
/// <remarks>
/// The region is staged in a temporary linear array before it is handed to the
/// underlying buffer. Null sub-arrays and entries that lie outside of a (possibly
/// shorter) sub-array are skipped; the corresponding staging slots keep their
/// default value.
/// </remarks>
public void CopyFrom(
    AcceleratorStream stream,
    T[][][] source,
    Index3 sourceOffset,
    Index3 targetOffset,
    Index3 extent)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }
    if (extent.X < 0 || extent.Y < 0 || extent.Z < 0 ||
        extent.X > source.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // The offset is bounded by the source, not by the extent. Since
    // extent.X <= source.Length holds here, the subtraction cannot overflow.
    // Inner dimensions are jagged, so only their sign can be checked up front.
    if (sourceOffset.X < 0 || sourceOffset.Y < 0 || sourceOffset.Z < 0 ||
        sourceOffset.X > source.Length - extent.X)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    var tempBuffer = new T[extent.Size];
    for (int i = 0; i < extent.X; ++i)
    {
        var subData = source[i + sourceOffset.X];
        if (subData == null)
        {
            continue;
        }

        // Skip rows that are out of bounds (the offset has to be taken into
        // account, as it shifts every read).
        int rowEnd = IntrinsicMath.Min(subData.Length - sourceOffset.Y, extent.Y);
        for (int j = 0; j < rowEnd; ++j)
        {
            var subSubData = subData[j + sourceOffset.Y];
            if (subSubData == null)
            {
                continue;
            }

            // Skip entries that are out of bounds
            int entryEnd = IntrinsicMath.Min(
                subSubData.Length - sourceOffset.Z,
                extent.Z);
            for (int k = 0; k < entryEnd; ++k)
            {
                var targetIdx = new Index3(i, j, k).ComputeLinearIndex(extent);
                tempBuffer[targetIdx] = subSubData[k + sourceOffset.Z];
            }
        }
    }

    buffer.CopyFrom(
        stream,
        tempBuffer,
        0,
        targetOffset,
        extent.Size);
}
/// <summary>
/// Copies data from CPU memory to the associated accelerator.
/// </summary>
/// <param name="stream">The stream to use.</param>
/// <param name="cpuMemoryOffset">The source memory offset.</param>
/// <param name="acceleratorMemoryOffset">The target memory offset.</param>
/// <remarks>
/// Forwards to the four-argument overload and passes the length of the CPU
/// array view as extent.
/// </remarks>
public void CopyToAccelerator(
    AcceleratorStream stream,
    TIndex cpuMemoryOffset,
    TIndex acceleratorMemoryOffset)
{
    CopyToAccelerator(
        stream,
        cpuMemoryOffset,
        acceleratorMemoryOffset,
        CPUArrayView.Length);
}
/// <summary>
/// Launches the kernel with the given dimension, shared-memory sizes and
/// arguments by forwarding to the internal launcher invocation.
/// </summary>
/// <typeparam name="TIndex">The grouped index type.</typeparam>
/// <param name="dimension">The launch dimension.</param>
/// <param name="stream">The accelerator stream.</param>
/// <param name="dynSharedMemArraySizes">
/// The sizes of the dynamically sized shared-memory variables.
/// </param>
/// <param name="args">The kernel arguments.</param>
public void Launch<TIndex>(
    TIndex dimension,
    AcceleratorStream stream,
    int[] dynSharedMemArraySizes,
    params object[] args)
    where TIndex : struct, IGroupedIndex =>
    InvokeLauncher(dimension, stream, args, dynSharedMemArraySizes);
/// <summary>
/// Copies the given value into the cache.
/// </summary>
/// <typeparam name="T">The value type to copy.</typeparam>
/// <param name="stream">The accelerator stream.</param>
/// <param name="source">The value to copy.</param>
/// <param name="sourceIndex">The index passed on to the cache copy operation.</param>
public unsafe void CopyFrom<T>(AcceleratorStream stream, T source, Index sourceIndex)
    where T : struct
{
    // Wrap the local copy of the value so that it can be addressed as a view.
    using (var valueWrapper = ViewPointerWrapper.Create(ref source))
    {
        var singleElementView = new ArrayView<T>(valueWrapper, 0, 1);
        var rawView = singleElementView.Cast<byte>();
        cache.CopyFromView(stream, rawView, sourceIndex);
    }
}
/// <summary>
/// Copies a region of this buffer into the given jagged 3D array.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="target">The target array.</param>
/// <param name="sourceOffset">The offset of the region within this buffer.</param>
/// <param name="targetOffset">The offset of the region within the target.</param>
/// <param name="extent">The number of elements to copy in each dimension.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// An offset or extent component is negative, the source region does not fit
/// into this buffer, or the outer target range does not fit into
/// <paramref name="target"/>.
/// </exception>
/// <remarks>
/// All arguments are validated before any data is transferred. Null sub-arrays
/// of the target are skipped. The inner target dimensions are jagged and are
/// therefore not validated up front.
/// </remarks>
public void CopyTo(
    AcceleratorStream stream,
    T[][][] target,
    Index3 sourceOffset,
    Index3 targetOffset,
    Index3 extent)
{
    if (target == null)
    {
        throw new ArgumentNullException(nameof(target));
    }
    if (extent.X < 0 || extent.Y < 0 || extent.Z < 0 ||
        extent.X > target.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (sourceOffset.X < 0 || sourceOffset.Y < 0 || sourceOffset.Z < 0 ||
        sourceOffset.X >= Extent.X ||
        sourceOffset.Y >= Extent.Y ||
        sourceOffset.Z >= Extent.Z)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    // The source region has to fit into this buffer. The offsets are known to
    // be inside the buffer here, so the subtractions cannot overflow.
    if (extent.X > Extent.X - sourceOffset.X ||
        extent.Y > Extent.Y - sourceOffset.Y ||
        extent.Z > Extent.Z - sourceOffset.Z)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // Validate the target offset before touching the device; extent.X is
    // known to be at most target.Length, so the subtraction cannot overflow.
    if (targetOffset.X < 0 || targetOffset.Y < 0 || targetOffset.Z < 0 ||
        targetOffset.X > target.Length - extent.X)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }

    var tempBuffer = new T[extent.Size];
    buffer.CopyTo(
        stream,
        tempBuffer,
        sourceOffset,
        0,
        extent);

    for (int i = 0; i < extent.X; ++i)
    {
        var subData = target[i + targetOffset.X];
        if (subData == null)
        {
            continue;
        }
        for (int j = 0; j < extent.Y; ++j)
        {
            var subSubData = subData[j + targetOffset.Y];
            if (subSubData == null)
            {
                continue;
            }
            for (int k = 0; k < extent.Z; ++k)
            {
                var sourceIdx = new Index3(i, j, k).ComputeLinearIndex(extent);
                subSubData[k + targetOffset.Z] = tempBuffer[sourceIdx];
            }
        }
    }
}
/// <summary>
/// Sets the contents of the current buffer to the given byte value by
/// delegating to the wrapped buffer.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="value">The value to write into the memory buffer.</param>
/// <param name="offsetInBytes">The raw offset in bytes.</param>
/// <param name="lengthInBytes">The raw length in bytes.</param>
protected internal override void MemSetInternal(
    AcceleratorStream stream,
    byte value,
    long offsetInBytes,
    long lengthInBytes)
{
    Buffer.MemSetInternal(
        stream,
        value,
        offsetInBytes,
        lengthInBytes);
}
/// <summary>
/// Copies a single value out of the cache.
/// </summary>
/// <typeparam name="T">The value type to copy.</typeparam>
/// <param name="stream">The accelerator stream.</param>
/// <param name="target">Receives the copied value.</param>
/// <param name="targetIndex">The index passed on to the cache copy operation.</param>
public unsafe void CopyTo<T>(AcceleratorStream stream, out T target, Index targetIndex)
    where T : struct
{
    // The out parameter has to be assigned before a reference to it is taken.
    target = default(T);
    using (var valueWrapper = ViewPointerWrapper.Create(ref target))
    {
        var singleElementView = new ArrayView<T>(valueWrapper, 0, 1);
        var rawView = singleElementView.Cast<byte>();
        cache.CopyToView(stream, rawView, targetIndex);
    }
}
/// <summary>
/// Copies a region of the given 2D array into this buffer.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="source">The source array.</param>
/// <param name="sourceOffset">The offset of the region within the source.</param>
/// <param name="targetOffset">The offset of the region within this buffer.</param>
/// <param name="extent">The number of elements to copy in each dimension.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// An offset is negative or outside its container, or the region described by
/// <paramref name="extent"/> does not fit into the source or the buffer.
/// </exception>
/// <remarks>
/// Note that the input array will be stored as a transposed array to match the
/// target layout.
/// </remarks>
public void CopyFrom(
    AcceleratorStream stream,
    T[,] source,
    LongIndex2 sourceOffset,
    LongIndex2 targetOffset,
    LongIndex2 extent)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    long sourceLengthX = source.GetLongLength(0);
    long sourceLengthY = source.GetLongLength(1);

    // Source offset: non-negative and inside the source array.
    if (sourceOffset.X < 0 || sourceOffset.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }
    if (sourceOffset.X >= sourceLengthX || sourceOffset.Y >= sourceLengthY)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    // Target offset: non-negative and inside the buffer.
    if (targetOffset.X < 0 || targetOffset.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }
    if (targetOffset.X >= Extent.X || targetOffset.Y >= Extent.Y)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }

    // Extent: non-negative and fitting into both the source and the buffer.
    if (extent.X < 0 || extent.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (sourceOffset.X + extent.X > sourceLengthX ||
        sourceOffset.Y + extent.Y > sourceLengthY)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (targetOffset.X + extent.X > Extent.X ||
        targetOffset.Y + extent.Y > Extent.Y)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // Gather the region into a linear staging array before uploading it.
    var staging = new T[extent.Size];
    for (long x = 0; x < extent.X; ++x)
    {
        for (long y = 0; y < extent.Y; ++y)
        {
            var linearIdx = new LongIndex2(x, y).ComputeLinearIndex(extent);
            staging[linearIdx] = source[x + sourceOffset.X, y + sourceOffset.Y];
        }
    }

    buffer.CopyFrom(
        stream,
        staging,
        0,
        targetOffset,
        extent.Size);
}
/// <summary>
/// Creates a launcher delegate of the requested type that is bound to this
/// instance and the given accelerator stream.
/// </summary>
/// <typeparam name="TDelegate">The delegate type.</typeparam>
/// <param name="stream">The accelerator stream to bind.</param>
/// <returns>
/// The created delegate, or null if it cannot be converted to
/// <typeparamref name="TDelegate"/>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> is null.
/// </exception>
public TDelegate CreateStreamLauncherDelegate<TDelegate>(AcceleratorStream stream)
    where TDelegate : class
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }
    Debug.Assert(implicitStreamLauncher != null, "Invalid implicit stream launcher");

    var launcherArgument = new ImplicitKernelLauncherArgument(this, stream);
    object launcherDelegate = implicitStreamLauncher.CreateDelegate(
        typeof(TDelegate),
        launcherArgument);
    return launcherDelegate as TDelegate;
}
/// <summary>
/// Copies a region of this buffer into the given jagged 2D array.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="target">The target array.</param>
/// <param name="sourceOffset">The offset of the region within this buffer.</param>
/// <param name="targetOffset">The offset of the region within the target.</param>
/// <param name="extent">The number of elements to copy in each dimension.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// An offset is negative or out of range, or the region described by
/// <paramref name="extent"/> does not fit into the buffer or the outer target
/// dimension.
/// </exception>
/// <remarks>Null rows of the target are skipped.</remarks>
public void CopyTo(
    AcceleratorStream stream,
    T[][] target,
    LongIndex2 sourceOffset,
    LongIndex2 targetOffset,
    LongIndex2 extent)
{
    if (target == null)
    {
        throw new ArgumentNullException(nameof(target));
    }

    long rowCount = target.LongLength;

    // Source offset: non-negative and inside the buffer.
    if (sourceOffset.X < 0 || sourceOffset.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }
    if (sourceOffset.X >= Extent.X || sourceOffset.Y >= Extent.Y)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    // Target offset: non-negative and inside the outer target dimension.
    if (targetOffset.X < 0 || targetOffset.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }
    if (targetOffset.X >= rowCount)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }

    // Extent: non-negative and fitting into the buffer and the outer target
    // dimension.
    if (extent.X < 0 || extent.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (sourceOffset.X + extent.X > Extent.X ||
        sourceOffset.Y + extent.Y > Extent.Y)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (targetOffset.X + extent.X > rowCount)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // Fetch the region into a linear staging array and scatter it afterwards.
    var staging = new T[extent.Size];
    buffer.CopyTo(stream, staging, sourceOffset, 0, extent);

    for (long x = 0; x < extent.X; ++x)
    {
        var row = target[x + targetOffset.X];
        if (row != null)
        {
            for (long y = 0; y < extent.Y; ++y)
            {
                var linearIdx = new LongIndex2(x, y).ComputeLinearIndex(extent);
                row[y + targetOffset.Y] = staging[linearIdx];
            }
        }
    }
}
/// <summary>
/// Loads the given kernel and returns a launcher delegate that is bound to the
/// given accelerator stream. The resulting delegate therefore cannot receive
/// other accelerator streams.
/// </summary>
/// <typeparam name="TDelegate">The delegate type.</typeparam>
/// <param name="method">The method to compile into a kernel.</param>
/// <param name="stream">The accelerator stream to use.</param>
/// <returns>The loaded kernel-launcher delegate.</returns>
public TDelegate LoadAutoGroupedStreamKernel<TDelegate>(
    MethodInfo method,
    AcceleratorStream stream)
    where TDelegate : class
{
    var kernelLoader = AutoKernelLoader.Default;
    var launcherProvider = new StreamLauncherProvider(stream);
    return LoadGenericKernel<TDelegate, AutoKernelLoader, StreamLauncherProvider>(
        method,
        KernelSpecialization.Empty,
        ref kernelLoader,
        ref launcherProvider);
}
/// <summary>
/// Loads the given kernel and returns a launcher delegate that is bound to the
/// given accelerator stream. The resulting delegate therefore cannot receive
/// other accelerator streams.
/// </summary>
/// <typeparam name="TDelegate">The delegate type.</typeparam>
/// <param name="method">The method to compile into a kernel.</param>
/// <param name="stream">The accelerator stream to use.</param>
/// <param name="specialization">The kernel specialization.</param>
/// <returns>The loaded kernel-launcher delegate.</returns>
/// <remarks>
/// Note that implicitly-grouped kernels will be launched with a group size
/// of the current warp size of the accelerator.
/// </remarks>
public TDelegate LoadStreamKernel<TDelegate>(
    MethodInfo method,
    AcceleratorStream stream,
    KernelSpecialization specialization)
    where TDelegate : class
{
    var kernelLoader = DefaultKernelLoader.Default;
    var launcherProvider = new StreamLauncherProvider(stream);
    return LoadGenericKernel<TDelegate, DefaultKernelLoader, StreamLauncherProvider>(
        method,
        specialization,
        ref kernelLoader,
        ref launcherProvider);
}
/// <summary>
/// Copies the requested region of the current contents into a new 3D array.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="offset">The offset of the region within this buffer.</param>
/// <param name="extent">The extent (number of elements) of the region.</param>
/// <returns>A new array holding the requested contents.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// A component of <paramref name="extent"/> is smaller than one.
/// </exception>
public T[,,] GetAs3DArray(AcceleratorStream stream, Index3 offset, Index3 extent)
{
    bool hasElements = extent.X >= 1 && extent.Y >= 1 && extent.Z >= 1;
    if (!hasElements)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    var contents = new T[extent.X, extent.Y, extent.Z];
    CopyTo(stream, contents, offset, Index3.Zero, extent);
    return contents;
}
/// <summary>
/// Sets a raw range of this instance to the given byte value.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="value">The value to write.</param>
/// <param name="targetOffsetInBytes">The raw target offset in bytes.</param>
/// <param name="length">The raw length of the range.</param>
/// <remarks>A length of zero is a no-op.</remarks>
public void MemSet(
    AcceleratorStream stream,
    byte value,
    long targetOffsetInBytes,
    long length)
{
    if (length != 0)
    {
        var rawTargetView = AsRawArrayView(targetOffsetInBytes, length);
        MemSet(stream, value, rawTargetView);
    }
}
/// <summary>
/// Copies a region of this buffer into the given 2D array.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="target">The target array.</param>
/// <param name="sourceOffset">The offset of the region within this buffer.</param>
/// <param name="targetOffset">The offset of the region within the target.</param>
/// <param name="extent">The number of elements to copy in each dimension.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// An offset is negative or outside its container, or the region described by
/// <paramref name="extent"/> does not fit into the buffer or the target.
/// </exception>
public void CopyTo(
    AcceleratorStream stream,
    T[,] target,
    Index2 sourceOffset,
    Index2 targetOffset,
    Index2 extent)
{
    if (target == null)
    {
        throw new ArgumentNullException(nameof(target));
    }

    int targetLengthX = target.GetLength(0);
    int targetLengthY = target.GetLength(1);

    // Source offset: non-negative and inside the buffer.
    if (sourceOffset.X < 0 || sourceOffset.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }
    if (sourceOffset.X >= Extent.X || sourceOffset.Y >= Extent.Y)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    // Target offset: non-negative and inside the target array.
    if (targetOffset.X < 0 || targetOffset.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }
    if (targetOffset.X >= targetLengthX || targetOffset.Y >= targetLengthY)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }

    // Extent: non-negative and fitting into both the buffer and the target.
    if (extent.X < 0 || extent.Y < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (sourceOffset.X + extent.X > Extent.X ||
        sourceOffset.Y + extent.Y > Extent.Y)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (targetOffset.X + extent.X > targetLengthX ||
        targetOffset.Y + extent.Y > targetLengthY)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // Fetch the region into a linear staging array and scatter it afterwards.
    var staging = new T[extent.Size];
    buffer.CopyTo(stream, staging, sourceOffset, 0, extent);

    for (int x = 0; x < extent.X; ++x)
    {
        for (int y = 0; y < extent.Y; ++y)
        {
            var linearIdx = new Index2(x, y).ComputeLinearIndex(extent);
            target[x + targetOffset.X, y + targetOffset.Y] = staging[linearIdx];
        }
    }
}
/// <summary>
/// Copies data from the associated accelerator into CPU memory.
/// </summary>
/// <param name="stream">The stream to use.</param>
/// <param name="offset">The element offset (applied to both views).</param>
/// <param name="length">The length (number of elements).</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> is null.
/// </exception>
public void CopyFromAcceleratorAsync(
    AcceleratorStream stream,
    long offset,
    long length)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    // The same sub-range is addressed on the accelerator and on the CPU side.
    GPUView.SubView(offset, length).CopyTo(
        stream,
        CPUView.SubView(offset, length));
}
/// <summary>
/// Loads the given kernel and returns a launcher delegate that is bound to the
/// given accelerator stream. The resulting delegate therefore cannot receive
/// other accelerator streams.
/// </summary>
/// <typeparam name="TDelegate">The delegate type.</typeparam>
/// <param name="method">The method to compile into a kernel.</param>
/// <param name="customGroupSize">The custom group size to use.</param>
/// <param name="stream">The accelerator stream to use.</param>
/// <returns>The loaded kernel-launcher delegate.</returns>
/// <remarks>
/// Note that implicitly-grouped kernels will be launched with the given
/// group size.
/// </remarks>
public TDelegate LoadImplicitlyGroupedStreamKernel<TDelegate>(
    MethodInfo method,
    int customGroupSize,
    AcceleratorStream stream)
    where TDelegate : class
{
    var kernelLoader = new GroupedKernelLoader(customGroupSize);
    var launcherProvider = new StreamLauncherProvider(stream);
    var groupSpecialization = new KernelSpecialization(customGroupSize, null);
    return LoadGenericKernel<TDelegate, GroupedKernelLoader, StreamLauncherProvider>(
        method,
        groupSpecialization,
        ref kernelLoader,
        ref launcherProvider);
}
/// <summary>
/// Copies the requested region of the current contents into a new 2D array.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="offset">The offset of the region within this buffer.</param>
/// <param name="extent">The extent (number of elements) of the region.</param>
/// <returns>A new array holding the requested contents.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// A component of <paramref name="extent"/> is smaller than one.
/// </exception>
public T[,] GetAs2DArray(
    AcceleratorStream stream,
    LongIndex2 offset,
    LongIndex2 extent)
{
    bool hasElements = extent.X >= 1 && extent.Y >= 1;
    if (!hasElements)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    var contents = new T[extent.X, extent.Y];
    CopyTo(stream, contents, offset, LongIndex2.Zero, extent);
    return contents;
}
/// <summary>
/// Validates the given launch arguments, assembles the reflection argument
/// array and invokes the launcher.
/// </summary>
/// <typeparam name="T">The index type of the launch dimension.</typeparam>
/// <param name="dimension">The launch dimension.</param>
/// <param name="stream">The accelerator stream to launch on.</param>
/// <param name="args">The kernel arguments.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> or <paramref name="args"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The number of arguments does not match the expected number of parameters.
/// </exception>
private void InvokeLauncher<T>(T dimension, AcceleratorStream stream, object[] args)
    where T : IIndex
{
    // Reject null references up front; args.Length is read below and would
    // otherwise fail with a NullReferenceException.
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }
    if (args == null)
    {
        throw new ArgumentNullException(nameof(args));
    }
    if (NumParameters != args.Length)
    {
        throw new ArgumentException(
            RuntimeErrorMessages.InvalidNumberOfUniformArgs);
    }

    // Layout: [fixed launcher parameters | kernel args]
    var reflectionArgs = new object[KernelParameterOffset + args.Length];
    reflectionArgs[KernelInstanceParamIdx] = this;
    reflectionArgs[KernelStreamParamIdx] = stream;
    reflectionArgs[KernelParamDimensionIdx] = dimension;
    args.CopyTo(reflectionArgs, KernelParameterOffset);

    Launcher.Invoke(null, reflectionArgs);
}
/// <summary>
/// Copies a region of the given 3D array into this buffer.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="source">The source array.</param>
/// <param name="sourceOffset">The offset of the region within the source.</param>
/// <param name="targetOffset">The target offset within this buffer.</param>
/// <param name="extent">The number of elements to copy in each dimension.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="extent"/> has a negative component or exceeds the source
/// dimensions, or <paramref name="sourceOffset"/> has a negative component or
/// the requested region does not fit into the source.
/// </exception>
/// <remarks>
/// The region is staged in a temporary linear array before it is handed to the
/// underlying buffer.
/// </remarks>
public void CopyFrom(
    AcceleratorStream stream,
    T[,,] source,
    Index3 sourceOffset,
    Index3 targetOffset,
    Index3 extent)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    int sourceLengthX = source.GetLength(0);
    int sourceLengthY = source.GetLength(1);
    int sourceLengthZ = source.GetLength(2);

    if (extent.X < 0 || extent.Y < 0 || extent.Z < 0 ||
        extent.X > sourceLengthX ||
        extent.Y > sourceLengthY ||
        extent.Z > sourceLengthZ)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // The offset is bounded by the source dimensions, not by the extent. Each
    // extent component is known to be at most the matching source length here,
    // so the subtractions cannot overflow.
    if (sourceOffset.X < 0 || sourceOffset.Y < 0 || sourceOffset.Z < 0 ||
        sourceOffset.X > sourceLengthX - extent.X ||
        sourceOffset.Y > sourceLengthY - extent.Y ||
        sourceOffset.Z > sourceLengthZ - extent.Z)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    var tempBuffer = new T[extent.Size];
    for (int i = 0; i < extent.X; ++i)
    {
        for (int j = 0; j < extent.Y; ++j)
        {
            for (int k = 0; k < extent.Z; ++k)
            {
                var targetIdx = new Index3(i, j, k).ComputeLinearIndex(extent);
                tempBuffer[targetIdx] = source[
                    i + sourceOffset.X,
                    j + sourceOffset.Y,
                    k + sourceOffset.Z];
            }
        }
    }

    buffer.CopyFrom(
        stream,
        tempBuffer,
        0,
        targetOffset,
        extent.Size);
}
/// <summary>
/// Loads the given kernel and returns a launcher delegate that is bound to the
/// given accelerator stream. The resulting delegate therefore cannot receive
/// other accelerator streams.
/// </summary>
/// <typeparam name="TDelegate">The delegate type.</typeparam>
/// <param name="method">The method to compile into a kernel.</param>
/// <param name="stream">The accelerator stream to use.</param>
/// <param name="groupSize">
/// The estimated group size to gain maximum occupancy on this device.
/// </param>
/// <param name="minGridSize">
/// The minimum grid size to gain maximum occupancy on this device.
/// </param>
/// <returns>The loaded kernel-launcher delegate.</returns>
public TDelegate LoadAutoGroupedStreamKernel<TDelegate>(
    MethodInfo method,
    AcceleratorStream stream,
    out int groupSize,
    out int minGridSize)
    where TDelegate : class
{
    var kernelLoader = AutoKernelLoader.Default;
    var launcherProvider = new StreamLauncherProvider(stream);
    var kernelDelegate =
        LoadGenericKernel<TDelegate, AutoKernelLoader, StreamLauncherProvider>(
            method,
            KernelSpecialization.Empty,
            ref kernelLoader,
            ref launcherProvider);

    // The loader is passed by reference; its sizes are read after loading.
    groupSize = kernelLoader.GroupSize;
    minGridSize = kernelLoader.MinGridSize;
    return kernelDelegate;
}
/// <summary>
/// Sets the contents of the current buffer to the given byte value.
/// </summary>
/// <param name="stream">The used accelerator stream.</param>
/// <param name="value">The value to write into the memory buffer.</param>
/// <param name="offsetInBytes">The raw offset in bytes.</param>
/// <param name="lengthInBytes">The raw length in bytes.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="lengthInBytes"/> is negative, or
/// <paramref name="offsetInBytes"/> is negative or the addressed range exceeds
/// the length of this buffer in bytes.
/// </exception>
public void MemSet(
    AcceleratorStream stream,
    byte value,
    long offsetInBytes,
    long lengthInBytes)
{
    if (lengthInBytes < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(lengthInBytes));
    }

    // Compare against the remaining space instead of computing
    // offsetInBytes + lengthInBytes: that sum can overflow for very large
    // arguments and would then slip past the range check. With a non-negative
    // length (and assuming LengthInBytes is non-negative) the subtraction
    // cannot overflow.
    if (offsetInBytes < 0 ||
        offsetInBytes > LengthInBytes - lengthInBytes)
    {
        throw new ArgumentOutOfRangeException(nameof(offsetInBytes));
    }

    MemSetInternal(
        stream,
        value,
        offsetInBytes,
        lengthInBytes);
}
/// <summary>
/// Copies data from the associated accelerator into CPU memory.
/// </summary>
/// <param name="stream">The stream to use.</param>
/// <param name="acceleratorMemoryOffset">The source memory offset.</param>
/// <param name="cpuMemoryOffset">The target memory offset.</param>
/// <param name="extent">The extent (number of elements).</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// An offset is out of bounds, <paramref name="extent"/> is smaller than one
/// or larger than the length, or the addressed range exceeds the accelerator
/// or the CPU memory.
/// </exception>
public void CopyFromAccelerator(
    AcceleratorStream stream,
    TIndex acceleratorMemoryOffset,
    TIndex cpuMemoryOffset,
    LongIndex1 extent)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    // Both offsets have to lie within their respective extents.
    if (!cpuMemoryOffset.InBounds(CPUView.Extent))
    {
        throw new ArgumentOutOfRangeException(nameof(cpuMemoryOffset));
    }
    if (!acceleratorMemoryOffset.InBounds(Extent))
    {
        throw new ArgumentOutOfRangeException(nameof(acceleratorMemoryOffset));
    }

    if (extent < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }
    if (extent > Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // Check the linearized ranges against both memory regions.
    var acceleratorStart = acceleratorMemoryOffset.ComputeLinearIndex(Extent);
    var cpuStart = cpuMemoryOffset.ComputeLinearIndex(CPUView.Extent);
    if (acceleratorStart + extent > Length ||
        cpuStart + extent > CPUView.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    var cpuTarget = CPUView.GetSubView(cpuMemoryOffset, extent);
    CopyToView(stream, cpuTarget, acceleratorStart);
}
/// <summary>
/// Loads the given kernel and returns a launcher delegate that is bound to the
/// given accelerator stream. The resulting delegate therefore cannot receive
/// other accelerator streams.
/// </summary>
/// <typeparam name="TDelegate">The delegate type.</typeparam>
/// <param name="method">The method to compile into a kernel.</param>
/// <param name="stream">The accelerator stream to use.</param>
/// <returns>The loaded kernel-launcher delegate.</returns>
/// <remarks>
/// Note that implicitly-grouped kernels will be launched with a group size
/// of the current warp size of the accelerator.
/// </remarks>
public TDelegate LoadStreamKernel<TDelegate>(
    MethodInfo method,
    AcceleratorStream stream)
    where TDelegate : class =>
    LoadStreamKernel<TDelegate>(method, stream, KernelSpecialization.Empty);