/// <summary>
/// Allocates the output and source buffers with a 64-bit length, uploads the
/// sequence 0..length-1 into the source buffer, executes the kernel under test
/// and passes the output view together with that sequence to <c>Verify</c>.
/// </summary>
/// <param name="length">
/// Number of elements. It is used as a <c>long</c> for the allocations and
/// narrowed to <c>int</c> for the host data and the launch extent.
/// </param>
public void ArrayViewLongLeaIndex(long length)
{
    using var buffer = Accelerator.Allocate1D<int>(length);

    int extent = (int)length;
    int[] expected = Enumerable.Range(0, extent).ToArray();

    // The source buffer only has to live for the duration of the launch.
    {
        using var source = Accelerator.Allocate1D<int>(length);
        source.CopyFromCPU(Accelerator.DefaultStream, expected);
        Execute(extent, buffer.View, source.View);
    }

    Verify(buffer.View, expected);
}
/// <summary>
/// Calculate the mandelbrot set on the GPU.
/// </summary>
/// <param name="buffer">
/// Host array that receives the results. Its length determines how many
/// values are allocated on the device and passed to the kernel.
/// </param>
/// <param name="width">Width value forwarded to the kernel.</param>
/// <param name="height">Height value forwarded to the kernel.</param>
/// <param name="max_iterations">Iteration limit forwarded to the kernel.</param>
public static void CalcGPU(int[] buffer, int width, int height, int max_iterations)
{
    int num_values = buffer.Length;

    // The using statement releases the device buffer even when the kernel
    // launch, the synchronization or the copy back to the host throws.
    using (var dev_out = accelerator.Allocate1D<int>(num_values))
    {
        // Launch kernel
        mandelbrot_kernel(num_values, width, height, max_iterations, dev_out.View);

        // Wait for the kernel to finish before reading the results.
        accelerator.Synchronize();

        // Copy the device results into the caller's array.
        dev_out.CopyToCPU(buffer);
    }
}
/// <summary>
/// Compiles <c>MyKernel</c> as an implicitly grouped kernel, loads it with an
/// automatically determined group size, launches it over a 1024-element buffer
/// and reports every element that differs from <c>42 + index</c>.
/// </summary>
/// <param name="accelerator">Accelerator used to compile, load and run the kernel.</param>
static void CompileAndLaunchAutoGroupedKernel(Accelerator accelerator)
{
    const int constant = 42;

    // Access the current backend for this device.
    var backend = accelerator.GetBackend();

    // Resolve the kernel method via reflection and compile it.
    var kernelMethod = typeof(Program).GetMethod(
        nameof(MyKernel),
        BindingFlags.NonPublic | BindingFlags.Static);
    var description = EntryPointDescription.FromImplicitlyGroupedKernel(kernelMethod);
    var compiled = backend.Compile(description, default);

    // Info: If the current accelerator is a CudaAccelerator, the compiled kernel can be
    // cast to a PTXCompiledKernel in order to extract the PTX assembly code.

    // Load the implicitly grouped kernel with an automatically determined group size.
    // Note that the kernel has to be disposed manually; the using declaration does so
    // when the method exits, i.e. after the final synchronization below.
    using var kernel = accelerator.LoadAutoGroupedKernel(compiled);
    var launch = kernel.CreateLauncherDelegate<
        Action<AcceleratorStream, Index1D, ArrayView<int>, int>>();

    // The buffer is scoped to this block so that it is released before the
    // final synchronization.
    {
        using var buffer = accelerator.Allocate1D<int>(1024);

        // Launch buffer.Length many threads and pass a view to buffer.
        // You can also use kernel.Launch; however, the generic launch method involves boxing.
        launch(accelerator.DefaultStream, (int)buffer.Length, buffer.View, constant);

        // Wait for the kernel to finish...
        accelerator.Synchronize();

        // Resolve and verify data.
        var data = buffer.GetAsArray1D();
        for (int i = 0; i < data.Length; ++i)
        {
            if (data[i] == constant + i)
            {
                continue;
            }

            Console.WriteLine($"Error at element location {i}: {data[i]} found");
        }
    }

    accelerator.Synchronize();
}
/// <summary>
/// Calculate the mandelbrot set on the GPU.
/// </summary>
/// <param name="buffer">
/// Host array that receives the results. Its length determines how many
/// values are allocated on the device and passed to the kernel.
/// </param>
/// <param name="width">Width value forwarded to the kernel.</param>
/// <param name="height">Height value forwarded to the kernel.</param>
/// <param name="max_iterations">Iteration limit forwarded to the kernel.</param>
public static void CalcGPU(int[] buffer, int width, int height, int max_iterations)
{
    int num_values = buffer.Length;

    // The using statement releases the device buffer even when the kernel
    // launch or the copy back to the host throws.
    using (var dev_out = accelerator.Allocate1D<int>(num_values))
    {
        // Launch kernel
        mandelbrot_kernel(num_values, width, height, max_iterations, dev_out.View);

        // Copies the data from the GPU buffer into the caller's array.
        // No explicit accelerator.Synchronize() is issued here: the copy is
        // relied upon to synchronize the default stream first, so that the
        // kernel has completed before the data is read.
        dev_out.CopyToCPU(buffer);
    }
}
/// <summary>
/// Fills a <c>long</c> source buffer with a value whose upper 32 bits are
/// <c>int.MaxValue</c> and whose lower bits are <c>ushort.MaxValue</c>, executes
/// the kernel under test and verifies that the first output buffer holds
/// <c>ushort.MaxValue</c> and the second one <c>int.MaxValue</c> in every element.
/// </summary>
/// <param name="length">Number of elements in every buffer and the launch extent.</param>
public void VariableSubView(int length)
{
    using var buffer = Accelerator.Allocate1D<int>(length);
    using var buffer2 = Accelerator.Allocate1D<int>(length);

    // The source buffer only has to live for the duration of the launch.
    {
        using var source = Accelerator.Allocate1D<long>(length);

        long packed = ((long)int.MaxValue << 32) | ushort.MaxValue;
        long[] expected = Enumerable.Repeat(packed, length).ToArray();

        source.CopyFromCPU(Accelerator.DefaultStream, expected);
        Execute(length, buffer.View, buffer2.View, source.View);
    }

    int[] expectedLow = Enumerable.Repeat((int)ushort.MaxValue, length).ToArray();
    Verify(buffer.View, expectedLow);

    int[] expectedHigh = Enumerable.Repeat(int.MaxValue, length).ToArray();
    Verify(buffer2.View, expectedHigh);
}
/// <summary>
/// Compiles <c>GroupedKernel</c> as an explicitly grouped kernel, launches it
/// over a 1024-element buffer with the given group size and reports every
/// element that differs from <c>42 + index</c>.
/// </summary>
/// <param name="accelerator">Accelerator used to compile, load and run the kernel.</param>
/// <param name="groupSize">Number of threads per group; also the divisor of the group-count computation.</param>
static void CompileAndLaunchKernel(Accelerator accelerator, int groupSize)
{
    const int constant = 42;

    // Access the current backend for this device.
    var backend = accelerator.GetBackend();

    // Resolve the kernel method via reflection and compile it.
    var kernelMethod = typeof(Program).GetMethod(
        nameof(GroupedKernel),
        BindingFlags.NonPublic | BindingFlags.Static);
    var description = EntryPointDescription.FromExplicitlyGroupedKernel(kernelMethod);
    var compiled = backend.Compile(description, default);

    // Info: If the current accelerator is a CudaAccelerator, the compiled kernel can be
    // cast to a PTXCompiledKernel in order to extract the PTX assembly code.

    // Load the explicitly grouped kernel.
    // Note that the kernel has to be disposed manually; the using declaration does so
    // when the method exits.
    using var kernel = accelerator.LoadKernel(compiled);
    var launch = kernel.CreateLauncherDelegate<
        Action<AcceleratorStream, KernelConfig, ArrayView<int>, int>>();

    using var buffer = accelerator.Allocate1D<int>(1024);

    // Number of groups, rounded up so that every element is covered.
    int groupCount = ((int)buffer.Length + groupSize - 1) / groupSize;

    // You can also use kernel.Launch; however, the generic launch method involves boxing.
    launch(
        accelerator.DefaultStream,
        (groupCount, groupSize),
        buffer.View,
        constant);

    accelerator.Synchronize();

    // Resolve and verify data.
    var data = buffer.GetAsArray1D();
    for (int i = 0; i < data.Length; ++i)
    {
        if (data[i] == constant + i)
        {
            continue;
        }

        Console.WriteLine($"Error at element location {i}: {data[i]} found");
    }
}
/// <summary>
/// Uploads <c>length</c> pairs of the form (t, t + 1), executes the kernel under
/// test and verifies that the output buffer holds 1..length, which equals the
/// <c>Second</c> field of each uploaded pair.
/// </summary>
/// <param name="length">Number of elements in both buffers and the launch extent.</param>
public void ArrayViewGetSubVariableView(int length)
{
    using var buffer = Accelerator.Allocate1D<int>(length);

    var sourceData = new Pair<int>[length];
    for (int t = 0; t < length; ++t)
    {
        sourceData[t] = new Pair<int>() { First = t, Second = t + 1 };
    }

    int[] expected = Enumerable.Range(1, length).ToArray();

    // The source buffer only has to live for the duration of the launch.
    {
        using var source = Accelerator.Allocate1D<Pair<int>>(length);
        source.CopyFromCPU(Accelerator.DefaultStream, sourceData);
        Execute(length, buffer.View, source.View);
    }

    Verify(buffer.View, expected);
}
/// <summary>
/// Loads <paramref name="method"/> as an auto-grouped stream kernel, launches it
/// over a 1024-element buffer and, when an expected value is supplied, asserts
/// that every element of the buffer equals it.
/// </summary>
/// <param name="accelerator">Accelerator used to load and run the kernel.</param>
/// <param name="method">Kernel method taking the index and the output view.</param>
/// <param name="expectedValue">Value every element must have, or <c>null</c> to skip the check.</param>
static void LaunchKernel(
    Accelerator accelerator,
    Action<Index1D, ArrayView<int>> method,
    int? expectedValue)
{
    var kernel = accelerator.LoadAutoGroupedStreamKernel(method);

    using var buffer = accelerator.Allocate1D<int>(1024);
    kernel((int)buffer.Length, buffer.View);

    // Wait for the kernel to finish...
    accelerator.Synchronize();

    // Without an expected value there is nothing to read back.
    if (!expectedValue.HasValue)
    {
        return;
    }

    var data = buffer.GetAsArray1D();
    foreach (var element in data)
    {
        Debug.Assert(element == expectedValue);
    }
}
/// <summary>
/// A "checked" histogram indicates to the caller that one of the histogram
/// bins has overflowed - it does not indicate which bin overflowed. The value
/// of the bin that overflowed will wrap, depending on its data type.
/// </summary>
/// <param name="accelerator">Accelerator on which the buffers are allocated and the histogram is computed.</param>
/// <param name="values">Input values that are uploaded and binned.</param>
static void SingleBinCheckedHistogram(Accelerator accelerator, int[] values)
{
    Console.WriteLine("Single bin checked histogram");

    using var buffer = accelerator.Allocate1D(values);

    // Histogram with 3 bins, cleared before use.
    using var histogram = accelerator.Allocate1D<long>(3);
    histogram.MemSetToZero();

    // Single-element buffer that receives the overflow result, cleared before use.
    using var overflow = accelerator.Allocate1D<int>(1);
    overflow.MemSetToZero();

    accelerator.Histogram<int, Stride1D.Dense, CustomModuloBinOperation>(
        accelerator.DefaultStream,
        buffer.View,
        histogram.View,
        overflow.View);

    // Print every bin.
    var result = histogram.GetAsArray1D();
    for (int i = 0; i < result.Length; ++i)
    {
        Console.WriteLine($"Histogram[{i}] = {result[i]}");
    }

    // A non-zero overflow value means that some bin overflowed.
    var overflowResult = overflow.GetAsArray1D();
    Console.WriteLine(
        overflowResult[0] != 0
            ? "Histogram overflowed."
            : "Histogram did not overflow.");

    Console.WriteLine();
}
/// <summary>
/// Uses System.GC.AllocateArray to allocate a pinned array in CPU host memory
/// and copies it asynchronously into a buffer on the accelerator.
/// </summary>
/// <param name="accelerator">The current accelerator.</param>
/// <param name="dataSize">The number of elements to copy.</param>
static void PerformPinnedCopyUsingGCAllocateArray(Accelerator accelerator, int dataSize)
{
    int[] array = GC.AllocateArray<int>(dataSize, pinned: true);

    // Allocate buffer on this device.
    using var bufferOnGPU = accelerator.Allocate1D<int>(array.Length);
    var stream = accelerator.DefaultStream;

    // Page locked buffers enable async memory transfers.
    // Declared after the device buffer, so the page-lock scope is released first.
    using var scope = accelerator.CreatePageLockFromPinned(array);
    bufferOnGPU.View.CopyFromPageLockedAsync(stream, scope);

    //
    // Perform other operations...
    //

    // Wait for the copy operation to finish.
    stream.Synchronize();
}
/// <summary>
/// Launches the given kernel launcher over a 1024-element buffer with the
/// constant 42 and reports every element that differs from <c>42 + index</c>.
/// </summary>
/// <param name="accelerator">Accelerator on which the buffer is allocated and synchronized.</param>
/// <param name="launcher">Launcher taking the extent, the output view and the constant.</param>
static void LaunchKernel(
    Accelerator accelerator,
    Action<Index1D, ArrayView<int>, int> launcher)
{
    const int constant = 42;

    using var buffer = accelerator.Allocate1D<int>(1024);

    // Launch buffer.Length many threads and pass a view to buffer.
    launcher((int)buffer.Length, buffer.View, constant);

    // Wait for the kernel to finish...
    accelerator.Synchronize();

    // Resolve and verify data.
    var data = buffer.GetAsArray1D();
    for (int i = 0; i < data.Length; ++i)
    {
        if (data[i] == constant + i)
        {
            continue;
        }

        Console.WriteLine($"Error at element location {i}: {data[i]} found");
    }
}
/// <summary>
/// Allocates a 1D buffer on the given accelerator, copies a host array into
/// it and back out into a second host array, then reports every position
/// where the two host arrays differ.
/// </summary>
/// <param name="accelerator">The target accelerator.</param>
static void Alloc1D(Accelerator accelerator)
{
    Console.WriteLine($"Performing 1D allocation on {accelerator.Name}");

    int[] data = Enumerable.Range(0, AllocationSize1D).ToArray();
    int[] targetData = new int[AllocationSize1D];

    // The device buffer is released before the host-side verification starts.
    {
        using var buffer = accelerator.Allocate1D<int>(data.Length);

        // Copy to accelerator
        buffer.CopyFromCPU(data);

        // Copy from accelerator
        buffer.CopyToCPU(targetData);
    }

    // Verify data
    for (int i = 0; i < AllocationSize1D; ++i)
    {
        if (data[i] == targetData[i])
        {
            continue;
        }

        Console.WriteLine($"Error comparing data and target data at {i}: {targetData[i]} found, but {data[i]} expected");
    }
}
/// <summary>
/// Prints the name of the kernel method, loads it as an auto-grouped stream
/// kernel, launches it with an extent of 1024 on a zeroed single-element
/// buffer and the constant 2.0, then prints the buffer contents.
/// </summary>
/// <param name="accelerator">Accelerator used to load and run the kernel.</param>
/// <param name="method">Kernel method taking the index, the output view and a constant.</param>
static void LaunchKernel(
    Accelerator accelerator,
    Action<Index1D, ArrayView<double>, double> method)
{
    Console.WriteLine("Launching: " + method.Method.Name);

    var kernel = accelerator.LoadAutoGroupedStreamKernel(method);

    using var buffer = accelerator.Allocate1D<double>(1);
    buffer.MemSetToZero();

    // 1024 launch indices share the single-element buffer.
    kernel(1024, buffer.View, 2.0);

    // Wait for the kernel to finish...
    accelerator.Synchronize();

    var data = buffer.GetAsArray1D();
    for (int i = 0; i < data.Length; ++i)
    {
        Console.WriteLine($"Data[{i}] = {data[i]}");
    }
}
/// <summary>
/// Constructs a new scan provider and allocates its temporary <c>int</c>
/// buffer with <paramref name="dataLength"/> elements on the accelerator.
/// </summary>
/// <param name="accelerator">Accelerator passed to the base class and used for the allocation.</param>
/// <param name="dataLength">Number of elements of the temporary buffer.</param>
internal ScanProvider(Accelerator accelerator, LongIndex1D dataLength)
    : base(accelerator) =>
    tempBuffer = accelerator.Allocate1D<int>(dataLength);