Beispiel #1
0
        /// <summary>
        /// Uploads the ascending sequence 0, 1, ..., length - 1 into a temporary
        /// input buffer, invokes <c>Execute</c> with an output and the input view,
        /// and finally hands the output view and the sequence to <c>Verify</c>.
        /// </summary>
        /// <param name="length">The number of elements in both buffers.</param>
        public void ArrayViewLongLeaIndex(long length)
        {
            using var output = Accelerator.Allocate1D<int>(length);

            // The reference data and the launch extent both use the int-truncated length.
            int elementCount = (int)length;
            int[] reference = Enumerable.Range(0, elementCount).ToArray();

            // The input buffer is released as soon as Execute has returned.
            using (var input = Accelerator.Allocate1D<int>(length))
            {
                input.CopyFromCPU(Accelerator.DefaultStream, reference);
                Execute(elementCount, output.View, input.View);
            }

            Verify(output.View, reference);
        }
Beispiel #2
0
        /// <summary>
        /// Calculate the mandelbrot set on the GPU.
        /// </summary>
        /// <param name="buffer">
        /// Host array that receives the result. Its length determines how many
        /// values are allocated on the device and passed to the kernel.
        /// </param>
        /// <param name="width">Width value forwarded to the kernel.</param>
        /// <param name="height">Height value forwarded to the kernel.</param>
        /// <param name="max_iterations">Iteration limit forwarded to the kernel.</param>
        public static void CalcGPU(int[] buffer, int width, int height, int max_iterations)
        {
            int num_values = buffer.Length;

            // The using statement releases the device buffer even if the kernel
            // launch, the synchronization or the copy back to the host throws.
            using (var dev_out = accelerator.Allocate1D<int>(num_values))
            {
                // Launch kernel
                mandelbrot_kernel(num_values, width, height, max_iterations, dev_out.View);

                // Wait for the kernel to finish before reading the results back.
                accelerator.Synchronize();
                dev_out.CopyToCPU(buffer);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Compiles <c>MyKernel</c> as an implicitly grouped kernel, loads it with an
        /// automatically determined group size and launches it on a 1024-element buffer.
        /// </summary>
        /// <param name="accelerator">The accelerator that compiles, loads and runs the kernel.</param>
        static void CompileAndLaunchAutoGroupedKernel(Accelerator accelerator)
        {
            // The backend that belongs to this device performs the compilation.
            var backend = accelerator.GetBackend();

            // Look up the kernel method via reflection and compile it.
            var kernelMethod = typeof(Program).GetMethod(
                nameof(MyKernel),
                BindingFlags.NonPublic | BindingFlags.Static);
            var description = EntryPointDescription.FromImplicitlyGroupedKernel(kernelMethod);
            var compiled = backend.Compile(description, default);

            // Info: On a CudaAccelerator the compiled kernel can be cast to a
            // PTXCompiledKernel in order to extract the PTX assembly code.

            // Load the implicitly grouped kernel with an automatically determined group size.
            // The loaded kernel has to be disposed manually, hence the using block.
            using (var kernel = accelerator.LoadAutoGroupedKernel(compiled))
            {
                var launch = kernel.CreateLauncherDelegate<Action<AcceleratorStream, Index1D, ArrayView<int>, int>>();

                using (var buffer = accelerator.Allocate1D<int>(1024))
                {
                    // One thread per buffer element; the typed delegate is used because
                    // the generic kernel.Launch method involves boxing.
                    launch(accelerator.DefaultStream, (int)buffer.Length, buffer.View, 42);

                    // Block until the kernel has finished.
                    accelerator.Synchronize();

                    // Element i is expected to contain 42 + i; report every mismatch.
                    var data = buffer.GetAsArray1D();
                    for (var i = 0; i < data.Length; i++)
                    {
                        if (data[i] == 42 + i)
                        {
                            continue;
                        }

                        Console.WriteLine($"Error at element location {i}: {data[i]} found");
                    }
                }

                accelerator.Synchronize();
            }
        }
Beispiel #4
0
        /// <summary>
        /// Calculate the mandelbrot set on the GPU.
        /// </summary>
        /// <param name="buffer">
        /// Host array that receives the result. Its length determines how many
        /// values are allocated on the device and passed to the kernel.
        /// </param>
        /// <param name="width">Width value forwarded to the kernel.</param>
        /// <param name="height">Height value forwarded to the kernel.</param>
        /// <param name="max_iterations">Iteration limit forwarded to the kernel.</param>
        public static void CalcGPU(int[] buffer, int width, int height, int max_iterations)
        {
            int num_values = buffer.Length;

            // The using statement releases the device buffer even if the kernel
            // launch or the copy back to the host throws.
            using (var dev_out = accelerator.Allocate1D<int>(num_values))
            {
                // Launch kernel
                mandelbrot_kernel(num_values, width, height, max_iterations, dev_out.View);

                // Reads data from the GPU buffer into the caller-provided CPU array.
                // Implicitly calls accelerator.DefaultStream.Synchronize() to ensure
                // that the kernel and memory copy are completed first.
                dev_out.CopyToCPU(buffer);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Fills a <c>long</c> source buffer with a packed 64-bit value, invokes
        /// <c>Execute</c> with two <c>int</c> target views and the source view, and
        /// passes each target together with its expected contents to <c>Verify</c>.
        /// </summary>
        /// <param name="length">The number of elements in every buffer.</param>
        public void VariableSubView(int length)
        {
            using var firstTarget = Accelerator.Allocate1D<int>(length);
            using var secondTarget = Accelerator.Allocate1D<int>(length);

            // The source buffer is only needed while Execute runs.
            using (var packedSource = Accelerator.Allocate1D<long>(length))
            {
                // Upper 32 bits: int.MaxValue, lower 32 bits: ushort.MaxValue.
                long packedValue = ((long)int.MaxValue << 32) | ushort.MaxValue;
                long[] packedData = Enumerable.Repeat(packedValue, length).ToArray();

                packedSource.CopyFromCPU(Accelerator.DefaultStream, packedData);
                Execute(length, firstTarget.View, secondTarget.View, packedSource.View);
            }

            // The first target is checked against the lower half of the packed value ...
            int[] expectedFirst = Enumerable.Repeat((int)ushort.MaxValue, length).ToArray();
            Verify(firstTarget.View, expectedFirst);

            // ... and the second target against the upper half.
            int[] expectedSecond = Enumerable.Repeat(int.MaxValue, length).ToArray();
            Verify(secondTarget.View, expectedSecond);
        }
Beispiel #6
0
        /// <summary>
        /// Compiles <c>GroupedKernel</c> as an explicitly grouped kernel, loads it and
        /// launches it on a 1024-element buffer using the given group size.
        /// </summary>
        /// <param name="accelerator">The accelerator that compiles, loads and runs the kernel.</param>
        /// <param name="groupSize">The number of threads per group used for the launch.</param>
        static void CompileAndLaunchKernel(Accelerator accelerator, int groupSize)
        {
            // The backend that belongs to this device performs the compilation.
            var backend = accelerator.GetBackend();

            // Look up the kernel method via reflection and compile it.
            var kernelMethod = typeof(Program).GetMethod(
                nameof(GroupedKernel),
                BindingFlags.NonPublic | BindingFlags.Static);
            var description = EntryPointDescription.FromExplicitlyGroupedKernel(kernelMethod);
            var compiled = backend.Compile(description, default);

            // Info: On a CudaAccelerator the compiled kernel can be cast to a
            // PTXCompiledKernel in order to extract the PTX assembly code.

            // Load the explicitly grouped kernel.
            // The loaded kernel has to be disposed manually, hence the using block.
            using (var kernel = accelerator.LoadKernel(compiled))
            {
                var launch = kernel.CreateLauncherDelegate<Action<AcceleratorStream, KernelConfig, ArrayView<int>, int>>();

                using (var buffer = accelerator.Allocate1D<int>(1024))
                {
                    // Number of groups, rounded up so that every element is covered.
                    int groupCount = ((int)buffer.Length + groupSize - 1) / groupSize;

                    // The typed delegate is used because the generic kernel.Launch
                    // method involves boxing.
                    launch(
                        accelerator.DefaultStream,
                        (groupCount, groupSize),
                        buffer.View,
                        42);

                    accelerator.Synchronize();

                    // Element i is expected to contain 42 + i; report every mismatch.
                    var data = buffer.GetAsArray1D();
                    for (var i = 0; i < data.Length; i++)
                    {
                        if (data[i] == 42 + i)
                        {
                            continue;
                        }

                        Console.WriteLine($"Error at element location {i}: {data[i]} found");
                    }
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Uploads pairs of the form (t, t + 1) for t in 0..length-1, invokes
        /// <c>Execute</c> with an output and the pair view, and passes the output
        /// together with the sequence 1..length to <c>Verify</c>.
        /// </summary>
        /// <param name="length">The number of elements in both buffers.</param>
        public void ArrayViewGetSubVariableView(int length)
        {
            using var output = Accelerator.Allocate1D<int>(length);

            // Pair number t stores t in First and t + 1 in Second.
            Pair<int>[] pairs = Enumerable
                .Range(0, length)
                .Select(index => new Pair<int>() { First = index, Second = index + 1 })
                .ToArray();

            // The sequence 1..length equals the Second component of every pair.
            int[] reference = Enumerable.Range(1, length).ToArray();

            // The pair buffer is released as soon as Execute has returned.
            using (var input = Accelerator.Allocate1D<Pair<int>>(length))
            {
                input.CopyFromCPU(Accelerator.DefaultStream, pairs);
                Execute(length, output.View, input.View);
            }

            Verify(output.View, reference);
        }
Beispiel #8
0
        /// <summary>
        /// Loads the given method as an auto-grouped stream kernel, launches it on a
        /// 1024-element buffer and optionally asserts the value of every element.
        /// </summary>
        /// <param name="accelerator">The accelerator that loads and runs the kernel.</param>
        /// <param name="method">The kernel method to launch.</param>
        /// <param name="expectedValue">
        /// The value every element is asserted to have, or null to skip the check.
        /// </param>
        static void LaunchKernel(
            Accelerator accelerator,
            Action<Index1D, ArrayView<int>> method,
            int? expectedValue)
        {
            var launch = accelerator.LoadAutoGroupedStreamKernel(method);

            using (var buffer = accelerator.Allocate1D<int>(1024))
            {
                launch((int)buffer.Length, buffer.View);

                // Block until the kernel has finished.
                accelerator.Synchronize();

                // Without an expected value there is nothing to check.
                if (!expectedValue.HasValue)
                {
                    return;
                }

                int expected = expectedValue.Value;
                foreach (var element in buffer.GetAsArray1D())
                {
                    Debug.Assert(element == expected);
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// A "checked" histogram will indicate to the caller that one of the histogram
        /// bins has overflowed - it does not indicate which bin overflowed. The value
        /// of the bin that overflowed will wrap, depending on its data type.
        /// </summary>
        /// <param name="accelerator">The accelerator that computes the histogram.</param>
        /// <param name="values">The input values that are uploaded and binned.</param>
        static void SingleBinCheckedHistogram(Accelerator accelerator, int[] values)
        {
            Console.WriteLine("Single bin checked histogram");
            using var input = accelerator.Allocate1D(values);

            // Three zero-initialized bins.
            using var bins = accelerator.Allocate1D<long>(3);
            bins.MemSetToZero();

            // A single zero-initialized element that receives the overflow result.
            using var overflowFlag = accelerator.Allocate1D<int>(1);
            overflowFlag.MemSetToZero();

            accelerator.Histogram<int, Stride1D.Dense, CustomModuloBinOperation>(
                accelerator.DefaultStream,
                input.View,
                bins.View,
                overflowFlag.View);

            // Print every bin.
            var result = bins.GetAsArray1D();
            for (var i = 0; i < result.Length; i++)
            {
                Console.WriteLine($"Histogram[{i}] = {result[i]}");
            }

            // A non-zero overflow result is reported as an overflow.
            var overflowed = overflowFlag.GetAsArray1D()[0] != 0;
            Console.WriteLine(overflowed
                ? "Histogram overflowed."
                : "Histogram did not overflow.");

            Console.WriteLine();
        }
Beispiel #10
0
        /// <summary>
        /// Uses System.GC.AllocateArray to allocate a pinned chunk of memory in CPU
        /// host memory and copies it asynchronously into a buffer on the accelerator.
        /// </summary>
        /// <param name="accelerator">The current accelerator.</param>
        /// <param name="dataSize">The number of elements to copy.</param>
        static void PerformPinnedCopyUsingGCAllocateArray(Accelerator accelerator, int dataSize)
        {
            var hostArray = GC.AllocateArray<int>(dataSize, pinned: true);

            // Device-side target of the copy.
            using var deviceBuffer = accelerator.Allocate1D<int>(hostArray.Length);
            var stream = accelerator.DefaultStream;

            // Page locked buffers enable async memory transfers.
            // Declared after the device buffer, so it is disposed before it.
            using var pageLock = accelerator.CreatePageLockFromPinned(hostArray);
            deviceBuffer.View.CopyFromPageLockedAsync(stream, pageLock);

            //
            // Other operations could be performed here...
            //

            // Block until the copy operation has finished.
            stream.Synchronize();
        }
Beispiel #11
0
        /// <summary>
        /// Invokes the given launcher on a 1024-element buffer with the constant 42
        /// and reports every element i that does not contain 42 + i.
        /// </summary>
        /// <param name="accelerator">The accelerator that owns the buffer.</param>
        /// <param name="launcher">The kernel launcher to invoke.</param>
        static void LaunchKernel(
            Accelerator accelerator,
            Action<Index1D, ArrayView<int>, int> launcher)
        {
            using (var buffer = accelerator.Allocate1D<int>(1024))
            {
                // One thread per buffer element, operating on a view of the buffer.
                launcher((int)buffer.Length, buffer.View, 42);

                // Block until the kernel has finished.
                accelerator.Synchronize();

                // Fetch the results and report every mismatch.
                var data = buffer.GetAsArray1D();
                for (var i = 0; i < data.Length; i++)
                {
                    if (data[i] == 42 + i)
                    {
                        continue;
                    }

                    Console.WriteLine($"Error at element location {i}: {data[i]} found");
                }
            }
        }
Beispiel #12
0
        /// <summary>
        /// Allocates a 1D buffer on the given accelerator, copies a sequence of
        /// integers into it and back out again, and reports every element that
        /// differs after the round trip.
        /// </summary>
        /// <param name="accelerator">The target accelerator.</param>
        static void Alloc1D(Accelerator accelerator)
        {
            Console.WriteLine($"Performing 1D allocation on {accelerator.Name}");

            // Source: 0, 1, ..., AllocationSize1D - 1. Target: zero-initialized.
            int[] data = Enumerable.Range(0, AllocationSize1D).ToArray();
            int[] targetData = new int[AllocationSize1D];

            using (var buffer = accelerator.Allocate1D<int>(data.Length))
            {
                // Host -> accelerator
                buffer.CopyFromCPU(data);

                // Accelerator -> host
                buffer.CopyToCPU(targetData);
            }

            // Both arrays must match element by element.
            for (var i = 0; i < AllocationSize1D; i++)
            {
                if (data[i] == targetData[i])
                {
                    continue;
                }

                Console.WriteLine($"Error comparing data and target data at {i}: {targetData[i]} found, but {data[i]} expected");
            }
        }
Beispiel #13
0
        /// <summary>
        /// Loads the given method as an auto-grouped stream kernel, launches it with
        /// an extent of 1024 on a single zero-initialized <c>double</c> and prints the
        /// resulting buffer contents.
        /// </summary>
        /// <param name="accelerator">The accelerator that loads and runs the kernel.</param>
        /// <param name="method">The kernel method to launch.</param>
        static void LaunchKernel(
            Accelerator accelerator,
            Action<Index1D, ArrayView<double>, double> method)
        {
            Console.WriteLine("Launching: " + method.Method.Name);

            var launch = accelerator.LoadAutoGroupedStreamKernel(method);

            using (var buffer = accelerator.Allocate1D<double>(1))
            {
                // Start from a zeroed element.
                buffer.MemSetToZero();

                // 1024 threads share the one-element view; 2.0 is the scalar argument.
                launch(1024, buffer.View, 2.0);

                // Block until the kernel has finished.
                accelerator.Synchronize();

                // Print every element of the result.
                var data = buffer.GetAsArray1D();
                for (var i = 0; i < data.Length; i++)
                {
                    Console.WriteLine($"Data[{i}] = {data[i]}");
                }
            }
        }
Beispiel #14
0
 /// <summary>
 /// Constructs a scan provider and allocates its temporary buffer.
 /// </summary>
 /// <param name="accelerator">
 /// The accelerator that is passed to the base constructor and used to
 /// allocate the temporary buffer.
 /// </param>
 /// <param name="dataLength">
 /// The number of <c>int</c> elements to allocate for the temporary buffer.
 /// </param>
 internal ScanProvider(Accelerator accelerator, LongIndex1D dataLength)
     : base(accelerator)
 {
     // Allocated once here and stored in the tempBuffer field.
     tempBuffer = accelerator.Allocate1D <int>(dataLength);
 }