Ejemplo n.º 1
0
        /// <summary>
        /// Runs three specialized-kernel scenarios (plain <c>int</c>, a custom structure and a
        /// generic <c>long</c> kernel) on every accelerator reported by
        /// <c>Accelerator.Accelerators</c>.
        /// </summary>
        /// <remarks>
        /// Each scenario launches the same loaded kernel several times with different
        /// <c>SpecializedValue</c> arguments and then waits for the accelerator before its
        /// buffer is disposed.
        /// </remarks>
        static void Main()
        {
            using (var context = new Context())
            {
                // For each available accelerator...
                foreach (var acceleratorId in Accelerator.Accelerators)
                {
                    // Create default accelerator for the given accelerator id
                    using (var accelerator = Accelerator.Create(context, acceleratorId))
                    {
                        Console.WriteLine($"Performing operations on {accelerator}");

                        // A single group with the maximum number of threads is used for every launch.
                        int groupSize = accelerator.MaxNumThreadsPerGroup;

                        // Scenario 1: simple version
                        using (var buffer = accelerator.Allocate<int>(groupSize))
                        {
                            var kernel = accelerator.LoadStreamKernel<
                                ArrayView<int>,
                                SpecializedValue<int>>(SpecializedKernel);
                            kernel((1, groupSize), buffer.View, SpecializedValue.New(2));
                            kernel((1, groupSize), buffer.View, SpecializedValue.New(23));
                            kernel((1, groupSize), buffer.View, SpecializedValue.New(42));

                            // Kernel launches are queued on the accelerator stream; wait for
                            // them to finish before the buffer they write to is disposed.
                            accelerator.Synchronize();
                        }

                        // Scenario 2: custom structure
                        using (var buffer = accelerator.Allocate<int>(groupSize))
                        {
                            var kernel = accelerator.LoadStreamKernel<
                                ArrayView<int>,
                                SpecializedValue<CustomStruct>>(SpecializedCustomStructKernel);
                            kernel(
                                (1, groupSize),
                                buffer.View,
                                SpecializedValue.New(
                                    new CustomStruct(1, 7)));
                            kernel(
                                (1, groupSize),
                                buffer.View,
                                SpecializedValue.New(
                                    new CustomStruct(23, 42)));

                            // Same reasoning as scenario 1: finish all launches before disposal.
                            accelerator.Synchronize();
                        }

                        // Scenario 3: generic kernel
                        using (var buffer = accelerator.Allocate<long>(groupSize))
                        {
                            var kernel = accelerator.LoadStreamKernel<
                                ArrayView<long>,
                                SpecializedValue<long>>(SpecializedGenericKernel);
                            kernel((1, groupSize), buffer.View, SpecializedValue.New(23L));
                            kernel((1, groupSize), buffer.View, SpecializedValue.New(42L));

                            // Same reasoning as scenario 1: finish all launches before disposal.
                            accelerator.Synchronize();
                        }
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Launches <c>MultiplyUInt128Kernel</c> over a 1024-element <c>UInt128</c> buffer,
        /// passing <c>ulong.MaxValue</c> as a specialized value, and prints every resulting
        /// element to the console.
        /// </summary>
        /// <remarks>
        /// The kernel demonstrates the mul.hi.u64 and mul.lo.u64 inline PTX instructions,
        /// which multiply two UInt64 values to produce a UInt128 value.
        /// </remarks>
        /// <param name="accelerator">The Cuda accelerator used to allocate the buffer and run the kernel.</param>
        static void MultiplyUInt128(CudaAccelerator accelerator)
        {
            using var products = accelerator.Allocate1D<UInt128>(1024);

            var multiply = accelerator.LoadAutoGroupedStreamKernel<
                Index1D,
                ArrayView<UInt128>,
                SpecializedValue<ulong>>(MultiplyUInt128Kernel);

            // One kernel instance per buffer element.
            var extent = (int)products.Length;
            var view = products.View;
            var factor = SpecializedValue.New(ulong.MaxValue);
            multiply(extent, view, factor);

            // Fetch the buffer contents as a managed array and print them in index order.
            var values = products.GetAsArray1D();
            var position = 0;
            foreach (var value in values)
            {
                Console.WriteLine($"[{position}] = {value}");
                position++;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs <paramref name="kernelFunc"/> on <paramref name="gpu"/> over a two-dimensional
        /// grid, reports the elapsed time through <c>Util.PrintPerformance</c> and copies the
        /// n-by-n result buffer into <paramref name="mSquaredDistances"/>.
        /// </summary>
        /// <param name="gpu">The Cuda accelerator used for allocation, launch and synchronization.</param>
        /// <param name="mSquaredDistances">Destination array that receives the n-by-n result buffer.</param>
        /// <param name="mCoordinates">Input values handed to <c>gpu.Allocate</c> (presumably uploaded to the device; confirm against that helper).</param>
        /// <param name="c">Value passed to the kernel as a specialized value and forwarded to the performance report.</param>
        /// <param name="n">Extent of each dimension of the result buffer; also passed to the kernel as a plain argument.</param>
        /// <param name="name">Label forwarded to <c>Util.PrintPerformance</c>.</param>
        /// <param name="kernelFunc">Kernel to launch; receives the result view, the coordinate view, the block size, <paramref name="c"/> and <paramref name="n"/>.</param>
        private static void IlGpuOptimisedImpl(
            CudaAccelerator gpu,
            Real[] mSquaredDistances,
            Real[] mCoordinates,
            int c,
            int n,
            string name,
            Action<ArrayView2D<Real>, ArrayView<Real>, SpecializedValue<int>, SpecializedValue<int>, int> kernelFunc)
        {
            using var distanceBuffer = gpu.Allocate<Real>(n, n);
            using var coordinateBuffer = gpu.Allocate(mCoordinates);

            // Timing starts after the buffers exist, so allocation is not measured.
            var stopwatch = Stopwatch.StartNew();

            // Square grid of blocks with 128 threads each, rounded up to cover n.
            const int threadsPerBlock = 128;
            var blocksPerAxis = Util.DivUp(n, threadsPerBlock);
            var launchConfig = ((blocksPerAxis, blocksPerAxis, 1), (threadsPerBlock, 1, 1));

            // Block size and c are passed as specialized values; n stays a regular argument.
            var specializedBlockSize = SpecializedValue.New(threadsPerBlock);
            var specializedC = SpecializedValue.New(c);

            gpu.Launch(
                kernelFunc,
                gpu.DefaultStream,
                launchConfig,
                distanceBuffer.View,
                coordinateBuffer.View,
                specializedBlockSize,
                specializedC,
                n);

            // Wait for the kernel before the elapsed time is reported.
            gpu.Synchronize();

            Util.PrintPerformance(stopwatch, name, n, c, n);

            // The copy back to the host happens outside the reported measurement.
            distanceBuffer.CopyTo(mSquaredDistances, (0, 0), 0, (n, n));
        }