Ejemplo n.º 1
0
 /// <summary>
 /// Kernel without an index parameter that stores a specialized value in
 /// element 0 of the given view.
 /// </summary>
 /// <typeparam name="T">The element type.</typeparam>
 /// <param name="data">The target view; only element 0 is written.</param>
 /// <param name="value">The specialized value to store.</param>
 internal static void SpecializedExplicitValueKernel<T>(
     ArrayView1D<T, Stride1D.Dense> data,
     SpecializedValue<T> value)
     where T : unmanaged, IEquatable<T>
 {
     // Unwrap the specialized value through its implicit conversion, then store it.
     T unwrapped = value;
     data[0] = unwrapped;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a camera from an existing one, optionally turning its view direction and
        /// translating it by <paramref name="movement"/>.
        /// </summary>
        /// <param name="camera">The camera whose size, bounce limit, up vector and field of view are copied.</param>
        /// <param name="movement">Offset added to both the origin and the look-at point.</param>
        /// <param name="turn">
        /// Turn angles; only <c>x</c> and <c>y</c> are read. Each is passed as the angle of an
        /// axis-angle rotation (presumably radians - TODO confirm against the Matrix4x4 type in use).
        /// </param>
        public Camera(Camera camera, Vec3 movement, Vec3 turn)
        {
            this.width      = camera.width;
            this.height     = camera.height;
            this.maxBounces = camera.maxBounces;

            // View vector of the source camera (look-at point minus origin).
            Vector4 temp = camera.lookAt - camera.origin;

            if (turn.y != 0)
            {
                // Rotation axis: cross(cross(up, view), view).
                // Note that the rotated vector is ADDED to temp rather than replacing it.
                temp += Vector4.Transform(temp, Matrix4x4.CreateFromAxisAngle(Vec3.cross(Vec3.cross(camera.up, (camera.lookAt - camera.origin)), (camera.lookAt - camera.origin)), (float)turn.y));
            }
            if (turn.x != 0)
            {
                // Rotation axis: cross(up, view). Applied to temp as already modified by the turn.y step.
                temp += Vector4.Transform(temp, Matrix4x4.CreateFromAxisAngle(Vec3.cross(camera.up, (camera.lookAt - camera.origin)), (float)turn.x));
            }

            // The new look-at point lies one unit from the OLD origin along the adjusted direction.
            lookAt = camera.origin + Vec3.unitVector(temp);

            // Translate origin and look-at point by the same offset.
            this.origin  = camera.origin + movement;
            this.lookAt += movement;
            this.up      = camera.up;

            // Rebuild the basis from the new view direction and the copied up vector.
            axis = OrthoNormalBasis.fromZY(Vec3.unitVector(lookAt - origin), up);

            // Derived values; width and height here are the fields assigned above.
            aspectRatio      = ((float)width / (float)height);
            // verticalFov * PI / 360 is half of verticalFov converted from degrees to radians
            // (assumes verticalFov is in degrees - TODO confirm).
            cameraPlaneDist  = 1.0f / XMath.Tan(camera.verticalFov * XMath.PI / 360.0f);
            this.verticalFov = camera.verticalFov;
            reciprocalHeight = 1.0f / height;
            reciprocalWidth  = 1.0f / width;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Kernel with an (unused) index parameter that stores a specialized value
 /// in element 0 of the given view.
 /// </summary>
 /// <typeparam name="T">The element type.</typeparam>
 /// <param name="_">The kernel index; it is not read.</param>
 /// <param name="data">The target view; only element 0 is written.</param>
 /// <param name="value">The specialized value to store.</param>
 internal static void SpecializedImplicitValueKernel<T>(
     Index1 _,
     ArrayView<T> data,
     SpecializedValue<T> value)
     where T : unmanaged, IEquatable<T>
 {
     // Unwrap the specialized value through its implicit conversion, then store it.
     T unwrapped = value;
     data[0] = unwrapped;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Generic kernel showing that specialization also works with generic kernels:
        /// each thread stores the specialized value at its global X index.
        /// </summary>
        /// <typeparam name="TValue">The element type.</typeparam>
        /// <param name="view">The target view.</param>
        /// <param name="specialized">The specialized value to store.</param>
        static void SpecializedGenericKernel<TValue>(
            ArrayView<TValue> view,
            SpecializedValue<TValue> specialized)
            where TValue : unmanaged, IEquatable<TValue>
        {
            // Implicit conversion from the specialized wrapper to the plain value.
            TValue constant = specialized;

            view[Grid.GlobalIndex.X] = constant;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Accumulates squared coordinate differences between points of two tiles, staging the
        /// tile coordinates in dynamic shared memory and writing two results at a time.
        /// </summary>
        /// <param name="mSquaredDistances">Output view; reinterpreted as <c>IlReal2</c> elements before writing.</param>
        /// <param name="mCoordinates">Input coordinates, read at <c>k * n + pointIndex</c> for coordinate <c>k</c>.</param>
        /// <param name="c">Number of coordinates per point, supplied as a specialized value.</param>
        /// <param name="n">Upper bound (exclusive) for every point index that is read or written.</param>
        private static void IlGpuKernelConstants(
            ArrayView2D <Real> mSquaredDistances,
            ArrayView <Real> mCoordinates,
            SpecializedValue <int> c,
            int n)
        {
            // Same as CudaKernelOptimised2, but the number of coordinates is given as a meta-constant.
            // Also, we write the results as float2.

            // Split the dynamic shared memory: the first c * Group.DimX elements hold tile I,
            // everything after that holds tile J.
            var shared       = SharedMemory.GetDynamic <Real>();
            var coordinatesI = shared.GetSubView(0, c * Group.DimX);
            var coordinatesJ = shared.GetSubView(c * Group.DimX);

            // First point index of each tile; both tiles are Group.DimX points wide.
            var bI = Grid.IdxY * Group.DimX;
            var bJ = Grid.IdxX * Group.DimX;

            // Each thread copies one point per tile, one coordinate at a time, skipping
            // points at or beyond n.
            for (int k = 0; k != c; ++k)
            {
                if (bI + Group.IdxX < n)
                {
                    coordinatesI[k * Group.DimX + Group.IdxX] = mCoordinates[k * n + bI + Group.IdxX];
                }

                if (bJ + Group.IdxX < n)
                {
                    coordinatesJ[k * Group.DimX + Group.IdxX] = mCoordinates[k * n + bJ + Group.IdxX];
                }
            }

            // All shared-memory writes must be finished before any thread reads them.
            Group.Barrier();

            // Split the group into two halves: 'line' selects the half and the starting row,
            // 'tid' is the thread's position inside its half.
            // NOTE(review): assumes Group.DimX is even - confirm against the launch configuration.
            var line = Group.IdxX / (Group.DimX / 2);
            var tid  = Group.IdxX % (Group.DimX / 2);

            // Each thread handles the pair of J points (2 * tid, 2 * tid + 1).
            if (bJ + tid * 2 < n)
            {
                var coordinatesJ2 = coordinatesJ.Cast <IlReal2>();

                // Rows advance in steps of 2. The condition uses the non-short-circuit '&',
                // so both comparisons are always evaluated.
                for (int i = line; i < Group.DimX & bI + i < n; i += 2)
                {
                    var dist = default(IlReal2);

                    // Sum the squared differences over all c coordinates for both J points.
                    for (int k = 0; k != c; ++k)
                    {
                        var coord1 = coordinatesI[k * Group.DimX + i];
                        var coord2 = coordinatesJ2[(k * Group.DimX / 2) + tid];
                        var diff   = new IlReal2(coord1 - coord2.X, coord1 - coord2.Y);

                        dist += diff * diff;
                    }

                    // Store both results with a single IlReal2 write.
                    var dst = mSquaredDistances.Cast <IlReal2>();
                    dst[bJ / 2 + tid, bI + i] = dist;
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// A simple kernel with a specialized kernel parameter: each thread stores the
        /// specialized value at its global X index.
        /// </summary>
        /// <param name="view">The target view.</param>
        /// <param name="specialized">
        /// This parameter will be replaced by a 'constant' value during the first kernel call.
        /// </param>
        public static void SpecializedKernel(
            ArrayView<int> view,
            SpecializedValue<int> specialized)
        {
            // Implicit conversion from the specialized wrapper to the plain value.
            int constant = specialized;

            view[Grid.GlobalIndex.X] = constant;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs the three specialization scenarios (plain int, custom structure, generic kernel)
        /// on every available accelerator.
        /// </summary>
        static void Main()
        {
            using (var ilgpuContext = new Context())
            {
                // Visit every accelerator that is available.
                foreach (var id in Accelerator.Accelerators)
                {
                    // Create the default accelerator for this id.
                    using (var device = Accelerator.Create(ilgpuContext, id))
                    {
                        Console.WriteLine($"Performing operations on {device}");
                        int threadsPerGroup = device.MaxNumThreadsPerGroup;

                        // Scenario 1: simple version, launched once per constant
                        using (var target = device.Allocate<int>(threadsPerGroup))
                        {
                            var simpleKernel = device.LoadStreamKernel<
                                ArrayView<int>,
                                SpecializedValue<int>>(SpecializedKernel);
                            foreach (var constant in new[] { 2, 23, 42 })
                            {
                                simpleKernel((1, threadsPerGroup), target.View, SpecializedValue.New(constant));
                            }
                        }

                        // Scenario 2: custom structure
                        using (var target = device.Allocate<int>(threadsPerGroup))
                        {
                            var structKernel = device.LoadStreamKernel<
                                ArrayView<int>,
                                SpecializedValue<CustomStruct>>(SpecializedCustomStructKernel);

                            var firstPair = SpecializedValue.New(new CustomStruct(1, 7));
                            structKernel((1, threadsPerGroup), target.View, firstPair);

                            var secondPair = SpecializedValue.New(new CustomStruct(23, 42));
                            structKernel((1, threadsPerGroup), target.View, secondPair);
                        }

                        // Scenario 3: generic kernel, launched once per constant
                        using (var target = device.Allocate<long>(threadsPerGroup))
                        {
                            var genericKernel = device.LoadStreamKernel<
                                ArrayView<long>,
                                SpecializedValue<long>>(SpecializedGenericKernel);
                            foreach (var constant in new[] { 23L, 42L })
                            {
                                genericKernel((1, threadsPerGroup), target.View, SpecializedValue.New(constant));
                            }
                        }
                    }
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Accumulates squared coordinate differences between points of two tiles, keeping tile I
        /// in shared memory and each thread's pair of J points in a per-thread array.
        /// </summary>
        /// <param name="mSquaredDistances">Output view; reinterpreted as <c>IlReal2</c> elements before writing.</param>
        /// <param name="mCoordinates">Input coordinates, read at <c>k * n + pointIndex</c> for coordinate <c>k</c>.</param>
        /// <param name="dimX">Tile width, supplied as a specialized value (presumably equal to the group size - verify against caller).</param>
        /// <param name="c">Number of coordinates per point, supplied as a specialized value.</param>
        /// <param name="n">Upper bound (exclusive) for every point index that is read or written.</param>
        private static void IlGpuKernelLocalMemory(
            ArrayView2D <Real> mSquaredDistances,
            ArrayView <Real> mCoordinates,
            SpecializedValue <int> dimX,
            SpecializedValue <int> c,
            int n)
        {
            // Same as KernelConstants, but use both local and shared memory to increase the effective shared memory.

            // Tile I goes to shared memory; the J pair handled by this thread goes to a per-thread array.
            var coordinatesI = SharedMemory.Allocate <Real>(c * dimX);
            var coordinatesJ = new IlReal2[c.Value];

            // First point index of each tile, plus this thread's half ('line') and position in it ('tid').
            // NOTE(review): assumes dimX is even - confirm against the launch configuration.
            var bI       = Grid.IdxY * dimX;
            var bJ       = Grid.IdxX * dimX;
            var line     = Group.IdxX / (dimX / 2);
            var tid      = Group.IdxX % (dimX / 2);
            // True when the first J point of this thread's pair lies below n.
            var isActive = bJ + tid * 2 < n;

            for (int k = 0; k != c.Value; ++k)
            {
                // Stage one I point per thread in shared memory.
                if (bI + Group.IdxX < n)
                {
                    coordinatesI[k * dimX + Group.IdxX] = mCoordinates[k * n + bI + Group.IdxX];
                }

                if (isActive)
                {
                    // Read two neighbouring J coordinates in one IlReal2 load.
                    // NOTE(review): the index (k * n + bJ) / 2 looks like it requires k * n + bJ to be even - confirm.
                    var mCoordinates2 = mCoordinates.Cast <IlReal2>();
                    coordinatesJ[k] = mCoordinates2[(k * n + bJ) / 2 + tid];
                }
            }

            // Shared-memory writes must be finished before any thread reads tile I.
            Group.Barrier();

            if (isActive)
            {
                // Rows advance in steps of 2, starting at 'line'.
                for (int i = line; i < dimX && bI + i < n; i += 2)
                {
                    var dist = default(IlReal2);

                    // Sum the squared differences over all c coordinates for both J points.
                    for (int k = 0; k != c.Value; ++k)
                    {
                        var coord1 = coordinatesI[k * dimX + i];
                        var coord2 = coordinatesJ[k];
                        var diff   = new IlReal2(coord1 - coord2.X, coord1 - coord2.Y);

                        dist += diff * diff;
                    }

                    // Store both results with a single IlReal2 write.
                    var dst = mSquaredDistances.Cast <IlReal2>();
                    dst[bJ / 2 + tid, bI + i] = dist;
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Kernel that multiplies two UInt64 values to produce a UInt128 value.
        /// </summary>
        /// <param name="index">The element index; its X component is one of the two factors.</param>
        /// <param name="buffer">The view that receives the product at <paramref name="index"/>.</param>
        /// <param name="constant">The other factor, supplied as a specialized value.</param>
        public static void MultiplyUInt128Kernel(
            Index1D index,
            ArrayView<UInt128> buffer,
            SpecializedValue<ulong> constant)
        {
            ulong lhs = constant.Value;

            // NB: index.X has to be converted to ulong so that operand %2 uses the correct PTX register type.
            ulong rhs = (ulong)index.X;

            // Upper and lower 64 bits of the product, each from its own PTX instruction.
            ulong upperBits;
            ulong lowerBits;
            CudaAsm.Emit("mul.hi.u64 %0, %1, %2;", out upperBits, lhs, rhs);
            CudaAsm.Emit("mul.lo.u64 %0, %1, %2;", out lowerBits, lhs, rhs);

            buffer[index] = new UInt128(upperBits, lowerBits);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Kernel taking a specialized user-defined structure: each thread stores the sum of
        /// the structure's two values at its global X index.
        /// </summary>
        /// <remarks>
        /// The specialization functionality supports user-defined types, as long as they are
        /// value types and implement the IEquatable interface (and have useful GetHashCode and
        /// Equals implementations).
        /// </remarks>
        /// <param name="view">The target view.</param>
        /// <param name="specialized">The specialized structure.</param>
        public static void SpecializedCustomStructKernel(
            ArrayView<int> view,
            SpecializedValue<CustomStruct> specialized)
        {
            // A specialized value converts implicitly to its non-specialized form;
            // the reverse conversion is not available.
            CustomStruct unwrapped = specialized;

            // Because the value is specialized, the additional optimization passes can
            // constant-propagate this sum into a store of a single constant (in this case).
            var sum = unwrapped.Value1 + unwrapped.Value2;

            view[Grid.GlobalIndex.X] = sum;
        }
Ejemplo n.º 11
0
    /// <summary>
    /// Creates a camera from its position, target point, up vector, image size and vertical field of view.
    /// </summary>
    /// <param name="origin">The camera position.</param>
    /// <param name="lookAt">The point the camera looks at.</param>
    /// <param name="up">The up vector used to build the camera basis.</param>
    /// <param name="width">The image width; stored as a specialized value.</param>
    /// <param name="height">The image height; stored as a specialized value.</param>
    /// <param name="verticalFov">The vertical field of view (presumably in degrees - TODO confirm).</param>
    public Camera(Vec3 origin, Vec3 lookAt, Vec3 up, int width, int height, float verticalFov)
    {
        // Placement and orientation, stored as given.
        this.origin      = origin;
        this.lookAt      = lookAt;
        this.up          = up;
        this.verticalFov = verticalFov;

        // Image dimensions are wrapped in specialized values.
        this.width  = new SpecializedValue<int>(width);
        this.height = new SpecializedValue<int>(height);

        // Quantities derived from the image size (computed from the int parameters).
        reciprocalWidth  = 1.0f / width;
        reciprocalHeight = 1.0f / height;
        aspectRatio      = (float)width / (float)height;

        // Reciprocal of tan(verticalFov * PI / 360).
        cameraPlaneDist = 1.0f / XMath.Tan(verticalFov * XMath.PI / 360.0f);

        // Basis built from the normalized view direction and the up vector.
        axis = OrthoNormalBasis.fromZY(Vec3.unitVector(lookAt - origin), up);
    }
Ejemplo n.º 12
0
        /// <summary>
        /// Demonstrates using the mul.hi.u64 and mul.lo.u64 inline PTX instructions to
        /// multiply two UInt64 values to produce a UInt128 value.
        /// </summary>
        /// <param name="accelerator">The CUDA accelerator that runs the kernel.</param>
        static void MultiplyUInt128(CudaAccelerator accelerator)
        {
            const int elementCount = 1024;

            using var deviceBuffer = accelerator.Allocate1D<UInt128>(elementCount);
            var multiply = accelerator.LoadAutoGroupedStreamKernel<
                Index1D,
                ArrayView<UInt128>,
                SpecializedValue<ulong>>(MultiplyUInt128Kernel);

            // One thread per element; every element index is multiplied by ulong.MaxValue.
            var factor = SpecializedValue.New(ulong.MaxValue);
            multiply((int)deviceBuffer.Length, deviceBuffer.View, factor);

            // Copy the products back to the host and print them together with their index.
            var products = deviceBuffer.GetAsArray1D();
            var position = 0;
            foreach (var product in products)
            {
                Console.WriteLine($"[{position}] = {product}");
                position++;
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Uploads the coordinates, launches <paramref name="kernelFunc"/> on a square grid of
        /// 128-thread groups, reports the elapsed time and copies the result back to the host.
        /// </summary>
        /// <param name="gpu">The CUDA accelerator to run on.</param>
        /// <param name="mSquaredDistances">Host array that receives the n x n result.</param>
        /// <param name="mCoordinates">Host array with the input coordinates.</param>
        /// <param name="c">Number of coordinates per point; passed to the kernel as a specialized value.</param>
        /// <param name="n">Number of points.</param>
        /// <param name="name">Label passed to the performance report.</param>
        /// <param name="kernelFunc">The kernel to launch.</param>
        private static void IlGpuOptimisedImpl(
            CudaAccelerator gpu,
            Real[] mSquaredDistances,
            Real[] mCoordinates,
            int c,
            int n,
            string name,
            Action<ArrayView2D<Real>, ArrayView<Real>, SpecializedValue<int>, SpecializedValue<int>, int> kernelFunc)
        {
            using var deviceDistances   = gpu.Allocate<Real>(n, n);
            using var deviceCoordinates = gpu.Allocate(mCoordinates);

            // The stopwatch starts after the device buffers have been created.
            var stopwatch = Stopwatch.StartNew();

            const int threadsPerGroup = 128;
            var groupsPerAxis    = Util.DivUp(n, threadsPerGroup);
            var launchParameters = ((groupsPerAxis, groupsPerAxis, 1), (threadsPerGroup, 1, 1));

            gpu.Launch(
                kernelFunc,
                gpu.DefaultStream,
                launchParameters,
                deviceDistances.View,
                deviceCoordinates.View,
                SpecializedValue.New(threadsPerGroup),
                SpecializedValue.New(c),
                n);
            gpu.Synchronize();

            Util.PrintPerformance(stopwatch, name, n, c, n);

            deviceDistances.CopyTo(mSquaredDistances, (0, 0), 0, (n, n));
        }
Ejemplo n.º 14
0
        /// <summary>
        /// The actual unique kernel implementation.
        /// </summary>
        /// <remarks>
        /// Each group flags the elements of its tile that differ from their predecessor, then
        /// its first thread copies the flagged elements to the front of
        /// <paramref name="input"/> and stores the running length in <c>output[0]</c>.
        /// Groups are serialized through <paramref name="sequentialGroupExecutor"/>.
        /// </remarks>
        /// <typeparam name="T">The element type.</typeparam>
        /// <typeparam name="TComparisonOperation">The comparison operation.</typeparam>
        /// <param name="input">The input view.</param>
        /// <param name="output">The output view to store the new length.</param>
        /// <param name="sequentialGroupExecutor">
        /// The sequential group executor to use.
        /// </param>
        /// <param name="tileSize">The tile size.</param>
        /// <param name="numIterationsPerGroup">
        /// The number of iterations per group.
        /// </param>
        internal static void UniqueKernel <T, TComparisonOperation>(
            ArrayView <T> input,
            ArrayView <long> output,
            SequentialGroupExecutor sequentialGroupExecutor,
            SpecializedValue <int> tileSize,
            Index1D numIterationsPerGroup)
            where T : unmanaged
            where TComparisonOperation : struct, IComparisonOperation <T>
        {
            TComparisonOperation comparison = default;
            var isFirstGrid = Grid.IdxX == 0;
            var tileInfo    = new TileInfo(input.IntLength, numIterationsPerGroup);

            // Sync groups and wait for the current one to become active
            sequentialGroupExecutor.Wait();

            // One flag per tile element: true means "keep this element".
            var temp     = SharedMemory.Allocate <bool>(tileSize);
            var startIdx = Grid.ComputeGlobalIndex(Grid.IdxX, 0);

            for (
                int i = tileInfo.StartIndex;
                i < tileInfo.MaxLength;
                i += Group.DimX)
            {
                if (Group.IsFirstThread && i == tileInfo.StartIndex && isFirstGrid)
                {
                    // The very first element has no predecessor and is always kept.
                    temp[0] = true;
                }
                else
                {
                    // For the first element of a later group, the predecessor is the last
                    // element kept so far (output[0] - 1); otherwise it is the element
                    // directly before the current one.
                    var currIdx = i;
                    var prevIdx = Group.IsFirstThread && i == tileInfo.StartIndex
                        ? output[0] - 1
                        : currIdx - 1;

                    // Keep the element when it compares as different from its predecessor.
                    temp[currIdx - startIdx] =
                        comparison.Compare(input[currIdx], input[prevIdx]) != 0;
                }
            }
            // All flags must be written before the first thread reads them.
            Group.Barrier();

            if (Group.IsFirstThread)
            {
                // Continue writing where the previous groups stopped (0 for the first group).
                var offset    = isFirstGrid ? 0 : output[0];
                var maxLength =
                    XMath.Min(startIdx + temp.IntLength, tileInfo.MaxLength) - startIdx;

                // Copy the kept elements to the front of the input view, in order.
                for (var i = 0; i < maxLength; i++)
                {
                    if (temp[i])
                    {
                        input[offset++] = input[startIdx + i];
                    }
                }
                // Publish the new length for the next group.
                output[0] = offset;
            }

            // Device-level fence and group barrier before the next group is released.
            MemoryFence.DeviceLevel();
            Group.Barrier();
            sequentialGroupExecutor.Release();
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Performs the first radix-sort pass.
        /// </summary>
        /// <typeparam name="T">The element type.</typeparam>
        /// <typeparam name="TOperation">The radix-sort operation.</typeparam>
        /// <typeparam name="TSpecialization">The specialization type.</typeparam>
        /// <param name="view">The input view to use.</param>
        /// <param name="counter">The global counter view.</param>
        /// <param name="groupSize">The number of threads in the group.</param>
        /// <param name="numGroups">The number of virtually launched groups.</param>
        /// <param name="paddedLength">The padded length of the input view.</param>
        /// <param name="shift">The bit shift to use.</param>
        internal static void RadixSortKernel1 <T, TOperation, TSpecialization>(
            ArrayView <T> view,
            ArrayView <int> counter,
            SpecializedValue <int> groupSize,
            int numGroups,
            int paddedLength,
            int shift)
            where T : unmanaged
            where TOperation : struct, IRadixSortOperation <T>
            where TSpecialization : struct, IRadixSortSpecialization
        {
            TSpecialization specialization = default;
            var             scanMemory     = SharedMemory.Allocate <int>(
                groupSize * specialization.UnrollFactor);

            int gridIdx = Grid.IdxX;

            for (
                int i = Grid.GlobalIndex.X;
                i < paddedLength;
                i += GridExtensions.GridStrideLoopStride)
            {
                bool inRange = i < view.Length;

                // Read value from global memory
                TOperation operation = default;
                T          value     = operation.DefaultValue;
                if (inRange)
                {
                    value = view[i];
                }
                var bits = operation.ExtractRadixBits(
                    value,
                    shift,
                    specialization.UnrollFactor - 1);

                for (int j = 0; j < specialization.UnrollFactor; ++j)
                {
                    scanMemory[Group.IdxX + groupSize * j] = 0;
                }
                if (inRange)
                {
                    scanMemory[Group.IdxX + groupSize * bits] = 1;
                }
                Group.Barrier();

                for (int j = 0; j < specialization.UnrollFactor; ++j)
                {
                    var address = Group.IdxX + groupSize * j;
                    scanMemory[address] =
                        GroupExtensions.ExclusiveScan <int, AddInt32>(scanMemory[address]);
                }
                Group.Barrier();

                if (Group.IdxX == Group.DimX - 1)
                {
                    // Write counters to global memory
                    for (int j = 0; j < specialization.UnrollFactor; ++j)
                    {
                        ref var newOffset = ref scanMemory[Group.IdxX + groupSize * j];
                        newOffset += Utilities.Select(inRange & j == bits, 1, 0);
                        counter[j * numGroups + gridIdx] = newOffset;
                    }
                }
                Group.Barrier();

                var    gridSize = gridIdx * Group.DimX;
                Index1 pos      = gridSize + scanMemory[Group.IdxX + groupSize * bits] -
                                  Utilities.Select(inRange & Group.IdxX == Group.DimX - 1, 1, 0);
                for (int j = 1; j <= bits; ++j)
                {
                    pos += scanMemory[groupSize * j - 1] +
                           Utilities.Select(j - 1 == bits, 1, 0);
                }

                // Pre-sort the current value into the corresponding segment
                if (inRange)
                {
                    view[pos] = value;
                }
                Group.Barrier();

                gridIdx += Grid.DimX;
            }
Ejemplo n.º 16
0
        /// <summary>
        /// Applies one element-wise operation between <paramref name="Input"/> and
        /// <paramref name="Scalar"/> and stores the result in <paramref name="OutPut"/>.
        /// Nothing is written when the operation code matches none of the handled members.
        /// </summary>
        /// <param name="index">The element index processed by this thread.</param>
        /// <param name="OutPut">The view that receives the result.</param>
        /// <param name="Input">The view that is read.</param>
        /// <param name="Scalar">The scalar operand.</param>
        /// <param name="operation">The operation code, cast to <c>Operations</c>; supplied as a specialized value.</param>
        static void ScalarConsecutiveOperationKernel(Index1 index, ArrayView<float> OutPut, ArrayView<float> Input, float Scalar, SpecializedValue<int> operation)
        {
            var selected = (Operations)operation.Value;

            if (selected == Operations.multiplication)
            {
                OutPut[index] = Input[index] * Scalar;
            }
            else if (selected == Operations.addition)
            {
                OutPut[index] = Input[index] + Scalar;
            }
            else if (selected == Operations.subtraction)
            {
                OutPut[index] = Input[index] - Scalar;
            }
            else if (selected == Operations.flipSubtraction)
            {
                // Operands swapped: scalar minus element.
                OutPut[index] = Scalar - Input[index];
            }
            else if (selected == Operations.division)
            {
                OutPut[index] = Input[index] / Scalar;
            }
            else if (selected == Operations.inverseDivision)
            {
                // Operands swapped: scalar divided by element.
                OutPut[index] = Scalar / Input[index];
            }
            else if (selected == Operations.power)
            {
                OutPut[index] = XMath.Pow(Input[index], Scalar);
            }
            else if (selected == Operations.powerFlipped)
            {
                // Operands swapped: scalar raised to the element.
                OutPut[index] = XMath.Pow(Scalar, Input[index]);
            }
            else if (selected == Operations.squareOfDiffs)
            {
                OutPut[index] = XMath.Pow((Input[index] - Scalar), 2f);
            }
        }