Ejemplo n.º 1
0
        /// <summary>
        /// Squared-distance kernel that receives the coordinate count as a specialized value and
        /// stores each pair of results through an <c>IlReal2</c> view of the output.
        /// </summary>
        /// <remarks>
        /// Original author's note: same as CudaKernelOptimised2 (not visible in this file), but the
        /// number of coordinates is given as a meta-constant and the results are written as float2.
        /// NOTE(review): the paired store and the <c>Group.DimX / 2</c> split look like they expect an
        /// even group size and an even <paramref name="n"/>; confirm against the launch code.
        /// </remarks>
        /// <param name="mSquaredDistances">Output view; written at [j / 2, i] after a cast to IlReal2.</param>
        /// <param name="mCoordinates">Input coordinates; coordinate k of point p is read from index k * n + p.</param>
        /// <param name="c">Number of coordinates per point.</param>
        /// <param name="n">Number of points; every point index is checked against it.</param>
        private static void IlGpuKernelConstants(
            ArrayView2D <Real> mSquaredDistances,
            ArrayView <Real> mCoordinates,
            SpecializedValue <int> c,
            int n)
        {
            var width = Group.DimX;
            var lane  = Group.IdxX;

            // The dynamic shared memory is split in two: the first c * width elements hold the
            // I points, everything after that offset holds the J points.
            var scratch = SharedMemory.GetDynamic <Real>();
            var pointsI = scratch.GetSubView(0, c * width);
            var pointsJ = scratch.GetSubView(c * width);

            var baseI = Grid.IdxY * width;
            var baseJ = Grid.IdxX * width;

            // Whether this thread has an in-range point to stage does not depend on k.
            var loadI = baseI + lane < n;
            var loadJ = baseJ + lane < n;

            for (int k = 0; k != c; ++k)
            {
                var slot = k * width + lane;

                if (loadI)
                {
                    pointsI[slot] = mCoordinates[k * n + baseI + lane];
                }

                if (loadJ)
                {
                    pointsJ[slot] = mCoordinates[k * n + baseJ + lane];
                }
            }

            // Every thread reaches this barrier before any staged coordinate is read back.
            Group.Barrier();

            // The group is split into two halves: 'line' selects the starting I row (0 or 1),
            // 'tid' selects which pair of J points this thread handles.
            var half = width / 2;
            var line = lane / half;
            var tid  = lane % half;

            if (baseJ + tid * 2 >= n)
            {
                return;
            }

            var pairsJ = pointsJ.Cast <IlReal2>();

            // Non-short-circuit '&' is kept from the original; both operands are side-effect free.
            for (int i = line; i < width & baseI + i < n; i += 2)
            {
                var acc = default(IlReal2);

                for (int k = 0; k != c; ++k)
                {
                    var own   = pointsI[k * width + i];
                    var other = pairsJ[(k * width / 2) + tid];
                    var delta = new IlReal2(own - other.X, own - other.Y);

                    acc += delta * delta;
                }

                var packedOut = mSquaredDistances.Cast <IlReal2>();
                packedOut[baseJ / 2 + tid, baseI + i] = acc;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Squared-distance kernel in which each thread produces two adjacent J results at once by
        /// reading the staged J coordinates as <c>IlReal2</c> pairs.
        /// </summary>
        /// <remarks>
        /// Original author's note: same as KernelSharedMemory, but one thread does two elements in
        /// one by using float2 reads.
        /// NOTE(review): <c>Group.DimX / 2</c> is used as a divisor, so the group size presumably
        /// has to be even and at least 2; confirm against the launch configuration.
        /// </remarks>
        /// <param name="mSquaredDistances">Output view; element [j, i] receives the squared distance between points i and j.</param>
        /// <param name="mCoordinates">Input coordinates; coordinate k of point p is read from index k * n + p.</param>
        /// <param name="c">Number of coordinates per point.</param>
        /// <param name="n">Number of points; every point index is checked against it.</param>
        private static void IlGpuKernelFloat2(
            ArrayView2D <Real> mSquaredDistances,
            ArrayView <Real> mCoordinates,
            int c,
            int n)
        {
            // The dynamic shared memory is split in two: the first c * Group.DimX elements hold
            // the I points, everything after that offset holds the J points.
            var shared       = SharedMemory.GetDynamic <Real>();
            var coordinatesI = shared.GetSubView(0, c * Group.DimX);
            var coordinatesJ = shared.GetSubView(c * Group.DimX);

            var bI = Grid.IdxY * Group.DimX;
            var bJ = Grid.IdxX * Group.DimX;

            // Stage the coordinates of this group's I and J points, one point per thread.
            for (int k = 0; k != c; ++k)
            {
                if (bI + Group.IdxX < n)
                {
                    coordinatesI[k * Group.DimX + Group.IdxX] = mCoordinates[k * n + bI + Group.IdxX];
                }

                if (bJ + Group.IdxX < n)
                {
                    coordinatesJ[k * Group.DimX + Group.IdxX] = mCoordinates[k * n + bJ + Group.IdxX];
                }
            }

            // Every thread reaches this barrier before any staged coordinate is read back.
            Group.Barrier();

            // The group is split into two halves: 'line' selects the starting I row (0 or 1),
            // 'tid' selects which pair of J points this thread handles.
            var line = Group.IdxX / (Group.DimX / 2);
            var tid  = Group.IdxX % (Group.DimX / 2);

            if (bJ + tid * 2 < n)
            {
                var coordinatesJ2 = coordinatesJ.Cast <IlReal2>();

                // The guard above only proves that the first element of the pair is in range.
                // When n is odd, the last pair's second element has index n and must not be stored.
                var secondInRange = bJ + 2 * tid + 1 < n;

                for (int i = line; i < Group.DimX && bI + i < n; i += 2)
                {
                    var dist = default(IlReal2);

                    for (int k = 0; k != c; ++k)
                    {
                        var coord1 = coordinatesI[k * Group.DimX + i];
                        var coord2 = coordinatesJ2[(k * Group.DimX / 2) + tid];
                        var diff   = new IlReal2(coord1 - coord2.X, coord1 - coord2.Y);

                        dist += diff * diff;
                    }

                    mSquaredDistances[bJ + 2 * tid + 0, bI + i] = dist.X;

                    if (secondInRange)
                    {
                        mSquaredDistances[bJ + 2 * tid + 1, bI + i] = dist.Y;
                    }
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Round-trips the calling thread's group index through dynamic shared memory viewed as
        /// <c>short</c> and stores the result in <paramref name="view"/>.
        /// </summary>
        /// <param name="globalIndex">Index in <paramref name="view"/> that this thread writes to.</param>
        /// <param name="view">Target view receiving the value read back from shared memory.</param>
        private static void DynamicSharedMemoryHelper(int globalIndex, ArrayView <short> view)
        {
            var lane = Group.IdxX;

            // Dynamic shared memory, requested with a custom element type.
            var scratch = SharedMemory.GetDynamic <short>();

            // Write this thread's slot, then hit the group barrier before reading it back.
            scratch[lane] = (short)lane;
            Group.Barrier();

            // Copy the slot out to the target view.
            view[globalIndex] = scratch[lane];
        }
Ejemplo n.º 4
0
        // The kernel
        /// <summary>
        /// Writes each thread's group index to <paramref name="view1"/> via dynamic shared memory,
        /// then delegates to <c>DynamicSharedMemoryHelper</c> to fill <paramref name="view2"/>.
        /// </summary>
        /// <param name="view1">Target view for the <c>int</c> round trip, indexed by the global X index.</param>
        /// <param name="view2">Target view handed to the helper, indexed by the same global X index.</param>
        public static void SharedMemKernel(ArrayView <int> view1, ArrayView <short> view2)
        {
            var lane        = Group.IdxX;
            var globalIndex = Grid.GlobalIndex.X;

            // A statically sized alternative would be SharedMemory.Allocate<int>(1024);
            // this kernel uses the dynamically sized variant instead.
            var scratch = SharedMemory.GetDynamic <int>();

            // Write this thread's slot, then hit the group barrier before reading it back.
            scratch[lane] = lane;
            Group.Barrier();

            // Copy the slot out to the first target view.
            view1[globalIndex] = scratch[lane];

            // The helper requests dynamic shared memory again, this time as short.
            // NOTE(review): if both requests alias the same region, the helper's writes could race
            // with the read above in other threads - confirm whether a barrier is needed here.
            DynamicSharedMemoryHelper(globalIndex, view2);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Squared-distance kernel that first stages the coordinates of one group of I points and
        /// one group of J points in shared memory, then computes all I/J combinations from there.
        /// </summary>
        /// <remarks>
        /// Original author's rationale: the shared memory holds two vectors of K dimensions for B
        /// points (conceptually <c>Real[k * blockDim.x]</c> each). Once they are filled, the group
        /// produces a square [B, B] matrix of distances, so the I and J coordinates are only read
        /// once from <paramref name="mCoordinates"/>. This works well if K is small enough;
        /// otherwise the shared memory is too small and not enough blocks get scheduled per SM.
        /// </remarks>
        /// <param name="mSquaredDistances">Output view; element [j, i] receives the squared distance between points i and j.</param>
        /// <param name="mCoordinates">Input coordinates; coordinate k of point p is read from index k * n + p.</param>
        /// <param name="c">Number of coordinates per point.</param>
        /// <param name="n">Number of points; every point index is checked against it.</param>
        private static void IlGpuKernelSharedMemory(
            ArrayView2D <Real> mSquaredDistances,
            ArrayView <Real> mCoordinates,
            int c,
            int n)
        {
            var width = Group.DimX;
            var lane  = Group.IdxX;

            // First c * width elements: I points. Everything after that offset: J points.
            var scratch = SharedMemory.GetDynamic <Real>();
            var pointsI = scratch.GetSubView(0, c * width);
            var pointsJ = scratch.GetSubView(c * width);

            var baseI = Grid.IdxY * width;
            var baseJ = Grid.IdxX * width;

            // Whether this thread has an in-range point to stage does not depend on k.
            var loadI = baseI + lane < n;
            var loadJ = baseJ + lane < n;

            for (int k = 0; k != c; ++k)
            {
                var slot = k * width + lane;

                if (loadI)
                {
                    pointsI[slot] = mCoordinates[k * n + baseI + lane];
                }

                if (loadJ)
                {
                    pointsJ[slot] = mCoordinates[k * n + baseJ + lane];
                }
            }

            // Every thread reaches this barrier before any staged coordinate is read back.
            Group.Barrier();

            // Threads whose J point lies past the end have nothing to compute.
            if (baseJ + lane >= n)
            {
                return;
            }

            // This thread owns one J point and walks over all in-range I points of the group.
            for (int i = 0; i < width && baseI + i < n; ++i)
            {
                Real sum = 0;

                for (int k = 0; k != c; ++k)
                {
                    var delta = pointsI[k * width + i] - pointsJ[k * width + lane];

                    sum += delta * delta;
                }

                mSquaredDistances[baseJ + lane, baseI + i] = sum;
            }
        }