示例#1
0
        /// <summary>
        /// Computes one tile of squared distances: the coordinates for both tile axes are
        /// staged in dynamic shared memory, then every thread accumulates the squared
        /// differences for a pair of adjacent columns and stores them as one <c>IlReal2</c>.
        /// </summary>
        /// <param name="mSquaredDistances">Output view; reinterpreted as <c>IlReal2</c> for the stores.</param>
        /// <param name="mCoordinates">Input view; coordinate <c>k</c> of point <c>p</c> is read from index <c>k * n + p</c>.</param>
        /// <param name="c">Number of coordinates per point, passed as a specialized value.</param>
        /// <param name="n">Upper bound for point indices; also the stride between coordinates in <paramref name="mCoordinates"/>.</param>
        private static void IlGpuKernelConstants(
            ArrayView2D <Real> mSquaredDistances,
            ArrayView <Real> mCoordinates,
            SpecializedValue <int> c,
            int n)
        {
            // Same approach as CudaKernelOptimised2, except that the number of coordinates
            // arrives as a meta-constant and the results are written as float2.
            // NOTE(review): assumes Group.DimX is even and that the launch provides at least
            // 2 * c * Group.DimX elements of dynamic shared memory - confirm against the caller.

            var groupSize = Group.DimX;
            var lane      = Group.IdxX;

            // The dynamic shared buffer is split in two: the first c * groupSize elements
            // hold the I-axis tile, everything after that holds the J-axis tile.
            var scratch = SharedMemory.GetDynamic<Real>();
            var tileI   = scratch.GetSubView(0, c * groupSize);
            var tileJ   = scratch.GetSubView(c * groupSize);

            var originI = Grid.IdxY * groupSize;
            var originJ = Grid.IdxX * groupSize;

            // The bounds tests do not depend on k, so evaluate them once.
            var loadI = originI + lane < n;
            var loadJ = originJ + lane < n;

            for (int k = 0; k != c; ++k)
            {
                var slot = k * groupSize + lane;

                if (loadI)
                {
                    tileI[slot] = mCoordinates[k * n + originI + lane];
                }

                if (loadJ)
                {
                    tileJ[slot] = mCoordinates[k * n + originJ + lane];
                }
            }

            // Every thread of the group must reach this point before the tiles are read.
            Group.Barrier();

            // Split the group in two halves: 'firstRow' selects the starting row (rows are
            // then visited with a stride of 2) and 'pair' selects the column pair.
            var firstRow = lane / (groupSize / 2);
            var pair     = lane % (groupSize / 2);

            if (originJ + pair * 2 < n)
            {
                var tileJPairs = tileJ.Cast<IlReal2>();

                // The non-short-circuiting '&' evaluates both bounds checks on every iteration.
                for (int row = firstRow; row < groupSize & originI + row < n; row += 2)
                {
                    var sum = default(IlReal2);

                    for (int k = 0; k != c; ++k)
                    {
                        var a     = tileI[k * groupSize + row];
                        var b     = tileJPairs[(k * groupSize / 2) + pair];
                        var delta = new IlReal2(a - b.X, a - b.Y);

                        sum += delta * delta;
                    }

                    var output = mSquaredDistances.Cast<IlReal2>();
                    output[originJ / 2 + pair, originI + row] = sum;
                }
            }
        }
示例#2
0
        /// <summary>
        /// Computes one tile of squared distances: the I-axis coordinates are staged in a
        /// statically sized shared-memory buffer, while each thread keeps the coordinates of
        /// its own column pair in a per-thread <c>IlReal2</c> array.
        /// </summary>
        /// <param name="mSquaredDistances">Output view; reinterpreted as <c>IlReal2</c> for the stores.</param>
        /// <param name="mCoordinates">Input view; coordinate <c>k</c> of point <c>p</c> is read from index <c>k * n + p</c>.</param>
        /// <param name="dimX">Tile width used for all index computations, passed as a specialized value.</param>
        /// <param name="c">Number of coordinates per point, passed as a specialized value.</param>
        /// <param name="n">Upper bound for point indices; also the stride between coordinates in <paramref name="mCoordinates"/>.</param>
        private static void IlGpuKernelLocalMemory(
            ArrayView2D <Real> mSquaredDistances,
            ArrayView <Real> mCoordinates,
            SpecializedValue <int> dimX,
            SpecializedValue <int> c,
            int n)
        {
            // Same idea as KernelConstants, but local memory is used next to shared memory
            // so that the effective amount of shared memory increases.
            // NOTE(review): presumably dimX equals the group size and is even, and
            // (k * n + bJ) is even for the IlReal2 reads - confirm against the caller.

            var tileI  = SharedMemory.Allocate<Real>(c * dimX);
            var pairsJ = new IlReal2[c.Value];

            var lane       = Group.IdxX;
            var originI    = Grid.IdxY * dimX;
            var originJ    = Grid.IdxX * dimX;
            var firstRow   = lane / (dimX / 2);
            var pair       = lane % (dimX / 2);
            var hasColumns = originJ + pair * 2 < n;
            var hasRow     = originI + lane < n;

            for (int k = 0; k != c.Value; ++k)
            {
                if (hasRow)
                {
                    tileI[k * dimX + lane] = mCoordinates[k * n + originI + lane];
                }

                if (hasColumns)
                {
                    // Read both J coordinates of this thread's column pair in one IlReal2 load.
                    var coordinatePairs = mCoordinates.Cast<IlReal2>();
                    pairsJ[k] = coordinatePairs[(k * n + originJ) / 2 + pair];
                }
            }

            // Every thread of the group must reach this point before the shared tile is read.
            Group.Barrier();

            if (!hasColumns)
            {
                return;
            }

            // Rows are visited with a stride of 2, starting at 0 or 1 depending on the half
            // of the group this thread belongs to.
            var row = firstRow;

            while (row < dimX && originI + row < n)
            {
                var sum = default(IlReal2);

                for (int k = 0; k != c.Value; ++k)
                {
                    var a     = tileI[k * dimX + row];
                    var b     = pairsJ[k];
                    var delta = new IlReal2(a - b.X, a - b.Y);

                    sum += delta * delta;
                }

                var output = mSquaredDistances.Cast<IlReal2>();
                output[originJ / 2 + pair, originI + row] = sum;

                row += 2;
            }
        }