Esempio n. 1
0
        /// <summary>
        /// Tiled matrix multiplication kernel: computes c = a * b for square
        /// N x N matrices stored as flat, row-major buffers (element [row, col]
        /// lives at index row * N + col).
        /// </summary>
        /// <remarks>
        /// Each thread accumulates one output element, staging
        /// groupSize x groupSize tiles of both inputs in shared memory.
        /// Assumes the kernel is launched with 2D groups of
        /// groupSize x groupSize threads - TODO confirm against the launch site.
        /// No thread leaves the kernel before the tile loop has finished, so
        /// every thread of a group reaches every <c>Group.Barrier()</c> call;
        /// out-of-range threads contribute zero padding instead of returning early.
        /// </remarks>
        /// <param name="a">Left input matrix (N x N, row-major).</param>
        /// <param name="b">Right input matrix (N x N, row-major).</param>
        /// <param name="c">Output matrix (N x N, row-major).</param>
        /// <param name="N">Number of rows/columns of the matrices.</param>
        private static void MatrixMulShared(ArrayView <float> a, ArrayView <float> b, ArrayView <float> c, int N)
        {
            int gx = Grid.GlobalIndex.X;
            int gy = Grid.GlobalIndex.Y;
            int lx = Group.IdxX;
            int ly = Group.IdxY;

            float sum = 0;

            var sa = SharedMemory.Allocate2D <float>(groupSize, groupSize);
            var sb = SharedMemory.Allocate2D <float>(groupSize, groupSize);

            for (int k = 0; k < N; k += groupSize)
            {
                // Column of 'a' and row of 'b' that this thread stages for the current tile.
                int aCol = lx + k;
                int bRow = ly + k;

                // Guarded loads: pad with zeros where the tile sticks out of the
                // matrix so that N does not have to be a multiple of groupSize.
                if (gy < N && aCol < N)
                {
                    sa[lx, ly] = a[gy * N + aCol];
                }
                else
                {
                    sa[lx, ly] = 0;
                }

                if (bRow < N && gx < N)
                {
                    sb[lx, ly] = b[bRow * N + gx];
                }
                else
                {
                    sb[lx, ly] = 0;
                }

                // Wait until both tiles are fully populated before reading them.
                Group.Barrier();

                for (int r = 0; r < groupSize; r++)
                {
                    sum += sa[r, ly] * sb[lx, r];
                }

                // Wait until every thread is done with the tiles before they are overwritten.
                Group.Barrier();
            }

            // Only threads that map to a real output element write a result.
            if (gx < N && gy < N)
            {
                c[gy * N + gx] = sum;
            }
        }
Esempio n. 2
0
        /// <summary>
        /// The tiled matrix multiplication kernel that runs on the accelerated device.
        /// </summary>
        /// <remarks>
        /// Each thread accumulates one element of <paramref name="cView"/>. The
        /// shared dimension K is walked in steps of TILE_SIZE; for every step a
        /// TILE_SIZE x TILE_SIZE tile of each input is staged in shared memory,
        /// zero-padded where the tile reaches past the matrix bounds.
        /// Assumes the kernel is launched with 2D groups of
        /// TILE_SIZE x TILE_SIZE threads - TODO confirm against the launch site.
        /// </remarks>
        /// <param name="aView">An input matrix of size MxK</param>
        /// <param name="bView">An input matrix of size KxN</param>
        /// <param name="cView">An output matrix of size MxN</param>
        static void MatrixMultiplyTiledKernel(
            ArrayView2D <float, Stride2D.DenseX> aView,
            ArrayView2D <float, Stride2D.DenseX> bView,
            ArrayView2D <float, Stride2D.DenseX> cView)
        {
            var global = Grid.GlobalIndex.XY;
            var x      = Group.IdxX;
            var y      = Group.IdxY;

            var aTile = SharedMemory.Allocate2D <float, Stride2D.DenseX>(new Index2D(TILE_SIZE, TILE_SIZE), new Stride2D.DenseX(TILE_SIZE));
            var bTile = SharedMemory.Allocate2D <float, Stride2D.DenseX>(new Index2D(TILE_SIZE, TILE_SIZE), new Stride2D.DenseX(TILE_SIZE));
            var sum   = 0.0f;

            // The loop variable offsets the second index of A and the first index
            // of B, i.e. the shared dimension K. It must therefore be bounded by
            // K (aView.IntExtent.Y), not by the row count M (aView.IntExtent.X);
            // otherwise the dot product is truncated whenever M < K.
            var sharedExtent = aView.IntExtent.Y;

            for (var i = 0; i < sharedExtent; i += TILE_SIZE)
            {
                if (global.X < aView.IntExtent.X && y + i < aView.IntExtent.Y)
                {
                    aTile[x, y] = aView[global.X, y + i];
                }
                else
                {
                    aTile[x, y] = 0;
                }

                if (x + i < bView.IntExtent.X && global.Y < bView.IntExtent.Y)
                {
                    bTile[x, y] = bView[x + i, global.Y];
                }
                else
                {
                    bTile[x, y] = 0;
                }

                // Both tiles must be completely written before any thread reads them.
                Group.Barrier();

                for (var k = 0; k < TILE_SIZE; k++)
                {
                    sum += aTile[new Index2D(x, k)] * bTile[new Index2D(k, y)];
                }

                // All reads must finish before the next iteration overwrites the tiles.
                Group.Barrier();
            }

            // Threads outside the output matrix only helped with loading; they write nothing.
            if (global.X < cView.IntExtent.X && global.Y < cView.IntExtent.Y)
            {
                cView[global] = sum;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// The tiled matrix multiplication kernel that runs on the accelerated device.
        /// </summary>
        /// <remarks>
        /// Each thread accumulates one element of <paramref name="cView"/>. The
        /// shared dimension K is walked in steps of TILE_SIZE; for every step a
        /// TILE_SIZE x TILE_SIZE tile of each input is staged in shared memory,
        /// zero-padded where the tile reaches past the matrix bounds.
        /// Assumes the kernel is launched with 2D groups of
        /// TILE_SIZE x TILE_SIZE threads - TODO confirm against the launch site.
        /// </remarks>
        /// <param name="index">Current matrix index</param>
        /// <param name="aView">An input matrix of size MxK</param>
        /// <param name="bView">An input matrix of size KxN</param>
        /// <param name="cView">An output matrix of size MxN</param>
        static void MatrixMultiplyTiledKernel(GroupedIndex2 index, ArrayView2D <float> aView, ArrayView2D <float> bView, ArrayView2D <float> cView)
        {
            var global = index.ComputeGlobalIndex();
            var x      = index.GroupIdx.X;
            var y      = index.GroupIdx.Y;

            var aTile = SharedMemory.Allocate2D <float>(TILE_SIZE, TILE_SIZE);
            var bTile = SharedMemory.Allocate2D <float>(TILE_SIZE, TILE_SIZE);
            var sum   = 0.0f;

            // The loop variable offsets the second index of A and the first index
            // of B, i.e. the shared dimension K. It must therefore be bounded by
            // K (aView.Height), not by the row count M (aView.Width); otherwise
            // the dot product is truncated whenever M < K.
            var sharedExtent = aView.Height;

            for (var i = 0; i < sharedExtent; i += TILE_SIZE)
            {
                if (global.X < aView.Width && y + i < aView.Height)
                {
                    aTile[x, y] = aView[global.X, y + i];
                }
                else
                {
                    aTile[x, y] = 0;
                }

                if (x + i < bView.Width && global.Y < bView.Height)
                {
                    bTile[x, y] = bView[x + i, global.Y];
                }
                else
                {
                    bTile[x, y] = 0;
                }

                // Both tiles must be completely written before any thread reads them.
                Group.Barrier();

                for (var k = 0; k < TILE_SIZE; k++)
                {
                    sum += aTile[new Index2(x, k)] * bTile[new Index2(k, y)];
                }

                // All reads must finish before the next iteration overwrites the tiles.
                Group.Barrier();
            }

            // Threads outside the output matrix only helped with loading; they write nothing.
            if (global.X < cView.Width && global.Y < cView.Height)
            {
                cView[global] = sum;
            }
        }