예제 #1
0
        /// <summary>
        /// Runs one colored relaxation pass over the tile handled by the current block
        /// (judging by the name, a red-black SOR sweep for a Poisson problem).
        /// The block first copies its tile of <paramref name="grid"/>, at most
        /// <c>AREA_SIZE_X + 2</c> by <c>AREA_SIZE_Y + 2</c> values, into <paramref name="buf"/>,
        /// calls <c>SyncThreads</c>, and then overwrites the cells of <paramref name="grid"/> whose
        /// tile-local <c>row + col</c> has the same parity as <paramref name="color"/>
        /// (for a non-negative <paramref name="color"/>).
        /// </summary>
        /// <param name="grid">Solution values; staged into <paramref name="buf"/> and updated in place.</param>
        /// <param name="laplacian">Right-hand-side values, read at <c>x + y * lstride</c> without the one-cell offset used for <paramref name="grid"/>.</param>
        /// <param name="dimX">Extent used to clip the number of staged columns.</param>
        /// <param name="dimY">Extent used to clip the number of staged rows.</param>
        /// <param name="gstride">Element distance between consecutive rows of <paramref name="grid"/>.</param>
        /// <param name="lstride">Element distance between consecutive rows of <paramref name="laplacian"/>.</param>
        /// <param name="hx">Presumably the grid spacing along x (confirm with the caller).</param>
        /// <param name="hy">Presumably the grid spacing along y (confirm with the caller).</param>
        /// <param name="omega">Relaxation factor: the old value is kept with weight <c>1 - omega</c>.</param>
        /// <param name="color">Selects which parity of cells this pass updates.</param>
        /// <param name="buf">Staging buffer addressed through <c>IdxBuf(row, col)</c>.</param>
        private static void PoissonRBSOR_LMem([Global] float[] grid, [Global] float[] laplacian,
                                              int dimX, int dimY, int gstride, int lstride,
                                              float hx, float hy, float omega, int color,
                                              [Shared] float[] buf)
        {
            int tx    = (int)ThreadIdx.X;
            int ty    = (int)ThreadIdx.Y;
            int stepX = (int)BlockDim.X;
            int stepY = (int)BlockDim.Y;

            // First grid column / row covered by this block's tile.
            int tileX = (int)BlockIdx.X * AREA_SIZE_X;
            int tileY = (int)BlockIdx.Y * AREA_SIZE_Y;

            // Size of the staged region (tile plus border), clipped against dimX / dimY.
            int haloCols = BuiltinFunctions.Min(AREA_SIZE_X + 2, dimX - tileX);
            int haloRows = BuiltinFunctions.Min(AREA_SIZE_Y + 2, dimY - tileY);

            // Phase 1: the threads of the block cooperatively copy the region into buf.
            for (int r = ty; r < haloRows; r += stepY)
            {
                int srcRow = tileX + (r + tileY) * gstride;
                for (int c = tx; c < haloCols; c += stepX)
                {
                    buf[IdxBuf(r, c)] = grid[srcRow + c];
                }
            }

            // Every staged value must be in place before any thread reads its neighbours.
            BuiltinFunctions.SyncThreads();

            // Only the cells that have a staged neighbour on every side are updated.
            int innerCols = haloCols - 2;
            int innerRows = haloRows - 2;

            float rhsScale  = 2 * hx * hy;
            float rowWeight = 2 * hy / hx;
            float colWeight = 2 * hx / hy;
            float gain      = 0.5f * omega / (rowWeight + colWeight);
            float keep      = 1 - omega;

            // Phase 2: update every second cell of each row; the starting column alternates
            // with the row so that only cells of the requested color are touched.
            for (int r = ty; r < innerRows; r += stepY)
            {
                int gy       = r + tileY;
                int dstRow   = tileX + 1 + (gy + 1) * gstride;
                int lapRow   = tileX + gy * lstride;
                int firstCol = 2 * tx + (color + r) % 2;

                for (int c = firstCol; c < innerCols; c += 2 * stepX)
                {
                    float rhs     = rhsScale * laplacian[lapRow + c];
                    float rowPair = buf[IdxBuf(r + 2, c + 1)] + buf[IdxBuf(r, c + 1)];
                    float colPair = buf[IdxBuf(r + 1, c + 2)] + buf[IdxBuf(r + 1, c)];
                    float centre  = buf[IdxBuf(r + 1, c + 1)];

                    grid[dstRow + c] = (rhs + rowWeight * rowPair + colWeight * colPair) * gain +
                                       centre * keep;
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Performs one out-of-place stencil update (judging by the name, a Jacobi step for a
        /// Poisson problem). The block copies its tile of <paramref name="input"/>, at most
        /// <c>AREA_SIZE_X + 2</c> by <c>AREA_SIZE_Y + 2</c> values, into <paramref name="buf"/>,
        /// calls <c>SyncThreads</c>, and then writes to <paramref name="output"/>, for each cell,
        /// the weighted sum of its four staged neighbours plus the source term
        /// <c>2 * hx * hy * J(x0 + x * hx, y0 + y * hy)</c>, divided by <paramref name="a"/>.
        /// </summary>
        /// <param name="input">Values of the previous iterate; only read.</param>
        /// <param name="output">Receives the updated values; only written.</param>
        /// <param name="buf">Staging buffer addressed through <c>IdxBuf(row, col)</c>.</param>
        /// <param name="dimX">Extent used to clip the number of staged columns.</param>
        /// <param name="dimY">Extent used to clip the number of staged rows.</param>
        /// <param name="stride">Element distance between consecutive rows of both arrays.</param>
        /// <param name="a1">Weight of the neighbour at <c>row + 2</c> in <paramref name="buf"/>.</param>
        /// <param name="a2">Weight of the neighbour at <c>col + 2</c> in <paramref name="buf"/>.</param>
        /// <param name="a3">Weight of the neighbour at <c>row</c> in <paramref name="buf"/>.</param>
        /// <param name="a4">Weight of the neighbour at <c>col</c> in <paramref name="buf"/>.</param>
        /// <param name="a">Divisor applied to the weighted sum.</param>
        /// <param name="hx">Step multiplied by the cell's x index when evaluating <c>J</c>.</param>
        /// <param name="hy">Step multiplied by the cell's y index when evaluating <c>J</c>.</param>
        /// <param name="x0">Offset added to the x argument of <c>J</c>.</param>
        /// <param name="y0">Offset added to the y argument of <c>J</c>.</param>
        private static void PoissonJacobi([Global] float[] input, [Global] float[] output, [Shared] float[] buf,
                                          uint dimX, uint dimY, uint stride,
                                          float a1, float a2, float a3, float a4, float a,
                                          float hx, float hy, float x0, float y0)
        {
            uint tx    = ThreadIdx.X;
            uint ty    = ThreadIdx.Y;
            uint stepX = BlockDim.X;
            uint stepY = BlockDim.Y;

            // First column / row of the tile owned by this block.
            uint tileX = BlockIdx.X * AREA_SIZE_X;
            uint tileY = BlockIdx.Y * AREA_SIZE_Y;

            // Size of the staged region (tile plus border), clipped against dimX / dimY.
            uint haloCols = BuiltinFunctions.Min(AREA_SIZE_X + 2, dimX - tileX);
            uint haloRows = BuiltinFunctions.Min(AREA_SIZE_Y + 2, dimY - tileY);

            // Phase 1: the threads of the block cooperatively copy the region into buf.
            for (uint r = ty; r < haloRows; r += stepY)
            {
                uint srcRow = tileX + (r + tileY) * stride;
                for (uint c = tx; c < haloCols; c += stepX)
                {
                    buf[IdxBuf(r, c)] = input[srcRow + c];
                }
            }

            // Every staged value must be in place before any thread reads its neighbours.
            BuiltinFunctions.SyncThreads();

            // NOTE(review): these are unsigned, so a staged extent below 2 would wrap around
            // instead of going negative - presumably the launch configuration rules that out.
            uint innerCols = haloCols - 2;
            uint innerRows = haloRows - 2;

            // Phase 2: one output value per cell that has a staged neighbour on every side.
            for (uint r = ty; r < innerRows; r += stepY)
            {
                uint gy     = 1 + r + tileY;
                uint dstRow = 1 + tileX + gy * stride;

                for (uint c = tx; c < innerCols; c += stepX)
                {
                    uint gx = 1 + tileX + c;

                    float source   = 2 * hx * hy * J(x0 + gx * hx, y0 + gy * hy);
                    float weighted = a1 * buf[IdxBuf(r + 2, c + 1)] + a2 * buf[IdxBuf(r + 1, c + 2)] +
                                     a3 * buf[IdxBuf(r, c + 1)] + a4 * buf[IdxBuf(r + 1, c)];

                    output[dstRow + c] = (weighted + source) / a;
                }
            }
        }