コード例 #1
0
        /// <summary>
        /// Multiplies two dense matrices and returns the resultant matrix (using tiling).
        /// </summary>
        /// <param name="accelerator">The Accelerator to run the multiplication on</param>
        /// <param name="a">A dense MxK matrix</param>
        /// <param name="b">A dense KxN matrix</param>
        /// <returns>A dense MxN matrix</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the second dimension of <paramref name="a"/> differs from the
        /// first dimension of <paramref name="b"/>.
        /// </exception>
        static float[,] MatrixMultiplyTiled(Accelerator accelerator, float[,] a, float[,] b)
        {
            var m  = a.GetLength(0);
            var ka = a.GetLength(1);
            var kb = b.GetLength(0);
            var n  = b.GetLength(1);

            if (ka != kb)
            {
                // Both operands are reported as rows x columns so the mismatch is readable.
                throw new ArgumentException($"Cannot multiply {m}x{ka} matrix by {kb}x{n} matrix", nameof(b));
            }

            var kernel = accelerator.LoadStreamKernel<GroupedIndex2, ArrayView2D<float>, ArrayView2D<float>, ArrayView2D<float>>(MatrixMultiplyTiledKernel);

            // One TILE_SIZE x TILE_SIZE group per output tile; the group count is rounded up
            // so that partially filled tiles at the matrix edges are still launched.
            var groupSize       = new Index2(TILE_SIZE, TILE_SIZE);
            var numGroups       = new Index2((m + TILE_SIZE - 1) / TILE_SIZE, (n + TILE_SIZE - 1) / TILE_SIZE);
            var launchDimension = new GroupedIndex2(numGroups, groupSize);

            using (var aBuffer = accelerator.Allocate<float>(m, ka))
            using (var bBuffer = accelerator.Allocate<float>(ka, n))
            using (var cBuffer = accelerator.Allocate<float>(m, n))
            {
                aBuffer.CopyFrom(a, Index2.Zero, Index2.Zero, aBuffer.Extent);
                bBuffer.CopyFrom(b, Index2.Zero, Index2.Zero, bBuffer.Extent);

                kernel(launchDimension, aBuffer, bBuffer, cBuffer);

                // Wait for the accelerator before reading the result back.
                accelerator.Synchronize();

                return cBuffer.GetAs2DArray();
            }
        }
コード例 #2
0
        public void GroupedIndex2EntryPoint(int length)
        {
            // Upper bound for the edge of a square group: sqrt of the per-group thread limit.
            var maxEdge = (int)Math.Sqrt(Accelerator.MaxNumThreadsPerGroup);

            // Exercise every power-of-two group edge up to that bound.
            for (int edge = 1; edge <= maxEdge; edge <<= 1)
            {
                var groupDim = new Index2(edge, edge);
                var gridDim  = new Index2(length, length);
                var launch   = new GroupedIndex2(gridDim, groupDim);

                using var output = Accelerator.Allocate<int>(launch.Size);
                output.MemSetToZero(Accelerator.DefaultStream);
                Execute(launch, output.View, groupDim, launch.GridIdx);

                // Recompute on the host which slots get written: each touched slot holds
                // its own linear index, every other slot stays zero.
                var reference  = new int[launch.Size];
                int gridCells  = length * length;
                int groupCells = edge * edge;
                for (int cell = 0; cell < gridCells; ++cell)
                {
                    var gridPos = Index2.ReconstructIndex(cell, launch.GridIdx);
                    for (int lane = 0; lane < groupCells; ++lane)
                    {
                        var groupPos  = Index2.ReconstructIndex(lane, launch.GroupIdx);
                        var globalPos = gridPos * groupDim + groupPos;
                        var slot      = globalPos.ComputeLinearIndex(launch.GridIdx);
                        reference[slot] = slot;
                    }
                }

                Verify(output, reference);
            }
        }
コード例 #3
0
        internal static void GroupedIndex2EntryPointKernel(
            GroupedIndex2 index, ArrayView<int> output, Index2 stride, Index2 extent)
        {
            // Per-axis position: grid index scaled by the stride, plus the index inside the group.
            int column = index.GridIdx.X * stride.X + index.GroupIdx.X;
            int row    = index.GridIdx.Y * stride.Y + index.GroupIdx.Y;

            // Flatten using extent.X as the row pitch and store the slot's own index in it.
            int linear = row * extent.X + column;
            output[linear] = linear;
        }
コード例 #4
0
        /// <summary>
        /// The tiled matrix multiplication kernel that runs on the accelerated device.
        /// </summary>
        /// <param name="index">Current matrix index</param>
        /// <param name="aView">An input matrix of size MxK</param>
        /// <param name="bView">An input matrix of size KxN</param>
        /// <param name="cView">An output matrix of size MxN</param>
        static void MatrixMultiplyTiledKernel(GroupedIndex2 index, ArrayView2D<float> aView, ArrayView2D<float> bView, ArrayView2D<float> cView)
        {
            var global = index.ComputeGlobalIndex();
            var x      = index.GroupIdx.X;
            var y      = index.GroupIdx.Y;

            var aTile = SharedMemory.Allocate2D<float>(TILE_SIZE, TILE_SIZE);
            var bTile = SharedMemory.Allocate2D<float>(TILE_SIZE, TILE_SIZE);
            var sum   = 0.0f;

            // Walk the shared K dimension one tile at a time. The offset i is applied to the
            // second index of aView (bounded by aView.Height) and the first index of bView
            // (bounded by bView.Width), so the loop must cover aView.Height, not aView.Width;
            // otherwise trailing K tiles are skipped whenever M < K.
            for (var i = 0; i < aView.Height; i += TILE_SIZE)
            {
                // Each thread loads one element of each tile; out-of-range elements are
                // zero-filled so they contribute nothing to the dot product.
                if (global.X < aView.Width && y + i < aView.Height)
                {
                    aTile[x, y] = aView[global.X, y + i];
                }
                else
                {
                    aTile[x, y] = 0;
                }

                if (x + i < bView.Width && global.Y < bView.Height)
                {
                    bTile[x, y] = bView[x + i, global.Y];
                }
                else
                {
                    bTile[x, y] = 0;
                }

                // Both tiles must be completely written before any thread reads them.
                Group.Barrier();

                for (var k = 0; k < TILE_SIZE; k++)
                {
                    sum += aTile[new Index2(x, k)] * bTile[new Index2(k, y)];
                }

                // All reads must finish before the next iteration overwrites the tiles.
                Group.Barrier();
            }

            // Threads of edge groups that fall outside the output matrix do not store.
            if (global.X < cView.Width && global.Y < cView.Height)
            {
                cView[global] = sum;
            }
        }
コード例 #5
0
        /// <summary>
        /// Folds one caster patch into the horizon of a single target point.
        /// The grid index selects the target (line = GridIdx.Y, sample = GridIdx.X) and
        /// GroupIdx.Y selects the caster line handled by this thread.
        /// </summary>
        /// <param name="index">Grouped launch index (grid = target point, group = caster line)</param>
        /// <param name="points">Caster points, three consecutive floats (x, y, z) per point</param>
        /// <param name="matrices">Twelve consecutive floats per target point</param>
        /// <param name="horizon">Encoded horizon values, horizon_shared.Length entries per target point</param>
        /// <param name="test_array">Debug output; only entries 0..11 are written, by a single thread</param>
        /// <param name="horizon_shared">Shared-memory working copy of the current target point's horizon</param>
        static void ShadowKernel1(
            GroupedIndex2 index,
            ArrayView<float> points,
            ArrayView<float> matrices,
            ArrayView<int> horizon,
            ArrayView<float> test_array,

            [SharedMemory(1440)]
            ArrayView<int> horizon_shared)
        {
            var target_line   = index.GridIdx.Y;
            var target_sample = index.GridIdx.X;
            var caster_line   = index.GroupIdx.Y;

            // The copy loops below stride by Group.Dimension.Y only, i.e. they assume a group
            // that is one thread wide. In such a group the only valid X index is 0.
            Debug.Assert(index.GroupIdx.X == 0);

            // Copy horizon for a[target_line,target_sample] into shared memory
            {
                var dim    = Group.Dimension.Y;
                var len    = horizon_shared.Length;
                var passes = (len + (dim - 1)) / dim; // round up so every entry is covered
                var offset = (target_line * TerrainPatch.DefaultSize + target_sample) * len;
                for (var pass = 0; pass < passes; pass++)
                {
                    var ptr = pass * dim + caster_line;
                    if (ptr < len)  // divergence
                    {
                        horizon_shared[ptr] = horizon[ptr + offset];
                    }
                }
            }

            Group.Barrier();

            // Copy the matrix into registers
            var pos = (target_line * TerrainPatch.DefaultSize + target_sample) * 12;

            var row0x = matrices[pos++];
            var row1x = matrices[pos++];
            var row2x = matrices[pos++];
            var row3x = matrices[pos++];

            var row0y = matrices[pos++];
            var row1y = matrices[pos++];
            var row2y = matrices[pos++];
            var row3y = matrices[pos++];

            var row0z = matrices[pos++];
            var row1z = matrices[pos++];
            var row2z = matrices[pos++];
            var row3z = matrices[pos];

            for (var caster_sample = 0; caster_sample < TerrainPatch.DefaultSize; caster_sample++)
            {
                // Fetch the other point in local frame
                var points_offset = (caster_line * TerrainPatch.DefaultSize + caster_sample) * 3;
                var x_patch       = points[points_offset];
                var y_patch       = points[points_offset + 1];
                var z_patch       = points[points_offset + 2];

                // Transform the point to the local frame
                var x = x_patch * row0x + y_patch * row1x + z_patch * row2x + row3x;
                var y = x_patch * row0y + y_patch * row1y + z_patch * row2y + row3y;
                var z = x_patch * row0z + y_patch * row1z + z_patch * row2z + row3z;

                // Adjust for solar array height (this is temporary, and I'm not sure we want this in the final version)
                z -= ObserverHeight;                            // meters

                var azimuth = GPUMath.Atan2(y, x) + GPUMath.PI; // [0,2 PI]
                var alen    = GPUMath.Sqrt(x * x + y * y);
                var slope   = z / alen;

                // Clamp the slope to [-2, 2], rescale to [-0.5, 0.5] and store it as an
                // integer scaled by 1e6 so that Atomic.Max can be applied to it.
                var slopem = slope > 2f ? 2f : slope;
                slopem = slopem < -2f ? -2f : slopem;
                slopem = slopem / 4f;

                var slopei = (int)(slopem * 1000000);

                // Map the azimuth onto a horizon bucket in 0..1439 (rounded to nearest).
                var horizon_index = (int)(0.5f + 1439 * (azimuth / (2f * GPUMath.PI)));
                Atomic.Max(horizon_shared.GetVariableView(horizon_index), slopei);

                // Debug dump of the intermediate values for the very first caster/target pair.
                if (caster_sample == 0 && caster_line == 0 && target_line == 0 && target_sample == 0)
                {
                    test_array[0]  = x_patch;
                    test_array[1]  = y_patch;
                    test_array[2]  = z_patch;
                    test_array[3]  = x;
                    test_array[4]  = y;
                    test_array[5]  = z;
                    test_array[6]  = slope;
                    test_array[7]  = slopem;
                    test_array[8]  = slopei;
                    test_array[9]  = row3x;
                    test_array[10] = row3y;
                    test_array[11] = row3z;
                }
            }

            // All atomic updates must land before the shared copy is written back.
            Group.Barrier();

            // Write the updated horizon back, using the same striding as the load above.
            {
                var dim    = Group.Dimension.Y;
                var len    = horizon_shared.Length;
                var passes = (len + (dim - 1)) / dim;
                var offset = (target_line * TerrainPatch.DefaultSize + target_sample) * len;
                for (var pass = 0; pass < passes; pass++)
                {
                    var ptr = pass * dim + caster_line;
                    if (ptr < len)  // divergence
                    {
                        horizon[ptr + offset] = horizon_shared[ptr];
                    }
                }
            }
        }
コード例 #6
0
        /// <summary>
        /// Update the horizons of a patch based on a list of shadow casters.
        /// The horizons will be in slope, not angle, format
        /// </summary>
        /// <param name="target">The patch whose horizon buffers are read, updated on the GPU and written back</param>
        /// <param name="casters">The patches whose points are tested against the target; nothing happens when the list is empty</param>
        public void UpdateHorizons(TerrainPatch target, List<TerrainPatch> casters)
        {
            Debug.Assert(Terrain != null);
            if (casters.Count < 1)
            {
                return;
            }

            using (var context = new Context())
            {
                // FirstOrDefault yields a default AcceleratorId when nothing matches, so the
                // accelerator type is re-checked before the id is used.
                AcceleratorId aid = Accelerator.Accelerators.FirstOrDefault(id => id.AcceleratorType == AcceleratorType.Cuda);
                if (aid.AcceleratorType != AcceleratorType.Cuda)
                {
                    Console.WriteLine(@"There is no CUDA accelerator present.  Doing nothing.");
                    return;
                }

                using (var accelerator = Accelerator.Create(context, aid))
                {
                    target.FillPoints(Terrain);
                    target.FillMatricesRelativeToPoint(Terrain, target.Points[0][0]);

                    // Matrices (12 floats per target point)
                    var cpu_matrices_size = target.Height * target.Width * 12;
                    var basePoint         = target.Points[0][0];
                    var cpu_matrices      = MakeCPUMatrices(target);

                    // Horizon (load from target), flattened as [line][sample][horizon sample]
                    var cpu_horizon_size = target.Height * target.Width * Horizon.HorizonSamples;
                    var cpu_horizon      = new int[cpu_horizon_size];
                    for (var line = 0; line < TerrainPatch.DefaultSize; line++)
                    {
                        for (var sample = 0; sample < TerrainPatch.DefaultSize; sample++)
                        {
                            var offset = (line * TerrainPatch.DefaultSize + sample) * Horizon.HorizonSamples;
                            var buffer = target.Horizons[line][sample].Buffer;
                            for (var i = 0; i < Horizon.HorizonSamples; i++)
                            {
                                cpu_horizon[i + offset] = SlopeToEncoding(buffer[i]);
                            }
                        }
                    }

                    // Caster points (3 floats per point); sized from the first caster and reused for all of them
                    var cpu_caster_points_size = casters[0].Width * casters[0].Height * 3;
                    var cpu_caster_points      = new float[cpu_caster_points_size];

                    // test array (debug values written by the kernel)
                    var cpu_test_array = new float[20];

                    using (var gpu_matrices = accelerator.Allocate<float>(cpu_matrices_size))
                    using (var gpu_horizon = accelerator.Allocate<int>(cpu_horizon_size))
                    using (var gpu_caster_points = accelerator.Allocate<float>(cpu_caster_points_size))
                    using (var gpu_test_array = accelerator.Allocate<float>(cpu_test_array.Length))
                    {
                        gpu_matrices.CopyFrom(cpu_matrices, 0, 0, cpu_matrices_size);
                        gpu_horizon.CopyFrom(cpu_horizon, 0, 0, cpu_horizon_size);

                        // 128 x 128 groups, each group 1 x 128 threads.
                        var launchDimension = new GroupedIndex2(
                            new Index2(128, 128),
                            new Index2(1, 128));

                        var kernel1 = accelerator.LoadSharedMemoryStreamKernel1<GroupedIndex2, ArrayView<float>, ArrayView<float>, ArrayView<int>, ArrayView<float>, ArrayView<int>>(ShadowKernel1);

                        // The horizon stays on the device across casters; only the caster
                        // points are re-uploaded for each pass.
                        foreach (var caster in casters)
                        {
                            caster.FillPoints(Terrain);
                            CopyPointsToCpuArray(caster, basePoint, cpu_caster_points);
                            gpu_caster_points.CopyFrom(cpu_caster_points, 0, 0, cpu_caster_points_size);

                            kernel1(launchDimension, gpu_caster_points, gpu_matrices, gpu_horizon, gpu_test_array);

                            accelerator.Synchronize();
                        }

                        // Copy out data
                        gpu_horizon.CopyTo(cpu_horizon, 0, 0, cpu_horizon_size);
                        gpu_test_array.CopyTo(cpu_test_array, 0, 0, cpu_test_array.Length);

                        // Update the horizons
                        for (var line = 0; line < TerrainPatch.DefaultSize; line++)
                        {
                            for (var sample = 0; sample < TerrainPatch.DefaultSize; sample++)
                            {
                                var offset = (line * TerrainPatch.DefaultSize + sample) * Horizon.HorizonSamples;
                                var buffer = target.Horizons[line][sample].Buffer;
                                for (var i = 0; i < Horizon.HorizonSamples; i++)
                                {
                                    buffer[i] = EncodingToSlope(cpu_horizon[i + offset]);
                                }
                            }
                        }
                    }
                }
            }
        }
コード例 #7
0
 /// <summary>
 /// Computes the global index of a grouped index by forwarding its grid and
 /// group components to the two-argument overload.
 /// </summary>
 /// <param name="index">The grouped index.</param>
 /// <returns>The computed global index.</returns>
 public static Index2 ComputeGlobalIndex(GroupedIndex2 index) =>
     ComputeGlobalIndex(index.GridIdx, index.GroupIdx);