/// <summary>
/// Demonstrates the basic atomic functions on a 1D buffer: each of the first
/// seven elements of <paramref name="dataView"/> is the target of exactly one
/// atomic operation whose operand is <paramref name="constant"/>.
/// </summary>
/// <param name="index">The current thread index (not read by this kernel).</param>
/// <param name="dataView">The view whose elements 0 through 6 are updated atomically.</param>
/// <param name="constant">The uniform operand passed to every atomic operation.</param>
static void AtomicOperationKernel(
    Index index,
    ArrayView<int> dataView,
    int constant)
{
    // dataView[0] += constant
    var sumTarget = dataView.GetVariableView(0);
    Atomic.Add(sumTarget, constant);

    // dataView[1] -= constant
    var differenceTarget = dataView.GetVariableView(1);
    Atomic.Sub(differenceTarget, constant);

    // dataView[2] = Max(dataView[2], constant)
    var maxTarget = dataView.GetVariableView(2);
    Atomic.Max(maxTarget, constant);

    // dataView[3] = Min(dataView[3], constant)
    var minTarget = dataView.GetVariableView(3);
    Atomic.Min(minTarget, constant);

    // dataView[4] &= constant
    var andTarget = dataView.GetVariableView(4);
    Atomic.And(andTarget, constant);

    // dataView[5] |= constant
    var orTarget = dataView.GetVariableView(5);
    Atomic.Or(orTarget, constant);

    // dataView[6] ^= constant
    var xorTarget = dataView.GetVariableView(6);
    Atomic.Xor(xorTarget, constant);
}
/// <summary>
/// Explicitly grouped kernels receive an index type (first parameter) of type:
/// <see cref="GroupedIndex"/>, <see cref="GroupedIndex2"/> or <see cref="GroupedIndex3"/>.
/// Shared memory is only supported in explicitly-grouped kernel contexts.
/// Shared-memory parameters are automatically handled by the runtime and have to be
/// annotated with the SharedMemoryAttribute. Note that currently, the only supported
/// shared-memory parameters are VariableViews and ArrayViews.
///
/// This kernel reduces the elements of <paramref name="dataView"/> that are covered by
/// a group to their maximum using a single shared-memory variable, and then stores that
/// maximum into every in-range element of <paramref name="outputView"/> owned by the group.
/// </summary>
/// <param name="index">The current thread index.</param>
/// <param name="dataView">The view pointing to the input values.</param>
/// <param name="outputView">The view that receives the per-group maximum.</param>
/// <param name="sharedVariable">Implicit shared-memory parameter that is handled by the runtime.</param>
static void SharedMemoryVariableKernel(
    GroupedIndex index,               // The grouped thread index (1D in this case)
    ArrayView<int> dataView,          // The input values (1D in this case)
    ArrayView<int> outputView,        // The target of the per-group maximum (1D in this case)
    [SharedMemory]                    // Declares a single variable of type int in
    VariableView<int> sharedVariable) // shared memory (= 4 bytes)
{
    // Compute the global 1D index for accessing the views
    var globalIndex = index.ComputeGlobalIndex();

    // Initialize shared memory. The accumulator is seeded with the identity of
    // Max (int.MinValue) rather than 0, so that a group whose inputs are all
    // negative still reports its true maximum. A group in which no thread passes
    // the dataView bounds check below therefore reports int.MinValue.
    if (index.GroupIdx.IsFirst)
    {
        sharedVariable.Value = int.MinValue;
    }

    // Wait for the initialization to complete
    Group.Barrier();

    // Out-of-range threads skip the update but still take part in both barriers
    if (globalIndex < dataView.Length)
    {
        Atomic.Max(sharedVariable, dataView[globalIndex]);
    }

    // Wait for all threads to complete the maximum computation process
    Group.Barrier();

    // Write the maximum of all values into the output view
    if (globalIndex < outputView.Length)
    {
        outputView[globalIndex] = sharedVariable.Value;
    }
}
/// <summary>
/// Demonstrates the basic atomic functions on a 1D buffer: each of the first
/// six elements of <paramref name="dataView"/> is the target of exactly one
/// atomic operation whose operand is <paramref name="constant"/>.
/// </summary>
/// <param name="index">The current thread index (not read by this kernel).</param>
/// <param name="dataView">The view whose elements 0 through 5 are updated atomically.</param>
/// <param name="constant">The uniform operand passed to every atomic operation.</param>
static void AtomicOperationKernel(
    Index1D index,
    ArrayView<int> dataView,
    int constant)
{
    // dataView[0] += constant
    ref int sumSlot = ref dataView[0];
    Atomic.Add(ref sumSlot, constant);

    // dataView[1] = Max(dataView[1], constant)
    ref int maxSlot = ref dataView[1];
    Atomic.Max(ref maxSlot, constant);

    // dataView[2] = Min(dataView[2], constant)
    ref int minSlot = ref dataView[2];
    Atomic.Min(ref minSlot, constant);

    // dataView[3] &= constant
    ref int andSlot = ref dataView[3];
    Atomic.And(ref andSlot, constant);

    // dataView[4] |= constant
    ref int orSlot = ref dataView[4];
    Atomic.Or(ref orSlot, constant);

    // dataView[5] ^= constant
    ref int xorSlot = ref dataView[5];
    Atomic.Xor(ref xorSlot, constant);
}
/// <summary>
/// Grouped kernel that, for every <c>asample</c> in <c>[0, patch_size)</c>, loads one slice of
/// <paramref name="horizon"/> into <paramref name="horizon_shared"/>, raises entries of that
/// slice with <c>Atomic.Max</c> using slopes computed from <paramref name="heights"/> and a
/// 12-float matrix read from <paramref name="matrices"/>, and then writes the slice back.
/// The slice is selected by <c>(aline * patch_size + asample)</c>, where <c>aline</c> is
/// <c>index.GridIdx</c>; the thread's <c>index.GroupIdx</c> (<c>idx</c>) selects the sample.
/// </summary>
/// <param name="index">Grouped index; <c>GridIdx</c> is used as <c>aline</c>, <c>GroupIdx</c> as <c>idx</c>.</param>
/// <param name="heights">Height values, read at <c>aline * patch_size + idx</c> and scaled by 0.5.</param>
/// <param name="matrices">Twelve consecutive floats per <c>(aline, asample)</c> pair.</param>
/// <param name="horizon">Slices of <c>horizon_shared.Length</c> floats each; read and then overwritten.</param>
/// <param name="a_line">Offset added to <c>aline</c> to form <c>line</c>.</param>
/// <param name="a_sample">Offset added to <c>idx</c> to form <c>sample</c>.</param>
/// <param name="horizon_shared">Shared-memory scratch slice, declared with <c>[SharedMemory(1440)]</c>.</param>
static void Test3Kernel(
    GroupedIndex index,
    ArrayView<short> heights,
    ArrayView<float> matrices,
    ArrayView<float> horizon,
    int a_line,
    int a_sample,
    [SharedMemory(1440)] ArrayView<float> horizon_shared)
{
    var idx = index.GroupIdx;
    const int patch_size = 128;

    // Do the calculation
    var aline = index.GridIdx;
    for (var asample = 0; asample < patch_size; asample++)
    {
        // Copy horizon for a[line,sample] into shared memory.
        // Each thread copies the entries idx, idx + dim, idx + 2 * dim, ...
        {
            var dim = Group.Dimension.X;
            var len = horizon_shared.Length;
            var passes = (len + (dim - 1)) / dim; // len / dim, rounded up
            var offset = (aline * patch_size + asample) * len;
            for (var pass = 0; pass < passes; pass++)
            {
                var ptr = pass * dim + idx;
                if (ptr < len)
                    horizon_shared[ptr] = horizon[ptr + offset]; // Note warp divergence
            }
        }

        // The whole slice must be loaded before any thread updates it below
        Group.Barrier();

        // Copy the matrix into registers
        var pos = (aline * patch_size + asample) * 12;
        var row0x = matrices[pos++];
        var row1x = matrices[pos++];
        var row2x = matrices[pos++];
        var row3x = matrices[pos++];
        var row0y = matrices[pos++];
        var row1y = matrices[pos++];
        var row2y = matrices[pos++];
        var row3y = matrices[pos++];
        var row0z = matrices[pos++];
        var row1z = matrices[pos++];
        var row2z = matrices[pos++];
        var row3z = matrices[pos];

        // NOTE(review): nothing in this loop body reads `oline`, so every iteration
        // recomputes the same slope and horizon_index. Confirm whether `heights` and
        // `line` were meant to be indexed by `oline` instead of `aline`.
        for (var oline = 0; oline < patch_size; oline++)
        {
            // osample = idx
            var relz = 0.5d * heights[aline * patch_size + idx];
            var radius = MoonRadius + relz / 1000d;
            var line = a_line + aline;
            var sample = a_sample + idx;
            var map_x = (sample - S0) * Scale;
            var map_y = (L0 - line) * Scale;
            var P = Math.Sqrt(map_x * map_x + map_y * map_y);
            var C = 2d * Math.Atan2(P, 2 * MoonRadius);
            // NOTE(review): divides by P, which is 0 when map_x and map_y are both 0 —
            // confirm that input cannot occur.
            var latitude = Math.Asin(Math.Cos(C) * Math.Sin(LatP) + map_y * Math.Sin(C) * Math.Cos(LatP) / P);
            var longitude = LonP + Math.Atan2(map_x, map_y * LonFactor);
            // latdeg and londeg are not read anywhere else in this kernel
            var latdeg = latitude * 180d / Math.PI;
            var londeg = longitude * 180d / Math.PI;

            // Calculate the other point in ME frame
            var z_me = radius * Math.Sin(latitude);
            var c = radius * Math.Cos(latitude);
            var x_me = c * Math.Cos(longitude);
            var y_me = c * Math.Sin(longitude);

            // Transform the point to the local frame
            // (the twelve matrix values are applied as three rows of x/y/z weights plus an offset)
            var x = x_me * row0x + y_me * row1x + z_me * row2x + row3x;
            var y = x_me * row0y + y_me * row1y + z_me * row2y + row3y;
            var z = x_me * row0z + y_me * row1z + z_me * row2z + row3z;

            // if (idx == 0)
            //     relz = relz;

            var azimuth = Math.Atan2(y, x) + Math.PI; // [0,2 PI]
            var alen = Math.Sqrt(x * x + y * y);
            var slope = z / alen;
            var slopef = (float)slope;
            // Scale azimuth from [0, 2 PI] to [0, 1439] and round to the nearest index
            var horizon_index = (int)(0.5d + 1439 * (azimuth / (2d * Math.PI)));
            // Keep the largest slope seen for this azimuth index
            Atomic.Max(horizon_shared.GetVariableView(horizon_index), slopef);
            //horizon_shared[horizon_index] = 1f;
        }

        // All atomic updates must be finished before the slice is written back
        Group.Barrier();

        // Write the shared slice back to horizon, using the same access pattern as the load
        {
            var dim = Group.Dimension.X;
            var len = horizon_shared.Length;
            var passes = (len + (dim - 1)) / dim;
            var offset = (aline * patch_size + asample) * len;
            for (var pass = 0; pass < passes; pass++)
            {
                var ptr = pass * dim + idx;
                if (ptr < len)
                    horizon[ptr + offset] = horizon_shared[ptr]; // Note warp divergence
            }
        }
    }
}
/// <summary>
/// Grouped 2D kernel that loads the slice of <paramref name="horizon"/> selected by
/// <c>(target_line, target_sample)</c> = <c>(index.GridIdx.Y, index.GridIdx.X)</c> into
/// <paramref name="horizon_shared"/>, raises entries of that slice with <c>Atomic.Max</c>
/// using slopes computed from the points of row <c>caster_line</c> (= <c>index.GroupIdx.Y</c>)
/// of <paramref name="points"/>, and then writes the slice back to <paramref name="horizon"/>.
/// </summary>
/// <param name="index">Grouped 2D index; the grid index selects the target, <c>GroupIdx.Y</c> the caster line.</param>
/// <param name="points">Three consecutive floats (x, y, z) per <c>(caster_line, caster_sample)</c> pair.</param>
/// <param name="matrices">Twelve consecutive floats per <c>(target_line, target_sample)</c> pair.</param>
/// <param name="horizon">Slices of <c>horizon_shared.Length</c> ints each; read and then overwritten.</param>
/// <param name="test_array">Receives twelve intermediate values from the thread with all indices equal to 0.</param>
/// <param name="horizon_shared">Shared-memory scratch slice, declared with <c>[SharedMemory(1440)]</c>.</param>
static void ShadowKernel1(
    GroupedIndex2 index,
    ArrayView <float> points,
    ArrayView <float> matrices,
    ArrayView <int> horizon,
    ArrayView <float> test_array,
    [SharedMemory(1440)] ArrayView <int> horizon_shared)
{
    var target_line = index.GridIdx.Y;
    var target_sample = index.GridIdx.X;
    var caster_line = index.GroupIdx.Y;
    // NOTE(review): this asserts GroupIdx.X == 1; if the group is a single thread wide
    // in X, that index would be expected to be 0 — confirm the intended condition.
    Debug.Assert(index.GroupIdx.X == 1);

    // Copy horizon for a[target_line,target_sample] into shared memory.
    // Each thread copies the entries caster_line, caster_line + dim, caster_line + 2 * dim, ...
    {
        var dim = Group.Dimension.Y;
        var len = horizon_shared.Length;
        var passes = (len + (dim - 1)) / dim; // len / dim, rounded up
        var offset = (target_line * TerrainPatch.DefaultSize + target_sample) * len;
        for (var pass = 0; pass < passes; pass++)
        {
            var ptr = pass * dim + caster_line;
            if (ptr < len)  // divergence
            {
                horizon_shared[ptr] = horizon[ptr + offset];
            }
        }
    }

    // The whole slice must be loaded before any thread updates it below
    Group.Barrier();

    // Copy the matrix into registers
    var pos = (target_line * TerrainPatch.DefaultSize + target_sample) * 12;
    var row0x = matrices[pos++];
    var row1x = matrices[pos++];
    var row2x = matrices[pos++];
    var row3x = matrices[pos++];
    var row0y = matrices[pos++];
    var row1y = matrices[pos++];
    var row2y = matrices[pos++];
    var row3y = matrices[pos++];
    var row0z = matrices[pos++];
    var row1z = matrices[pos++];
    var row2z = matrices[pos++];
    var row3z = matrices[pos];

    for (var caster_sample = 0; caster_sample < TerrainPatch.DefaultSize; caster_sample++)
    {
        // Fetch the other point in local frame
        var points_offset = (caster_line * TerrainPatch.DefaultSize + caster_sample) * 3;
        var x_patch = points[points_offset];
        var y_patch = points[points_offset + 1];
        var z_patch = points[points_offset + 2];

        // Transform the point to the local frame
        // (the twelve matrix values are applied as three rows of x/y/z weights plus an offset)
        var x = x_patch * row0x + y_patch * row1x + z_patch * row2x + row3x;
        var y = x_patch * row0y + y_patch * row1y + z_patch * row2y + row3y;
        var z = x_patch * row0z + y_patch * row1z + z_patch * row2z + row3z;

        // Adjust for solar array height (this is temporary, and I'm not sure we want this in the final version)
        z -= ObserverHeight; // meters

        var azimuth = GPUMath.Atan2(y, x) + GPUMath.PI; // [0,2 PI]
        var alen = GPUMath.Sqrt(x * x + y * y);
        var slope = z / alen;
        // Clamp the slope to [-2, 2], rescale to [-0.5, 0.5], then convert to an int
        // with six decimal digits — presumably so the int overload of Atomic.Max can
        // operate on the int-typed horizon buffer.
        var slopem = slope > 2f ? 2f : slope;
        slopem = slopem < -2f ? -2f : slopem;
        slopem = slopem / 4f;
        var slopei = (int)(slopem * 1000000);
        // Scale azimuth from [0, 2 PI] to [0, 1439] and round to the nearest index
        var horizon_index = (int)(0.5f + 1439 * (azimuth / (2f * GPUMath.PI)));
        // Keep the largest encoded slope seen for this azimuth index
        Atomic.Max(horizon_shared.GetVariableView(horizon_index), slopei);

        // Diagnostic dump: only the thread whose caster and target indices are all 0
        // records its intermediate values (slopei is stored converted to float).
        if (caster_sample == 0 && caster_line == 0 && target_line == 0 && target_sample == 0)
        {
            test_array[0] = x_patch;
            test_array[1] = y_patch;
            test_array[2] = z_patch;
            test_array[3] = x;
            test_array[4] = y;
            test_array[5] = z;
            test_array[6] = slope;
            test_array[7] = slopem;
            test_array[8] = slopei;
            test_array[9] = row3x;
            test_array[10] = row3y;
            test_array[11] = row3z;
        }
    }

    // All atomic updates must be finished before the slice is written back
    Group.Barrier();

    // Write the shared slice back to horizon, using the same access pattern as the load
    {
        var dim = Group.Dimension.Y;
        var len = horizon_shared.Length;
        var passes = (len + (dim - 1)) / dim;
        var offset = (target_line * TerrainPatch.DefaultSize + target_sample) * len;
        for (var pass = 0; pass < passes; pass++)
        {
            var ptr = pass * dim + caster_line;
            if (ptr < len)  // divergence
            {
                horizon[ptr + offset] = horizon_shared[ptr];
            }
        }
    }
}