/// <summary>
/// Reduces <paramref name="input"/> to a single value by launching
/// <c>SequentialAddressingKernel</c> level by level: each launch writes one
/// entry per block into a new, smaller array, which becomes the input of the
/// next launch. Once the number of blocks is no greater than
/// <c>CpuThreshold</c>, the remaining partial results are added together on
/// the CPU.
/// </summary>
/// <param name="input">The values to reduce. Must not be <c>null</c>.</param>
/// <returns>
/// The reduction of all elements of <paramref name="input"/>; zero when the
/// array is empty.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> is <c>null</c>.
/// </exception>
public static int SequentialAddressing(int[] input)
{
    // Preconditions
    if (input == null) { throw new ArgumentNullException("input"); }

    // Nothing to reduce: avoid configuring a launch for zero elements and
    // indexing into an empty result array afterwards.
    if (input.Length == 0) { return 0; }

    // Thread/block counts for the current level; overwritten on every iteration.
    int NumThreadsPerBlock = 0, NumBlocks = 0;

    // Data consumed by the current level. Starts as the caller's array and is
    // replaced by the previous level's output after every launch.
    int[] LevelInput = input;
    int[] OutputData;

    do
    {
        // Size the launch for the number of elements in this level.
        // NOTE(review): the leading 2 presumably selects the "reduce2" kernel
        // configuration — confirm against CalculateBlockAndGridSizes.
        CalculateBlockAndGridSizes(2, LevelInput.Length, out NumThreadsPerBlock, out NumBlocks);
        Launcher.SetBlockSize(NumThreadsPerBlock);
        Launcher.SetGridSize(NumBlocks);

        // One output slot per block.
        OutputData = new int[NumBlocks];
        Reduction.SequentialAddressingKernel(LevelInput, OutputData);

        // The output of this level feeds the next one.
        LevelInput = OutputData;
    }
    while (NumBlocks > CpuThreshold);

    // Finish on the CPU. When exactly one element is left this is just
    // OutputData[0]; when CpuThreshold allows several blocks to remain, their
    // partial results must all be included rather than only the first.
    // NOTE(review): assumes the kernel's reduction operator is addition, as
    // the caller's CPU reference loop suggests.
    int Result = 0;
    for (int i = 0; i < OutputData.Length; i++) { Result += OutputData[i]; }

    return Result;
}
/// <summary>
/// Reduces <paramref name="input"/> to a single value by launching
/// <c>FirstReductionFromGlobalKernel</c> level by level: each launch writes
/// one entry per block into a new, smaller array, which becomes the input of
/// the next launch. Once the number of blocks is no greater than
/// <c>CpuThreshold</c>, the remaining partial results are added together on
/// the CPU.
/// </summary>
/// <param name="input">The values to reduce. Must not be <c>null</c>.</param>
/// <returns>
/// The reduction of all elements of <paramref name="input"/>; zero when the
/// array is empty.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> is <c>null</c>.
/// </exception>
public static int FirstReductionFromGlobal(int[] input)
{
    // Preconditions
    if (input == null) { throw new ArgumentNullException("input"); }

    // Nothing to reduce: avoid configuring a launch for zero elements and
    // indexing into an empty result array afterwards.
    if (input.Length == 0) { return 0; }

    // Thread/block counts for the current level; overwritten on every iteration.
    int NumThreadsPerBlock = 0, NumBlocks = 0;

    // Data consumed by the current level. Starts as the caller's array and is
    // replaced by the previous level's output after every launch.
    int[] LevelInput = input;
    int[] OutputData;

    do
    {
        // Size the launch for the number of elements in this level.
        // NOTE(review): the leading 3 presumably selects the "reduce3" kernel
        // configuration — confirm against CalculateBlockAndGridSizes.
        CalculateBlockAndGridSizes(3, LevelInput.Length, out NumThreadsPerBlock, out NumBlocks);
        Launcher.SetBlockSize(NumThreadsPerBlock);
        Launcher.SetGridSize(NumBlocks);

        // One output slot per block.
        OutputData = new int[NumBlocks];
        Reduction.FirstReductionFromGlobalKernel(LevelInput, OutputData);

        // The output of this level feeds the next one.
        LevelInput = OutputData;
    }
    while (NumBlocks > CpuThreshold);

    // Finish on the CPU. When exactly one element is left this is just
    // OutputData[0]; when CpuThreshold allows several blocks to remain, their
    // partial results must all be included rather than only the first.
    // NOTE(review): assumes the kernel's reduction operator is addition, as
    // the caller's CPU reference loop suggests.
    int Result = 0;
    for (int i = 0; i < OutputData.Length; i++) { Result += OutputData[i]; }

    return Result;
}
// Number of elements in the test array. Must be a power of two
// (1 << 22 = 4194304, 1 << 23 = 8388608, 1 << 24 = 16777216).
private const int NumElements = 1 << 23;

/// <summary>
/// Shape of the reduction entry points timed by <c>Main</c>: takes the input
/// array and returns the reduced value.
/// </summary>
private delegate int ReductionMethod(int[] input);

/// <summary>
/// Generates random test data, computes a reference sum on the CPU, then runs
/// and times each reduction variant, reporting whether its result matches the
/// reference. Waits for a key press before exiting the process.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Create the input data array
    int[] InputData = new int[NumElements];

    // Print 'header'
    Console.WriteLine("Performing shared-memory reduction tests...");
    Console.WriteLine("----------------------------------------------------------------------");
    Console.Write("Generating random test data ({0} {1} elements)...", NumElements, InputData.GetType().GetElementType().Name);

    // Fill the input with random values in [0, MaxValue). MaxValue is chosen so
    // that even NumElements copies of the largest value still fit in an Int32;
    // this keeps the results easy to validate because overflow cannot occur.
    Random rand = new Random();
    const int MaxValue = Int32.MaxValue / NumElements;
    for (int i = 0; i < InputData.Length; i++) { InputData[i] = rand.Next(MaxValue); }
    Console.WriteLine("done.");
    Console.WriteLine();

    // Single stopwatch shared by every measurement below; reset after each use.
    Stopwatch Watch = new Stopwatch();

    // Compute the reduction value on the CPU first so that we can compare it to the GPU-based results
    // TODO: Perform the reduction 2 or 3 times here to get an accurate timing result
    // TODO: Create a version of this project which uses PLINQ / TPL for comparison
    Console.WriteLine("Computing CPU-based result for comparison...");
    Watch.Start();
    int CpuReductionValue = 0;
    for (int i = 0; i < InputData.Length; i++) { CpuReductionValue += InputData[i]; }
    Watch.Stop();
    Console.WriteLine("done. (Value = {0}, Time = {1:F02} ms)", CpuReductionValue, Watch.Elapsed.TotalMilliseconds);
    Console.WriteLine();
    Console.WriteLine("----------------------------------------------------------------------");
    Watch.Reset();

    // Each call iterates its reduction kernel until the entire array is reduced.
    RunReductionTest("Testing reduce0 (Interleaved access with modulo operator)...", Reduction.InterleavedModulo, InputData, CpuReductionValue, Watch);
    RunReductionTest("Testing reduce1 (Interleaved contiguous access)...", Reduction.InterleavedContiguous, InputData, CpuReductionValue, Watch);
    RunReductionTest("Testing reduce2 (Sequential addressing)...", Reduction.SequentialAddressing, InputData, CpuReductionValue, Watch);
    RunReductionTest("Testing reduce3 (Sequential addressing with reduction from global)...", Reduction.FirstReductionFromGlobal, InputData, CpuReductionValue, Watch);

    // Print the exit message
    Console.WriteLine("Press any key to exit.");
    Console.ReadKey();
    Environment.Exit(0);
}

/// <summary>
/// Runs one reduction variant, prints its result and elapsed time, and reports
/// in color whether the result equals the expected value.
/// </summary>
/// <param name="banner">Line printed before the reduction starts.</param>
/// <param name="reduce">The reduction method to invoke on <paramref name="inputData"/>.</param>
/// <param name="inputData">The array handed to <paramref name="reduce"/>.</param>
/// <param name="expectedValue">Reference result the reduction must match to pass.</param>
/// <param name="watch">Stopped, zeroed stopwatch used for timing; it is reset again before returning.</param>
private static void RunReductionTest(string banner, ReductionMethod reduce, int[] inputData, int expectedValue, Stopwatch watch)
{
    // Start the reduction (and the timer)
    Console.WriteLine(banner);
    watch.Start();
    int ActualValue = reduce(inputData);
    watch.Stop();

    Console.WriteLine("done. (Value = {0}, Time = {1:F02} ms)", ActualValue, watch.Elapsed.TotalMilliseconds);

    Console.Write("Test ");
    if (ActualValue == expectedValue) { ConsoleWriteLineColored("passed!", ConsoleColor.Green); }
    else { ConsoleWriteLineColored("failed!", ConsoleColor.Red); }
    Console.WriteLine();

    // Leave the stopwatch zeroed for the next measurement.
    watch.Reset();
}