/// <summary>
        /// Benchmark entry point. Prints, section by section, the elapsed time of the
        /// cosine-similarity implementations: an integer-vs-float comparison first, then the
        /// float, integer and double comparison suites, each preceded by a tiny GPU run labelled
        /// "JIT compilation". Finally tries one large GPU run and waits for the user to press enter.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Integer vs float:");

            // The braces keep the two large data sets scoped to this first comparison only.
            {
                int[][]   integerDataSet = GenerateIntegerDataSet(200, 100000);
                float[][] floatDataSet   = GenerateFloatDataSet(200, 100000);

                PrintElapsedMilliseconds(
                    "Compute with integer, result with double: ",
                    () => SimpleCosineSimilarityIntegerVersion.ComputeDistances(integerDataSet));

                PrintElapsedMilliseconds(
                    "Compute with integer, result with float: ",
                    () => SimpleCosineSimilarityIntegerVersionResultInFloat.ComputeDistances(integerDataSet));

                PrintElapsedMilliseconds(
                    "Compute with float, result with float: ",
                    () => SimpleCosineSimilarityFloatVersion.ComputeDistances(floatDataSet));
            }

            Console.WriteLine("\nFloat versions:\n");

            // JIT compilation.
            WarmUpGpuJit(() => GpuCosineSimilarityFloatVersion.ComputeDistances(GenerateFloatDataSet(1, 1)));

            RunComparisonFloatVersions(200, 100000);
            RunComparisonFloatVersions(2000, 5000);
            RunComparisonFloatVersions(5000, 25);

            Console.WriteLine("\nInteger versions:\n");

            // JIT compilation.
            WarmUpGpuJit(() => GpuCosineSimilarityIntegerVersion.ComputeDistances(GenerateIntegerDataSet(1, 1)));

            RunComparisonIntegerVersions(200, 100000);
            RunComparisonIntegerVersions(2000, 5000);
            RunComparisonIntegerVersions(5000, 25);

            // Demonstrate GPU communication cost.
            RunComparisonIntegerVersions(1, 1);

            Console.WriteLine("\nDouble versions:\n");

            // JIT compilation.
            WarmUpGpuJit(() => GpuCosineSimilarityDoubleVersion.ComputeDistances(GenerateDoubleDataSet(1, 1)));

            RunComparisonDoubleVersions(2000, 5000);
            RunComparisonDoubleVersions(5000, 25);

            // Demonstrate the GPU limit.
            try
            {
                int numElement   = 10000;
                int numDimension = 1000;
                Console.WriteLine("\nDataset: " + numElement + "x" + numDimension);
                double[][] dataSet = GenerateDoubleDataSet(numElement, numDimension);

                // Only success or failure is reported here, so the computed result is discarded.
                GpuCosineSimilarityDoubleVersion.ComputeDistances(dataSet);
                Console.WriteLine("Gpu:                    No error");
            }
            catch (Exception ex)
            {
                Console.WriteLine("Gpu:                    Exception: " + ex.Message);
            }

            Console.WriteLine("\nPress enter to continue...");
            Console.ReadLine();
        }

        /// <summary>
        /// Runs <paramref name="work"/> once and prints <paramref name="label"/> followed by the
        /// elapsed wall-clock time in milliseconds and the suffix " ms".
        /// </summary>
        /// <param name="label">Text printed in front of the elapsed time.</param>
        /// <param name="work">The operation to time; exceptions it throws propagate to the caller.</param>
        private static void PrintElapsedMilliseconds(string label, Action work)
        {
            var watch = Stopwatch.StartNew();
            work();
            Console.WriteLine(label + watch.ElapsedMilliseconds + " ms");
        }

        /// <summary>
        /// Times a first GPU call (data-set generation included) and prints it under the
        /// "Gpu (JIT compilation)" label. Any exception is reported on the console by its
        /// message instead of being rethrown, so the remaining benchmarks still run.
        /// </summary>
        /// <param name="gpuCall">Generates a tiny data set and runs the GPU implementation on it.</param>
        private static void WarmUpGpuJit(Action gpuCall)
        {
            try
            {
                PrintElapsedMilliseconds("Gpu (JIT compilation): ", gpuCall);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Gpu (JIT compilation): Exception: " + ex.Message);
            }
        }
        /// <summary>
        /// Generates a double data set of the requested size and, for each implementation
        /// (Simple, VectorizedV1 and VectorizedV2 with 1, 2, 4 and 8 threads, then the GPU one),
        /// prints the elapsed time in milliseconds. Every result after the first is handed to
        /// <c>ValidateSameResult</c> together with the single-threaded Simple result.
        /// </summary>
        /// <param name="numElement">First argument of <c>GenerateDoubleDataSet</c>; printed as the left side of the "AxB" data-set header.</param>
        /// <param name="numDimension">Second argument of <c>GenerateDoubleDataSet</c>; printed as the right side of the "AxB" data-set header.</param>
        private static void RunComparisonDoubleVersions(int numElement, int numDimension)
        {
            double[][] dataSet = GenerateDoubleDataSet(numElement, numDimension);

            Console.WriteLine("\nDataset: " + numElement + "x" + numDimension);

            // The single-threaded simple implementation provides the reference every other run is validated against.
            double[][] distances = TimeDoubleVersion(
                "Simple 1 thread:        ",
                () => SimpleCosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: false));

            ValidateSameResult(distances, TimeDoubleVersion(
                "Simple 2 threads:       ",
                () => SimpleCosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 2)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "Simple 4 threads:       ",
                () => SimpleCosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 4)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "Simple 8 threads:       ",
                () => SimpleCosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 8)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "VectorizedV1 1 thread:  ",
                () => VectorizedV1CosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: false)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "VectorizedV1 2 threads: ",
                () => VectorizedV1CosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 2)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "VectorizedV1 4 threads: ",
                () => VectorizedV1CosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 4)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "VectorizedV1 8 threads: ",
                () => VectorizedV1CosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 8)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "VectorizedV2 1 thread:  ",
                () => VectorizedV2CosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: false)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "VectorizedV2 2 threads: ",
                () => VectorizedV2CosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 2)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "VectorizedV2 4 threads: ",
                () => VectorizedV2CosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 4)));

            ValidateSameResult(distances, TimeDoubleVersion(
                "VectorizedV2 8 threads: ",
                () => VectorizedV2CosineSimilarityDoubleVersion.ComputeDistances(dataSet, useMultipleThread: true, maxDegreeOfParallelism: 8)));

            // The GPU run is the only one guarded: a failure (in the computation or in the
            // validation) is printed by its message rather than aborting the benchmark.
            try
            {
                ValidateSameResult(distances, TimeDoubleVersion(
                    "Gpu:                    ",
                    () => GpuCosineSimilarityDoubleVersion.ComputeDistances(dataSet)));
            }
            catch (Exception ex)
            {
                Console.WriteLine("Gpu:                    Exception: " + ex.Message);
            }
        }

        /// <summary>
        /// Runs <paramref name="compute"/> once, prints <paramref name="label"/> followed by the
        /// elapsed milliseconds and " ms", and returns what the computation produced.
        /// </summary>
        /// <typeparam name="TResult">Return type of the timed computation; inferred at the call site so each implementation keeps its own result type.</typeparam>
        /// <param name="label">Text printed in front of the elapsed time.</param>
        /// <param name="compute">The computation to time; exceptions it throws propagate to the caller.</param>
        /// <returns>The value returned by <paramref name="compute"/>.</returns>
        private static TResult TimeDoubleVersion<TResult>(string label, Func<TResult> compute)
        {
            var watch = Stopwatch.StartNew();
            TResult result = compute();

            // The time is printed before the caller validates, so validation cost is never part of the reported figure.
            Console.WriteLine(label + watch.ElapsedMilliseconds + " ms");
            return result;
        }