/// <summary>
        /// Performs the one-time static setup for the radix sort: loads the
        /// "RadixSort" compute shader through <c>Resources.Load</c>, looks up the
        /// "RadixSortLocal" and "GlobalShuffle" kernel indices, and then calls
        /// <c>BlellochSumScan.InitKernels()</c>.
        /// </summary>
        /// <remarks>
        /// Calls made after <c>cs_radixSort</c> has been populated do nothing.
        /// </remarks>
        public static void InitKernels()
        {
            // The shader reference doubles as the "already initialised" flag.
            if (cs_radixSort == null)
            {
                cs_radixSort = Resources.Load<ComputeShader>("RadixSort");

                // Kernel indices are looked up once and kept in static fields.
                kn_radixSortLocal = cs_radixSort.FindKernel("RadixSortLocal");
                kn_globalShuffle = cs_radixSort.FindKernel("GlobalShuffle");

                // Initialise the scan kernels as part of the same setup.
                BlellochSumScan.InitKernels();
            }
        }
        /// <summary>
        /// Checks <c>BlellochSumScan.Scan</c> against a serial exclusive sum scan.
        /// An input of <c>ARRAY_COUNT</c> values from <c>GenerateRandomUInt()</c> is
        /// uploaded to a compute buffer, scanned into a zeroed output buffer, read
        /// back, and compared element by element with a running sum computed here.
        /// </summary>
        /// <remarks>
        /// The buffers and the scan object are released in a <c>finally</c> block so
        /// that a failing assertion (or any other exception) does not leak them.
        /// </remarks>
        public void BlellochSumScanTest()
        {
            uint[] array        = new uint[ARRAY_COUNT];
            uint[] scannedArray = new uint[ARRAY_COUNT];
            for (int i = 0; i < ARRAY_COUNT; i++)
            {
                array[i] = GenerateRandomUInt();
            }

            // Declared outside the try so the finally block can release whatever
            // was successfully created before a failure.
            ComputeBuffer   cb_in           = null;
            ComputeBuffer   cb_out          = null;
            BlellochSumScan blellochSumScan = null;

            try
            {
                cb_in  = new ComputeBuffer(ARRAY_COUNT, StrideSize.s_uint);
                cb_out = new ComputeBuffer(ARRAY_COUNT, StrideSize.s_uint);

                ComputeShaderUtil.InitKernels();
                BlellochSumScan.InitKernels();

                cb_in.SetData(array);
                ComputeShaderUtil.ZeroOut(ref cb_out, ARRAY_COUNT);

                blellochSumScan = new BlellochSumScan(ARRAY_COUNT);

                blellochSumScan.Scan(ref cb_in, ref cb_out, ARRAY_COUNT);

                cb_out.GetData(scannedArray);

                // Serial exclusive sum scan used as the reference: element i must
                // equal the sum of all inputs strictly before index i.
                uint sum = 0;

                for (int i = 0; i < ARRAY_COUNT; i++)
                {
                    Assert.AreEqual(sum, scannedArray[i]);
                    sum += array[i];
                }
            }
            finally
            {
                // Same release order as the success path had originally.
                if (cb_in != null)
                {
                    cb_in.Dispose();
                }
                if (cb_out != null)
                {
                    cb_out.Dispose();
                }
                if (blellochSumScan != null)
                {
                    blellochSumScan.Dispose();
                }
            }
        }