/// <summary>
        ///     Invokes and runs the GPU function that checks whether the array is sorted,
        ///     then copies the device result buffer back into <c>D</c>.
        /// </summary>
        /// <param name="direction">Forwarded unchanged as the last argument of both <c>Sorted</c> launches.</param>
        public static void ExecuteSorted(int direction = 1)
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice();
            device.LoadModule(module);

            // Device buffers sized after the host arrays; only _a has its contents uploaded.
            int[] deviceA = device.Allocate(_a);
            int[] deviceB = device.Allocate(_b);
            int[] deviceC = device.Allocate(_c);
            int[] deviceD = device.Allocate(D);
            device.CopyToDevice(_a, deviceA);

            // One single-thread Split launch, then Sorted over the configured grid
            // (fifth argument 0) followed by a single-thread Sorted (fifth argument 1).
            device.Launch(1, 1).Split(deviceA, deviceB, deviceC, _middle);
            device.Launch(_gridSize, _blockSize).Sorted(deviceA, deviceB, deviceC, deviceD, 0, direction);
            device.Launch(1, 1).Sorted(deviceA, deviceB, deviceC, deviceD, 1, direction);

            device.CopyFromDevice(deviceD, D);

            // Release everything that was allocated on the GPU.
            device.FreeAll();
        }
        /// <summary>
        ///     Replaces <paramref name="dm"/> with a new <c>cols x rows</c> DenseMatrix built from data
        ///     that is processed on a CUDA device in two pieces: the leading <c>cols x cols</c> square
        ///     and the remaining <c>restRows x cols</c> rows.
        /// </summary>
        /// <param name="dm">Input matrix; on return it refers to a newly constructed matrix.</param>
        /// <remarks>
        ///     NOTE(review): assumes <c>RowCount &gt;= ColumnCount</c>; otherwise <c>restRows</c> is negative - confirm.
        ///     NOTE(review): <c>a_d</c> is uploaded twice but never passed to <c>GEMV</c>; both GEMV calls
        ///     read the freshly allocated <c>c_d</c> as the matrix argument and use <c>x_d</c> as both
        ///     vector arguments. Confirm this is the intended computation.
        /// </remarks>
        public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
        {
            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            GPGPUBLAS blas = GPGPUBLAS.Create(gpu);

            int cols = dm.ColumnCount, rows = dm.RowCount;
            // Number of rows below the leading square block.
            int restRows = rows - cols;

            //double[] a = dm.Storage.ToColumnMajorArray();
            // Split the input into the top cols x cols block (a) and the remaining rows (b),
            // each flattened in column-major order.
            double[] a = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
            double[] b = dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray();
            // Drop the reference to the original matrix; it is rebuilt at the end.
            dm = null;

            // First piece: upload the square block, run GEMV with the transpose flag,
            // and copy cols * cols values from c_d into the start of the output array.
            double[] a_d = gpu.CopyToDevice <double>(a);
            a = null;
            double[] c_d = gpu.Allocate <double>(cols * cols);
            double[] x_d = gpu.CopyToDevice <double>(new double[] { 1 });
            blas.GEMV(cols, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
            // 'a' is reused as the host output buffer for the full result.
            a = new double[cols * rows];
            gpu.CopyFromDevice <double>(c_d, 0, a, 0, cols * cols);
            gpu.FreeAll();
            // Second piece: same sequence for the remaining rows, written after the first
            // cols * cols entries of the output array.
            a_d = gpu.CopyToDevice <double>(b);
            b   = null;
            c_d = gpu.Allocate <double>(restRows * cols);
            x_d = gpu.CopyToDevice <double>(new double[] { 1 });
            blas.GEMV(restRows, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
            gpu.CopyFromDevice <double>(c_d, 0, a, cols * cols, restRows * cols);
            gpu.FreeAll();
            // Hand the caller a new matrix with swapped dimensions backed by the output array.
            dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, a);
        }
Beispiel #3
0
        /// <summary>
        ///     Prepares the fixture: acquires an sm_30 device, loads (or rebuilds and caches) the
        ///     Cudafy module, and creates the integer and float input/output buffers.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
            Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

            // Reuse the serialized module when it exists and its checksums still verify;
            // otherwise translate again and cache the result.
            _cm = CudafyModule.TryDeserialize();
            bool cachedModuleUsable = _cm != null && _cm.TryVerifyChecksums();
            if (!cachedModuleUsable)
            {
                _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);

            // Arbitrary integer test values.
            inputIntArray = new int[]
            {
                0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0,
                0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
                0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49,
                0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
            };
            d_inputIntArray  = _gpu.CopyToDevice(inputIntArray);
            d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);
            cpuIntResult     = new int[WARP_SIZE];
            gpuIntResult     = new int[WARP_SIZE];

            // Arbitrary float test values.
            inputFloatArray = new float[]
            {
                1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
                1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
                7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
                377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f
            };
            d_inputFloatArray  = _gpu.CopyToDevice(inputFloatArray);
            d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
            cpuFloatResult     = new float[WARP_SIZE];
            gpuFloatResult     = new float[WARP_SIZE];
        }
Beispiel #4
0
        /// <summary>
        ///     Uploads <paramref name="A"/> and <paramref name="B"/>, launches the "GPU_MA" kernel
        ///     and downloads the kernel's output buffer into <paramref name="C"/>.
        /// </summary>
        /// <param name="A">First host input matrix.</param>
        /// <param name="B">Second host input matrix.</param>
        /// <param name="C">Host matrix that receives the device result.</param>
        /// <param name="gpu">Device on which the kernel is launched.</param>
        /// <param name="maxTheadBlockSize">Upper bound for each thread-block dimension.</param>
        /// <param name="Size">Edge length used for the grid and passed to the kernel.</param>
        /// <returns>Always 1.</returns>
        public static int MA(int[,] A, int[,] B, int[,] C, GPGPU gpu, int maxTheadBlockSize, int Size)
        {
            // Device-side counterparts of the three matrices.
            int[,] deviceA = gpu.Allocate<int>(A);
            int[,] deviceB = gpu.Allocate<int>(B);
            int[,] deviceC = gpu.Allocate<int>(C);

            // Only the two inputs need their contents uploaded.
            gpu.CopyToDevice(A, deviceA);
            gpu.CopyToDevice(B, deviceB);

            // Each block dimension is Size, capped at maxTheadBlockSize.
            int blockEdge = Size < maxTheadBlockSize ? Size : maxTheadBlockSize;
            dim3 threadsPerBlock = new dim3(blockEdge, blockEdge);
            dim3 grid = new dim3(Size, Size);

            gpu.Launch(grid, threadsPerBlock, "GPU_MA", deviceA, deviceB, deviceC, Size);

            // Fetch the result, then release the three device buffers.
            gpu.CopyFromDevice(deviceC, C);

            gpu.Free(deviceA);
            gpu.Free(deviceB);
            gpu.Free(deviceC);
            return 1;
        }
Beispiel #5
0
        /// <summary>
        ///     Runs the <c>evaluate</c> kernel over <paramref name="hands"/> and returns one value per hand.
        /// </summary>
        /// <param name="hands">Host array of hand values; one kernel work item is provisioned per element.</param>
        /// <param name="numCards">Forwarded unchanged to the kernel.</param>
        /// <returns>
        ///     A new array with the same length as <paramref name="hands"/>, filled from the device
        ///     rank buffer. An empty input yields an empty array without touching the GPU.
        /// </returns>
        public static uint[] Evaluate(ulong[] hands, int numCards)
        {
            // Nothing to evaluate: avoid a zero-sized allocation and a zero-block launch.
            if (hands.Length == 0)
            {
                return new uint[0];
            }

            // Translate this class and compile it for the device.
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(km);

            // Round the block count up so every hand is covered.
            const int blockSize = 256;
            int blockCount = (hands.Length + blockSize - 1) / blockSize;

            uint[] toReturn = new uint[hands.Length];

            ulong[] dev_hands = gpu.Allocate<ulong>(hands.Length);
            try
            {
                uint[] dev_ranks = gpu.Allocate<uint>(hands.Length);
                try
                {
                    gpu.CopyToDevice(hands, dev_hands);

                    // The timer calls are kept from the original; the elapsed time is not used.
                    gpu.StartTimer();
                    gpu.Launch(blockCount, blockSize).evaluate(dev_hands, numCards, hands.Length, dev_ranks);
                    gpu.StopTimer();

                    gpu.CopyFromDevice(dev_ranks, toReturn);
                }
                finally
                {
                    // Previously leaked: release the rank buffer even if the launch or copy throws.
                    gpu.Free(dev_ranks);
                }
            }
            finally
            {
                // Previously leaked: release the input buffer on every path.
                gpu.Free(dev_hands);
            }

            return toReturn;
        }
Beispiel #6
0
        /// <summary>
        ///     Times 100 transfers of a <paramref name="size"/>-element int buffer between host and device.
        /// </summary>
        /// <param name="size">Number of ints in both the host and the device buffer.</param>
        /// <param name="up"><c>true</c> to copy host-to-device, <c>false</c> to copy device-to-host.</param>
        /// <returns>The value reported by the device timer for the whole loop.</returns>
        private float cuda_malloc_test(int size, bool up)
        {
            int[] hostBuffer = new int[size];
            int[] deviceBuffer = _gpu.Allocate<int>(size);

            _gpu.StartTimer();

            // The direction is fixed for the whole run, so pick the loop once.
            if (up)
            {
                for (int pass = 0; pass < 100; pass++)
                {
                    _gpu.CopyToDevice(hostBuffer, deviceBuffer);
                }
            }
            else
            {
                for (int pass = 0; pass < 100; pass++)
                {
                    _gpu.CopyFromDevice(deviceBuffer, hostBuffer);
                }
            }

            float elapsed = _gpu.StopTimer();

            _gpu.FreeAll();
            GC.Collect();

            return elapsed;
        }
Beispiel #7
0
        /// <summary>
        ///     Runs the two copy worker methods on separate threads, up to ten rounds, with device
        ///     multithreading enabled, and asserts that both threads finished within 10 seconds.
        /// </summary>
        public void Test_TwoThreadCopy()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            _gpuuintBufferIn3 = _gpu.Allocate(_uintBufferIn1);
            _gpuuintBufferIn4 = _gpu.Allocate(_uintBufferIn1);
            _gpu.EnableMultithreading();

            bool firstJoined = false;
            bool secondJoined = false;
            int round = 0;

            // At most ten rounds; stop early as soon as either thread misses its join timeout.
            do
            {
                Console.WriteLine(round);
                SetInputs();
                ClearOutputs();

                Thread first = new Thread(Test_TwoThreadCopy_Thread1);
                Thread second = new Thread(Test_TwoThreadCopy_Thread2);
                first.Start();
                second.Start();

                firstJoined = first.Join(10000);
                secondJoined = second.Join(10000);
                round++;
            }
            while (firstJoined && secondJoined && round < 10);

            _gpu.DisableMultithreading();
            _gpu.FreeAll();
            Assert.IsTrue(firstJoined);
            Assert.IsTrue(secondJoined);
        }
 /// <summary>
 ///     Creates the device, the host/device buffers for real and complex double data,
 ///     the FFT object, and fills the host inputs with two sine waves per batch.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);

     // Host buffers: N elements for each of the BATCH signals.
     _hostInput = new double[N * BATCH];
     _hostInputCplx = new ComplexD[N * BATCH];
     _hostOutput = new double[N * BATCH];
     _hostOutputCplx = new ComplexD[N * BATCH];

     // Device buffers, including the intermediate ones used between transforms.
     _devInput = _gpu.Allocate(_hostInput);
     _devInputCplx = _gpu.Allocate(_hostInputCplx);
     _devInter = _gpu.Allocate<double>(N * 2 * BATCH);
     _devInterCplx = _gpu.Allocate<ComplexD>(N * BATCH);
     _devOutput = _gpu.Allocate(_hostOutput);
     _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

     _fft = GPGPUFFT.Create(_gpu);

     for (int batch = 0; batch < BATCH; batch++)
     {
         for (int k = 0; k < N; k++)
         {
             int slot = k + batch * N;
             ComplexD sample = new ComplexD
             {
                 x = (double)((10.0F * Math.Sin(100 * 2 * Math.PI * k / N * Math.PI / 180))),
                 y = (double)((10.0F * Math.Sin(200 * 2 * Math.PI * k / N * Math.PI / 180)))
             };

             // The real-valued input carries only the first component.
             _hostInput[slot] = sample.x;
             _hostInputCplx[slot] = sample;
         }
     }
 }
        /// <summary>
        ///     Fixture setup: obtains an sm_30 device, loads the Cudafy module (re-translating and
        ///     caching it when no valid serialized copy exists) and allocates the test buffers.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
            Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

            _cm = CudafyModule.TryDeserialize();
            bool haveValidModule = _cm != null && _cm.TryVerifyChecksums();
            if (!haveValidModule)
            {
                // No usable cached module: translate, show the compiler output, and cache it.
                _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);

            // Integer data (arbitrary values) with device input/output and host result buffers.
            inputIntArray = new int[]
            {
                0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0,
                0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
                0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49,
                0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
            };
            d_inputIntArray = _gpu.CopyToDevice(inputIntArray);
            d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);
            cpuIntResult = new int[WARP_SIZE];
            gpuIntResult = new int[WARP_SIZE];

            // Float data (arbitrary values) with device input/output and host result buffers.
            inputFloatArray = new float[]
            {
                1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
                1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
                7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
                377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f
            };
            d_inputFloatArray = _gpu.CopyToDevice(inputFloatArray);
            d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
            cpuFloatResult = new float[WARP_SIZE];
            gpuFloatResult = new float[WARP_SIZE];
        }
        /// <summary>
        ///     Performs a merge sort.
        ///     Usage example:
        ///     CudafySequencies.SetSequencies(arrayOfArray,arrayOfArray);
        ///     CudafySequencies.Execute("Compare");
        ///     var compare = CudafySequencies.GetMartix();
        ///     CudafyArray.SetArray(Enumerable.Range(0,n).ToArray());
        ///     CudafyArray.SetCompare(compare);
        ///     CudafyArray.MergeSort();
        ///     var indexesOfSorted = CudafyArray.GetArray();
        /// </summary>
        /// <param name="direction">Forwarded unchanged to every <c>MergeLinear</c> launch.</param>
        public static void MergeSort(int direction = 1)
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice();
            device.LoadModule(module);

            int[] bufferA = device.Allocate(_a);
            int[] bufferB = device.Allocate(_b);
            device.CopyToDevice(_a, bufferA);

            for (int pass = 0; pass < _ceiling; pass++)
            {
                // Grid and block use the same size: the cube root of the pass-dependent
                // work estimate, capped at 15.
                int launchSize = Math.Min(15, (int)Math.Pow((_length >> pass) + pass, 0.333333333333));

                // The two buffers swap roles on every pass: even passes read A and write B.
                bool evenPass = (pass & 1) == 0;
                int[] source = evenPass ? bufferA : bufferB;
                int[] target = evenPass ? bufferB : bufferA;

                device.Launch(launchSize, launchSize)
                      .MergeLinear(source, target, pass, 0, _length, direction);
            }

            // After _ceiling passes the latest output is in A when _ceiling is even, otherwise in B.
            int[] finalBuffer = ((_ceiling & 1) == 0) ? bufferA : bufferB;
            device.CopyFromDevice(finalBuffer, _a);

            // Release everything that was allocated on the GPU.
            device.FreeAll();
        }
Beispiel #11
0
 /// <summary>
 ///     Creates the device and its BLAS wrapper, the ciROWS x ciCOLS host matrices,
 ///     and two device buffers shaped like the input and output matrices.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     _blas = GPGPUBLAS.Create(_gpu);

     // Host-side matrices.
     _hostOutput = new float[ciROWS, ciCOLS];
     _hostInput2 = new float[ciROWS, ciCOLS];
     _hostInput = new float[ciROWS, ciCOLS];

     // Device-side buffers sized after the first input and the output.
     _devPtr = _gpu.Allocate<float>(_hostInput);
     _devPtr2 = _gpu.Allocate<float>(_hostOutput);
 }
Beispiel #12
0
 /// <summary>
 ///     Fixture setup: device, BLAS wrapper, three ciROWS x ciCOLS host matrices,
 ///     and two matching device allocations.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     _blas = GPGPUBLAS.Create(_gpu);

     // Managed matrices used by the tests.
     _hostInput2 = new float[ciROWS, ciCOLS];
     _hostOutput = new float[ciROWS, ciCOLS];
     _hostInput = new float[ciROWS, ciCOLS];

     // One device buffer shaped like the input, one shaped like the output.
     _devPtr = _gpu.Allocate<float>(_hostInput);
     _devPtr2 = _gpu.Allocate<float>(_hostOutput);
 }
Beispiel #13
0
        /// <summary>
        ///     Renders a randomly generated set of spheres on the GPU into <paramref name="bitmap"/>
        ///     and prints the elapsed device time.
        /// </summary>
        /// <param name="bitmap">Host pixel buffer; sizes the device buffer and receives the result.</param>
        public static void Execute(byte[] bitmap)
        {
            // Use the cached module when it is present and its checksums verify; rebuild otherwise.
            CudafyModule module = CudafyModule.TryDeserialize();
            bool cacheValid = module != null && module.TryVerifyChecksums();
            if (!cacheValid)
            {
                module = CudafyTranslator.Cudafy(typeof(SphereOpenCL), typeof(ray_opencl));
                module.TrySerialize();
            }

            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            device.LoadModule(module);

            // Timing covers allocation, upload, kernel and download.
            device.StartTimer();

            // Device buffers for the output image and the sphere data set.
            byte[] deviceBitmap = device.Allocate(bitmap);
            SphereOpenCL[] deviceSpheres = device.Allocate<SphereOpenCL>(SPHERES);

            // Build the spheres on the host, then upload them in one copy.
            SphereOpenCL[] hostSpheres = new SphereOpenCL[SPHERES];
            for (int index = 0; index < SPHERES; index++)
            {
                SphereOpenCL sphere = new SphereOpenCL();

                // Colour components.
                sphere.r = rnd(1.0f);
                sphere.g = rnd(1.0f);
                sphere.b = rnd(1.0f);

                // Position and size.
                sphere.x = rnd(1000.0f) - 500;
                sphere.y = rnd(1000.0f) - 500;
                sphere.z = rnd(1000.0f) - 500;
                sphere.radius = rnd(100.0f) + 20;

                hostSpheres[index] = sphere;
            }
            device.CopyToDevice(hostSpheres, deviceSpheres);

            // 16x16 threads per block, with enough blocks to span ray_gui.DIM in each dimension.
            dim3 threads = new dim3(16, 16);
            dim3 grids = new dim3(ray_gui.DIM / 16, ray_gui.DIM / 16);

            // Strongly typed launch (the dynamic form would be Launch(grids, threads).kernel(...)).
            Action<GThread, SphereOpenCL[], byte[]> kernel = thekernel;
            device.Launch(grids, threads, kernel, deviceSpheres, deviceBitmap);

            // Bring the rendered image back for display.
            device.CopyFromDevice(deviceBitmap, bitmap);

            float elapsedTime = device.StopTimer();
            Console.WriteLine("Time to generate: {0} ms", elapsedTime);

            device.FreeAll();
        }
Beispiel #14
0
 /// <summary>
 ///     Creates the device and BLAS wrapper, prints the BLAS version, and allocates the
 ///     ciN-element host vectors plus two device vectors.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     _blas = GPGPUBLAS.Create(_gpu);
     Console.Write("BLAS Version={0}", _blas.GetVersion());

     // Host-side vectors.
     _hostOutput2 = new float[ciN];
     _hostOutput1 = new float[ciN];
     _hostInput2 = new float[ciN];
     _hostInput1 = new float[ciN];

     // Device-side vectors sized after the first input and the first output.
     _devPtr1 = _gpu.Allocate<float>(_hostInput1);
     _devPtr2 = _gpu.Allocate<float>(_hostOutput1);
 }
Beispiel #15
0
 /// <summary>
 ///     Fixture setup: device, BLAS wrapper (version is printed), four ciN-element host
 ///     vectors and two device vectors.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     _blas = GPGPUBLAS.Create(_gpu);
     Console.Write("BLAS Version={0}", _blas.GetVersion());

     // Managed vectors used by the tests.
     _hostInput2 = new float[ciN];
     _hostOutput2 = new float[ciN];
     _hostInput1 = new float[ciN];
     _hostOutput1 = new float[ciN];

     // Device buffers matching the first input and the first output vector.
     _devPtr1 = _gpu.Allocate<float>(_hostInput1);
     _devPtr2 = _gpu.Allocate<float>(_hostOutput1);
 }
Beispiel #16
0
        /// <summary>
        ///     Computes a dot product on the GPU: the <c>Dot</c> kernel produces one partial sum per
        ///     block, the partial sums are added on the host, and the total is printed next to the
        ///     value from <c>sum_squares</c>.
        /// </summary>
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            // Host-side inputs and the per-block partial results.
            float[] a         = new float[N];
            float[] b         = new float[N];
            float[] partial_c = new float[blocksPerGrid];

            for (int i = 0; i < N; i++)
            {
                a[i] = i;
                b[i] = i * 2;
            }

            try
            {
                // Device buffers. The former 'dev_test' allocation was never passed to the
                // kernel or read back, so it has been removed.
                float[] dev_a         = gpu.Allocate <float>(N);
                float[] dev_b         = gpu.Allocate <float>(N);
                float[] dev_partial_c = gpu.Allocate <float>(blocksPerGrid);

                gpu.CopyToDevice(a, dev_a);
                gpu.CopyToDevice(b, dev_b);

                gpu.Launch(blocksPerGrid, threadsPerBlock).Dot(dev_a, dev_b, dev_partial_c);

                // Bring the partial sums back to the host.
                gpu.CopyFromDevice(dev_partial_c, partial_c);
            }
            finally
            {
                // Release device memory even if an allocation, copy or launch throws.
                gpu.FreeAll();
            }

            // Finish the reduction on the CPU.
            float c = 0;
            for (int i = 0; i < blocksPerGrid; i++)
            {
                c += partial_c[i];
            }

            Console.WriteLine("Does GPU value {0} = {1}?\n", c, 2 * sum_squares((float)(N - 1)));
        }
Beispiel #17
0
        /// <summary>
        ///     Runs "BallotKernel" over four warps of random 0/1 values and compares each warp's
        ///     result with a bit mask computed on the host.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy(Program.testArchitecture);
            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, 0);

            device.LoadModule(module);

            const int warps = 4;
            const int count = warps * 32;

            var random = new Random();
            var input = new int[count];
            var output = new int[count / 32];

            // New arrays start zeroed, so the expected masks can be accumulated directly.
            var expectedOutput = new int[count / 32];

            for (var lane = 0; lane < count; lane++)
            {
                // Each lane contributes its 0/1 value at its bit position within the warp.
                input[lane] = random.Next(2);
                expectedOutput[lane / 32] += input[lane] << (lane % 32);
            }

            var devInput = device.Allocate<int>(count);
            var devOutput = device.Allocate<int>(warps);

            device.CopyToDevice(input, devInput);
            device.Launch(1, count, "BallotKernel", devInput, devOutput);

            // Fetch the per-warp results, then release both device buffers.
            device.CopyFromDevice(devOutput, output);

            device.Free(devInput);
            device.Free(devOutput);

            for (var warp = 0; warp < warps; warp++)
            {
                Console.WriteLine("Warp {0} Ballot: {1}", warp, output[warp]);
                Console.WriteLine("Expected: {0} \t{1}", expectedOutput[warp], expectedOutput[warp] == output[warp] ? "PASSED" : "FAILED");
            }
        }
Beispiel #18
0
        /// <summary>
        ///     Configures Cudafy for OpenCL device 0, runs <c>CalculateNeuralNetwork</c> on random
        ///     inputs and prints the wall-clock time of the device round trip.
        /// </summary>
        /// <returns>The output vector copied back from the device.</returns>
        public static float[] CallGPU()
        {
            // Global Cudafy configuration: OpenCL target, first device, OpenCL code generation.
            CudafyModes.Target = eGPUType.OpenCL;
            CudafyModes.DeviceId = 0;
            CudafyTranslator.Language = eLanguage.OpenCL;

            CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.OpenCL, typeof(GPU));
            GPGPU device = CudafyHost.GetDevice(eGPUType.OpenCL, 0);

            device.LoadModule(module);
            module.Serialize();

            float[] input = Utils.GenerateRandomVector();
            float[,,] NN = Utils.GenerateRandomMatrix().AsSingleDimension();
            float[] output = new float[Utils.N];

            // The measured span covers allocation, upload, kernel, download and release.
            Stopwatch timer = Stopwatch.StartNew();

            float[] deviceOutput = device.Allocate<float>(output);
            float[] deviceInput = device.CopyToDevice(input);
            float[,,] deviceNN = device.CopyToDevice(NN);

            device.Launch(Utils.GRID_SIZE, Utils.BLOCK_SIZE).CalculateNeuralNetwork(deviceInput, deviceNN, deviceOutput);
            device.CopyFromDevice(deviceOutput, output);
            device.FreeAll();

            timer.Stop();
            Console.WriteLine("GPU: " + timer.ElapsedMilliseconds);

            return output;
        }
Beispiel #19
0
        /// <summary>
        ///     Runs the "impliedVolatile" kernel once for a fixed set of option parameters and prints
        ///     the <c>i</c> and <c>B</c> fields read back from the device parameter block.
        /// </summary>
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));

            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _gpu.LoadModule(km);

            ParamsStruct[] host_par = new ParamsStruct[1];
            ParamsStruct[] result = new ParamsStruct[1];
            host_par[0].OP = 96.95;
            host_par[0].Price = 1332.24;
            host_par[0].Strike = 1235;
            host_par[0].TD = 31;
            host_par[0].R = 0.0001355;
            host_par[0].Q = 0.0166;
            host_par[0].N = 100;// 1000;
            host_par[0].kind = 1;

            ParamsStruct[] dev_par = _gpu.CopyToDevice(host_par);
            float[] PA = null;
            try
            {
                // Scratch buffer handed to the kernel as its second argument.
                PA = _gpu.Allocate<float>(1001);
                _gpu.Launch(1, 1, "impliedVolatile", dev_par, PA);

                // The kernel's results are read back from the parameter block itself.
                _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);
            }
            finally
            {
                // _gpu is a long-lived field, so both buffers are released on every call
                // (they were previously never freed).
                if (PA != null)
                {
                    _gpu.Free(PA);
                }
                _gpu.Free(dev_par);
            }

            Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
        }
Beispiel #20
0
        /// <summary>
        ///     Renders one frame on the GPU, saves it as "spring{frameNum}.png" and returns it.
        /// </summary>
        /// <param name="gpu">Device with the render kernel already loaded.</param>
        /// <param name="frameNum">Frame index; used only in the output file name.</param>
        /// <returns>
        ///     A bitmap that owns its pixel memory. Previously the bitmap wrapped a pinned managed
        ///     array whose handle was released before returning, leaving the returned bitmap with a
        ///     pointer to unpinned memory.
        /// </returns>
        static Bitmap Render(GPGPU gpu, int frameNum)
        {
            uint[,] deviceImage = gpu.Allocate <uint>(width, height);

            float[] pX1_gpu = gpu.CopyToDevice <float>(pX1);
            float[] pY1_gpu = gpu.CopyToDevice <float>(pY1);
            float[] pZ1_gpu = gpu.CopyToDevice <float>(pZ1);

            float[] colorPosition_gpu = gpu.CopyToDevice <float>(colorPosition);
            float[] currentTime_gpu   = gpu.CopyToDevice <float>(currentTime);

            dim3 threadsPerBlock = new dim3(8, 8);
            dim3 numBlocks       = new dim3(width / threadsPerBlock.x, height / threadsPerBlock.y);

            gpu.Launch(numBlocks, threadsPerBlock).renderKernel(deviceImage, pX1_gpu, pY1_gpu, pZ1_gpu, colorPosition_gpu, currentTime_gpu);

            uint[,] finalImage = new uint[width, height];
            gpu.CopyFromDevice <uint>(deviceImage, finalImage);

            gpu.Free(deviceImage);
            gpu.Free(pX1_gpu);
            gpu.Free(pY1_gpu);
            gpu.Free(pZ1_gpu);

            gpu.Free(colorPosition_gpu);
            gpu.Free(currentTime_gpu);

            // Flatten the image in its in-memory element order. This is the same byte sequence
            // the original pinned pointer exposed, so pixel placement is unchanged.
            int[] pixelData = new int[width * height];
            System.Buffer.BlockCopy(finalImage, 0, pixelData, 0, pixelData.Length * sizeof(int));

            // Copy the pixels into a bitmap that owns its memory instead of wrapping a pinned
            // buffer that would have to outlive the bitmap.
            Bitmap     bmp    = new Bitmap(width, height, PixelFormat.Format32bppRgb);
            BitmapData locked = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, PixelFormat.Format32bppRgb);
            try
            {
                // Copy row by row so the bitmap's own stride is honoured.
                for (int row = 0; row < height; row++)
                {
                    Marshal.Copy(pixelData, row * width, System.IntPtr.Add(locked.Scan0, row * locked.Stride), width);
                }
            }
            finally
            {
                bmp.UnlockBits(locked);
            }

            bmp.Save("spring" + frameNum + ".png");

            return(bmp);
        }
Beispiel #21
0
        /// <summary>
        ///     Launches "Kernel" with a <c>Generic&lt;ushort, ushort&gt;</c> argument and reports
        ///     PASSED when the single int written by the kernel equals 1.
        /// </summary>
        public static void Execute()
        {
            // Reuse the serialized module when it exists and its checksums verify.
            CudafyModule km = CudafyModule.TryDeserialize();

            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy(typeof(Generic <ushort, ushort>), typeof(SimpleGeneric));
                km.Serialize();
            }

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);

            gpu.LoadModule(km);

            var input = new Generic <ushort, ushort>();

            input.A = 187;

            int output;

            int[] devoutput = gpu.Allocate <int>(1);
            try
            {
                gpu.Launch(1, 1, "Kernel", input, devoutput);
                gpu.CopyFromDevice(devoutput, out output);
            }
            finally
            {
                // The result buffer was previously never released.
                gpu.Free(devoutput);
            }

            Console.WriteLine("Simple Generic: " + ((output == 1) ? "PASSED" : "FAILED"));
        }
 /// <summary>
 ///     Creates the device and FFT object (printing driver and CUFFT versions), allocates the
 ///     single-precision host/device buffers and fills the host inputs with two sine waves per batch.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     Console.WriteLine("CUDA driver version={0}", _gpu.GetDriverVersion());
     _fft = GPGPUFFT.Create(_gpu);

     // Host buffers: N elements for each of the BATCH signals.
     _hostInput = new float[N * BATCH];
     _hostInputCplx = new ComplexF[N * BATCH];
     _hostOutput = new float[N * BATCH];
     _hostOutputCplx = new ComplexF[N * BATCH];

     // Device buffers, including the intermediate ones used between transforms.
     _devInput = _gpu.Allocate(_hostInput);
     _devInputCplx = _gpu.Allocate(_hostInputCplx);
     _devInter = _gpu.Allocate<float>(N * 2 * BATCH);
     _devInterCplx = _gpu.Allocate<ComplexF>(N * BATCH);
     _devOutput = _gpu.Allocate(_hostOutput);
     _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

     Console.WriteLine("CUFFT version={0}", _fft.GetVersion());

     for (int batch = 0; batch < BATCH; batch++)
     {
         for (int k = 0; k < N; k++)
         {
             int slot = k + batch * N;
             ComplexF sample = new ComplexF
             {
                 x = (float)((10.0F * Math.Sin(100 * 2 * Math.PI * k / N * Math.PI / 180))),
                 y = (float)((10.0F * Math.Sin(200 * 2 * Math.PI * k / N * Math.PI / 180)))
             };

             // The real-valued input carries only the first component.
             _hostInput[slot] = sample.x;
             _hostInputCplx[slot] = sample;
         }
     }
 }
Beispiel #23
0
        /// <summary>
        ///     Checks whether the given OpenCL device can run a double-precision kernel by adding
        ///     2.5 and 7.5 on the device and comparing the result with 10.0.
        /// </summary>
        /// <param name="DeviceId">Zero-based OpenCL device index.</param>
        /// <returns>
        ///     <c>true</c> if the kernel ran and returned exactly 10.0; <c>false</c> if the index is
        ///     out of range, anything throws, or the result differs.
        /// </returns>
        /// <remarks>
        ///     Sets <c>CudafyModes.Target</c> and <c>CudafyTranslator.Language</c> to OpenCL and does
        ///     not restore them.
        /// </remarks>
        public static bool TestGpuDoublePrecision(int DeviceId)
        {
            // Device ids are zero-based, so an id equal to the device count is already out of
            // range (the original '>' comparison let it through); negative ids are rejected too.
            if (DeviceId < 0 || DeviceId >= CudafyHost.GetDeviceCount(eGPUType.OpenCL))
            {
                return(false);
            }

            try
            {
                CudafyModes.Target        = eGPUType.OpenCL;
                CudafyTranslator.Language = eLanguage.OpenCL;
                CudafyModule km  = CudafyTranslator.Cudafy();
                GPGPU        gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
                gpu.LoadModule(km);

                double[] dev_c = gpu.Allocate <double>();
                try
                {
                    double c;
                    gpu.Launch().add_double(2.5d, 7.5d, dev_c);
                    gpu.CopyFromDevice(dev_c, out c);

                    // 2.5 + 7.5 is exactly representable, so exact comparison is intended.
                    return(c == 10.0d);
                }
                finally
                {
                    // Release the buffer even when the launch or copy fails.
                    gpu.Free(dev_c);
                }
            }
            catch
            {
                // Deliberate best effort: any failure means the capability is unavailable.
                return(false);
            }
        }
        /// <summary>
        ///     Creates two offset views over one 1000-element array, scales the array by 10 after
        ///     the views exist, runs <c>Test2</c> on the device and prints "Pass" or "Fail".
        /// </summary>
        /// <param name="gpu">Device on which the views are materialised and the kernel is launched.</param>
        public static void Example2(GPGPU gpu)
        {
            ArrayView firstView = new ArrayView();
            ArrayView secondView = new ArrayView();

            // data[k] == k for k in 0..999.
            float[] data = new float[1000];
            for (int k = 0; k < data.Length; k++)
            {
                data[k] = (float)k;
            }

            // Two views of the same array, differing only in their offset; slicing would also work.
            firstView.CreateView(data, 100);
            secondView.CreateView(data, 200);

            // Scale the backing array after the views have been created.
            for (int k = 0; k < 1000; ++k)
            {
                data[k] *= 10f;
            }

            // Intended to upload the large array once, shared by both device-side view objects.
            var deviceFirstView = DeviceClassHelper.CreateDeviceObject(gpu, firstView);
            var deviceSecondView = DeviceClassHelper.CreateDeviceObject(gpu, secondView);

            var hostResult = new float[5];
            var deviceResult = gpu.Allocate<float>(5);

            gpu.Launch(1, 1).Test2(deviceFirstView, deviceSecondView, deviceResult);
            gpu.CopyFromDevice(deviceResult, hostResult);

            bool firstMatches = hostResult[0] == 1050f;
            bool secondMatches = hostResult[1] == 7f;

            Console.WriteLine(firstMatches && secondMatches ? "Pass" : "Fail");
        }
Beispiel #25
0
        /// <summary>
        ///     Runs the "impliedVolatile" kernel once for a fixed set of option parameters and prints
        ///     the <c>i</c> and <c>B</c> fields read back from the device parameter block.
        /// </summary>
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));

            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _gpu.LoadModule(km);

            ParamsStruct[] host_par = new ParamsStruct[1];
            ParamsStruct[] result   = new ParamsStruct[1];
            host_par[0].OP     = 96.95;
            host_par[0].Price  = 1332.24;
            host_par[0].Strike = 1235;
            host_par[0].TD     = 31;
            host_par[0].R      = 0.0001355;
            host_par[0].Q      = 0.0166;
            host_par[0].N      = 100;// 1000;
            host_par[0].kind   = 1;

            ParamsStruct[] dev_par = _gpu.CopyToDevice(host_par);
            float[]        PA      = null;
            try
            {
                // Scratch buffer handed to the kernel as its second argument.
                PA = _gpu.Allocate <float>(1001);
                _gpu.Launch(1, 1, "impliedVolatile", dev_par, PA);

                // The kernel's results are read back from the parameter block itself.
                _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);
            }
            finally
            {
                // _gpu is a long-lived field, so both buffers are released on every call
                // (they were previously never freed).
                if (PA != null)
                {
                    _gpu.Free(PA);
                }
                _gpu.Free(dev_par);
            }

            Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
        }
        /// <summary>
        ///     Translates the module for the device's own architecture, launches "GenerateRipples"
        ///     with an empty result buffer and a pre-filled buffer, and copies the result buffer back
        ///     into a local host array.
        /// </summary>
        public void ExeTestKernel()
        {
            GPGPU         gpu  = CudafyHost.GetDevice(CudafyModes.Target, 0);
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            // Host data that will be uploaded as the pre-filled buffer: multiples of three.
            int[] host_results = new int[N];
            for (int i = 0; i < N; i++)
            {
                host_results[i] = i * 3;
            }

            // 64 * 16 = 1024 threads per block.
            dim3 threadsPerBlock = new dim3(64, 16);

            // 8 * 8 = 64 blocks per grid, chosen only so the indices vary.
            dim3 blocksPerGrid = new dim3(8, 8);

            // Way one: allocate an uninitialised device buffer for the results.
            var dev_results = gpu.Allocate <int>(N);
            try
            {
                // Way two: upload an already filled host array.
                var dev_filled_results = gpu.CopyToDevice(host_results);
                try
                {
                    gpu.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", dev_results, dev_filled_results);

                    gpu.CopyFromDevice(dev_results, host_results);
                }
                finally
                {
                    // Both buffers were previously never released.
                    gpu.Free(dev_filled_results);
                }
            }
            finally
            {
                gpu.Free(dev_results);
            }
        }
        /// <summary>
        ///     Invokes and runs a single elementary GPU function selected by name.
        /// </summary>
        /// <param name="function">Name of the kernel to launch.</param>
        public static void Execute(string function)
        {
            // The last index of each index table must equal the length of its sequence array.
            Debug.Assert(_indexes1.Last() == _sequencies1.Length);
            Debug.Assert(_indexes2.Last() == _sequencies2.Length);

            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice();
            device.LoadModule(module);

            // Upload both index tables and both sequence arrays; the matrix is only allocated.
            int[] deviceIndexes1 = device.CopyToDevice(_indexes1);
            int[] deviceIndexes2 = device.CopyToDevice(_indexes2);
            int[] deviceSequencies1 = device.CopyToDevice(_sequencies1);
            int[] deviceSequencies2 = device.CopyToDevice(_sequencies2);
            int[,] deviceMatrix = device.Allocate(_matrix);

            int rowCount = _matrix.GetLength(0);
            int columnCount = _matrix.GetLength(1);

            // Grid and block share one size: the cube root of the cell count, capped at 15.
            int launchSize = Math.Min(15, (int)Math.Pow((double)rowCount * columnCount, 0.33333333333));
            dim3 gridSize = launchSize;
            dim3 blockSize = launchSize;

            device.Launch(gridSize, blockSize, function,
                          deviceSequencies1, deviceIndexes1,
                          deviceSequencies2, deviceIndexes2,
                          deviceMatrix);

            // Bring the filled matrix back to the host.
            device.CopyFromDevice(deviceMatrix, _matrix);

            // Release everything that was allocated on the GPU.
            device.FreeAll();
        }
Beispiel #28
0
        /// <summary>
        ///     Runs the <c>ModulAtomic</c> kernel with one block per element, compacts every value
        ///     different from -1 to the front of a host array, and prints the elapsed whole seconds.
        /// </summary>
        public static void primaGPU()
        {
            CudafyModule modul_kernel = CudafyTranslator.Cudafy();
            GPGPU        vga          = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            vga.LoadModule(modul_kernel);

            Stopwatch waktu = new Stopwatch();

            waktu.Start();
            int[] list_cpu = new int[KONSTANTA_THREAD];
            int[] list_cpy = new int[KONSTANTA_THREAD];
            int[] list     = vga.Allocate <int>(KONSTANTA_THREAD);
            vga.Launch(KONSTANTA_THREAD, 1).ModulAtomic(list);
            vga.CopyFromDevice(list, list_cpy);
            vga.FreeAll();

            // Keep only the entries the kernel did not mark with -1, packed at the front.
            int index = 0;

            for (int z = 0; z < list_cpy.Length; z++)
            {
                if (list_cpy[z] != -1)
                {
                    list_cpu[index] = list_cpy[z];
                    index++;
                }
            }
            waktu.Stop();
            TimeSpan ts = waktu.Elapsed;

            // TimeSpan.Seconds is only the 0-59 seconds component; use the total so runs of a
            // minute or longer are not reported as a wrapped value. Truncation to whole seconds
            // matches the previous output format.
            String total = ((int)ts.TotalSeconds).ToString();

            Console.WriteLine("Total GPU ------ {0} detik> ", total);
        }
Beispiel #29
0
        /// <summary>
        ///     Runs the <c>fungsiAtomic</c> kernel over <c>KONSTANTA_THREAD</c> blocks of one
        ///     thread, downloads the result, and prints the elapsed wall-clock time in
        ///     whole milliseconds.
        /// </summary>
        public static void eksekusi()
        {
            CudafyModule kernel_modul = CudafyTranslator.Cudafy();
            GPGPU        vga          = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            vga.LoadModule(kernel_modul);
            Stopwatch waktu = Stopwatch.StartNew();

            int[] array_vga   = vga.Allocate <int>(KONSTANTA_THREAD);
            int[] array_hasil = new int[KONSTANTA_THREAD];

            vga.Launch(KONSTANTA_THREAD, 1).fungsiAtomic(array_vga);
            vga.CopyFromDevice(array_vga, array_hasil);

            // One FreeAll releases every device allocation; the original called it twice.
            vga.FreeAll();
            waktu.Stop();

            // TimeSpan.Milliseconds is only the 0-999 component, so any run longer than
            // one second was under-reported; use the truncated total instead.
            TimeSpan ts    = waktu.Elapsed;
            String   total = ((long)ts.TotalMilliseconds).ToString();

            Console.WriteLine("Total VGA ------ > " + total);
        }
        /// <summary>
        ///     Creates the device and its sparse wrapper, then allocates the host
        ///     buffers and device buffers of matching size.
        /// </summary>
        public void SetUp()
        {
            _gpu    = CudafyHost.CreateDevice(CudafyModes.Target);
            _sparse = GPGPUSPARSE.Create(_gpu);

            // All three matrix buffers hold M * N elements.
            var matrixLength = M * N;

            _hiMatrixMN   = new double[matrixLength];
            _hiMatrixMN2  = new double[matrixLength];
            _hoMatrixMN   = new double[matrixLength];

            // Two int vectors: one of length M, one of length N.
            _hoPerVector  = new int[M];
            _hoPerVector2 = new int[N];

            // Device buffers are sized from the host arrays; nothing is uploaded here.
            _diPerVector2 = _gpu.Allocate(_hoPerVector2);
            _diMatrixMN   = _gpu.Allocate(_hiMatrixMN);
            _diMatrixMN2  = _gpu.Allocate(_hiMatrixMN2);
            _diPerVector  = _gpu.Allocate(_hoPerVector);
        }
Beispiel #31
0
        /// <summary>
        ///     Creates the device and its sparse wrapper, then allocates the host
        ///     buffers and device buffers of matching size.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
            _sparse = GPGPUSPARSE.Create(_gpu);

            // All three matrix buffers hold M * N elements.
            var matrixLength = M * N;

            _hiMatrixMN = new double[matrixLength];
            _hiMatrixMN2 = new double[matrixLength];
            _hoMatrixMN = new double[matrixLength];

            // Two int vectors: one of length M, one of length N.
            _hoPerVector = new int[M];
            _hoPerVector2 = new int[N];

            // Device buffers are sized from the host arrays; nothing is uploaded here.
            _diPerVector2 = _gpu.Allocate(_hoPerVector2);
            _diMatrixMN = _gpu.Allocate(_hiMatrixMN);
            _diMatrixMN2 = _gpu.Allocate(_hiMatrixMN2);
            _diPerVector = _gpu.Allocate(_hoPerVector);
        }
Beispiel #32
0
        /// <summary>
        ///     Reduces the matrix to "canonical" form by the Gauss-Jordan method, i.e. to
        ///     the matrix obtained through equivalent row transformations for which the
        ///     following holds: if i is the index of the first non-zero value in a row,
        ///     then every other row of the matrix contains only zero at index i.
        ///     If a row has no first non-zero index (-1), the whole row is zero.
        ///     Reduction to canonical form is used when solving systems of linear
        ///     equations and when searching for the fundamental system of solutions of a
        ///     system of linear equations.
        ///     This implementation works on a matrix over the field GF(2), i.e. a boolean matrix.
        /// </summary>
        public static void ExecuteGaussJordan()
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice();

            gpu.LoadModule(km);

            int[,] devA = gpu.Allocate(_a);
            int[,] devB = gpu.Allocate(_b);
            int[] devC = gpu.Allocate(_c);
            int[] devD = gpu.Allocate(_d);
            int[] devE = gpu.Allocate(E);

            gpu.CopyToDevice(_a, devA);

            int rows    = _a.GetLength(0);
            int columns = _a.GetLength(1);
            int limit   = Math.Min(rows, columns);

            // Multiply in double so a large matrix cannot overflow int before the cube
            // root is taken (the sibling Execute(string) already does this).
            double cellCount = (double)rows * columns;
            dim3 gridSize  = Math.Min(15, (int)Math.Pow(cellCount, 0.33333333333));
            dim3 blockSize = Math.Min(15, (int)Math.Pow(cellCount, 0.33333333333));

            gpu.Launch(gridSize, blockSize, "RepeatZero", devA, devB, devC, devD, devE);
            for (int i = 0; i < limit; i++)
            {
                gpu.Launch(gridSize, blockSize, "IndexOfNonZero", devA, devB, devC, devD, devE);
                gpu.CopyFromDevice(devC, _c);

                // Skip rows whose entry in _c is -1 (no non-zero element found).
                while (i < limit && _c[i] == -1)
                {
                    i++;
                }
                if (i >= limit)
                {
                    break;
                }

                int j = _c[i];
                gpu.Launch(gridSize, blockSize, "BooleanGaussJordan", devA, devB, i, j);

                // Swap the two device buffers so the next pass is launched with them in
                // the opposite roles.
                int[,] t = devA;
                devA     = devB;
                devB     = t;
            }

            gpu.CopyFromDevice(devA, _a);
            // free the memory allocated on the GPU
            gpu.FreeAll();
        }
Beispiel #33
0
        /// <summary>
        ///     Fills two host vectors of length <c>N</c>, runs the <c>adder</c> kernel on
        ///     <c>N</c> blocks of one thread, and prints every input pair with its result.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            int[] hostA = new int[N];
            int[] hostB = new int[N];
            int[] hostC = new int[N];

            // Device buffers are sized from the host arrays.
            int[] deviceA = gpu.Allocate <int>(hostA);
            int[] deviceB = gpu.Allocate <int>(hostB);
            int[] deviceC = gpu.Allocate <int>(hostC);

            // Inputs: a[k] = -k, b[k] = k squared.
            int k = 0;
            while (k < N)
            {
                hostA[k] = -k;
                hostB[k] = k * k;
                k++;
            }

            // Upload both inputs.
            gpu.CopyToDevice(hostA, deviceA);
            gpu.CopyToDevice(hostB, deviceB);

            // N blocks, one thread each.
            gpu.Launch(N, 1).adder(deviceA, deviceB, deviceC);

            // Download the result vector.
            gpu.CopyFromDevice(deviceC, hostC);

            for (int row = 0; row < N; row++)
            {
                Console.WriteLine("{0} + {1} = {2}", hostA[row], hostB[row], hostC[row]);
            }

            // Release the three device buffers individually.
            gpu.Free(deviceA);
            gpu.Free(deviceB);
            gpu.Free(deviceC);
        }
Beispiel #34
0
        /// <summary>
        ///     Loads a cached Cudafy module (re-serializing it when the cache is missing
        ///     or its checksums do not verify), fills constant memory with randomly
        ///     generated spheres, launches <c>kernel</c> and copies the rendered image
        ///     back into <paramref name="bitmap"/>.
        /// </summary>
        /// <param name="bitmap">
        ///     Host buffer that receives the rendered image; a device buffer of the same
        ///     size is allocated from it.
        /// </param>
        public static void Execute(byte[] bitmap)
        {
            DateTime     dt = DateTime.Now;
            CudafyModule km = CudafyModule.TryDeserialize(csFILENAME);

            // Check the module exists and matches the .NET modules, else make new
            if (km == null || !km.TryVerifyChecksums())
            {
                Console.WriteLine("There was no cached module available so we make a new one.");
                // NOTE(review): this deserializes a module named after the type rather
                // than translating one - confirm that is intended.
                km = CudafyModule.Deserialize(typeof(ray_serialize).Name);
                km.Serialize(csFILENAME);
            }

            // NOTE(review): device id 1 is hard-coded here - confirm.
            GPGPU gpu = CudafyHost.GetGPGPU(CudafyModes.Target, 1);

            gpu.LoadModule(km);

            // TimeSpan.Milliseconds is only the 0-999 component; a load that takes more
            // than a second was under-reported. Report the truncated total instead.
            Console.WriteLine("Time taken to load module: {0}ms", (long)DateTime.Now.Subtract(dt).TotalMilliseconds);

            // capture the start time
            gpu.StartTimer();

            // allocate memory on the GPU for the bitmap (same size as the host buffer)
            byte[] dev_bitmap = gpu.Allocate(bitmap);

            // allocate temp memory, initialize it, copy to constant memory on the GPU
            Sphere[] temp_s = new Sphere[SPHERES];
            for (int i = 0; i < SPHERES; i++)
            {
                temp_s[i].r = rnd(1.0f);
                temp_s[i].g = rnd(1.0f);
                temp_s[i].b = rnd(1.0f);

                temp_s[i].x      = rnd(1000.0f) - 500;
                temp_s[i].y      = rnd(1000.0f) - 500;
                temp_s[i].z      = rnd(1000.0f) - 500;
                temp_s[i].radius = rnd(100.0f) + 20;
            }

            gpu.CopyToConstantMemory(temp_s, s);

            // generate a bitmap from our sphere data: 16x16 threads per block,
            // DIM / 16 blocks along each axis
            dim3 grids   = new dim3(DIM / 16, DIM / 16);
            dim3 threads = new dim3(16, 16);

            gpu.Launch(grids, threads, "kernel", dev_bitmap);

            // copy our bitmap back from the GPU for display
            gpu.CopyFromDevice(dev_bitmap, bitmap);

            // get stop time, and display the timing results
            float elapsedTime = gpu.StopTimer();

            Console.WriteLine("Time to generate: {0} ms", elapsedTime);

            gpu.DeviceFreeAll();
        }
Beispiel #35
0
        /// <summary>
        ///     Uploads both measurement arrays, runs <c>calculateDataWithCudafy</c> with one
        ///     single-thread block per element of <paramref name="prevMeasures"/>, and
        ///     downloads the first device buffer back into <paramref name="prevMeasures"/>.
        /// </summary>
        /// <param name="prevMeasures">Previous measurements; overwritten with the kernel output.</param>
        /// <param name="actMeasures">Current measurements.</param>
        /// <returns>
        ///     <paramref name="prevMeasures"/> after the download, or <c>null</c> when
        ///     either argument is <c>null</c>.
        /// </returns>
        public static float[] prepareAndCalculateFloatData(float[] prevMeasures, float[] actMeasures)
        {
            // Nothing to compute unless both inputs are present.
            if ((prevMeasures == null) || (actMeasures == null))
            {
                return(null);
            }

            float[] devicePrevious = gpu.Allocate <float>(prevMeasures);
            float[] deviceActual   = gpu.Allocate <float>(actMeasures);
            gpu.CopyToDevice(prevMeasures, devicePrevious);
            gpu.CopyToDevice(actMeasures, deviceActual);

            int blockCount = prevMeasures.Length;
            gpu.Launch(blockCount, 1).calculateDataWithCudafy(devicePrevious, deviceActual);

            // The kernel's output is read from the first buffer.
            gpu.CopyFromDevice(devicePrevious, prevMeasures);
            gpu.FreeAll();

            return(prevMeasures);
        }
Beispiel #36
0
        /// <summary>
        ///     Translates the listed types with Cudafy, loads the resulting module on
        ///     device 0 and prepares the device-side buffers used by this instance.
        /// </summary>
        private void initGPU()
        {
            // Translate the Cudafy-marked members of these types and compile them.
            CudafyModule km = CudafyTranslator.Cudafy(
                typeof(Population),
                typeof(UserUpdate),
                typeof(Fitness),
                typeof(FitnessParameter),
                typeof(PredictionPerformances),
                typeof(Experiment),
                typeof(SimOptions));

            // Device id 0 of the configured target receives the module.
            const int deviceId = 0;
            gpu = CudafyHost.GetDevice(CudafyModes.Target, deviceId);
            gpu.LoadModule(km);

            // Output buffers: allocated only, nothing uploaded.
            dev_fitnesses     = gpu.Allocate <float>(fs);
            dev_fitnessParams = gpu.Allocate <FitnessParameter>(options.NumberOfIndividuals);

            // Input data: uploaded from the research data set.
            dev_groundTruth   = gpu.CopyToDevice(researchData.GroundTruth);
            dev_userTrust     = gpu.CopyToDevice(researchData.UserTrusts);
            dev_updates       = gpu.CopyToDevice(researchData.Updates);
        }
Beispiel #37
0
        /// <summary>
        ///     Adds two vectors of length <c>N</c> with the <c>add</c> kernel (128 blocks of
        ///     128 threads) and checks the result on the host, printing either the first
        ///     mismatch or a success message.
        /// </summary>
        public static void Execute()
        {
            // Translate this class and compile it.
            CudafyModule module = CudafyTranslator.Cudafy();

            // Load the module on the configured device.
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            int[] left   = new int[N];
            int[] right  = new int[N];
            int[] sum    = new int[N];

            // The result buffer is allocated only; inputs are uploaded after filling.
            int[] deviceSum = gpu.Allocate <int>(sum);

            // Inputs: left[k] = k, right[k] = 2k.
            int k = 0;
            while (k < N)
            {
                left[k]  = k;
                right[k] = 2 * k;
                k++;
            }

            int[] deviceLeft  = gpu.CopyToDevice(left);
            int[] deviceRight = gpu.CopyToDevice(right);

            // 128 blocks of 128 threads each.
            gpu.Launch(128, 128).add(deviceLeft, deviceRight, deviceSum);

            gpu.CopyFromDevice(deviceSum, sum);

            // Find the first element, if any, where the device result disagrees with
            // the host sum.
            int firstMismatch = -1;
            for (int idx = 0; idx < N && firstMismatch < 0; idx++)
            {
                if ((left[idx] + right[idx]) != sum[idx])
                {
                    firstMismatch = idx;
                }
            }

            if (firstMismatch >= 0)
            {
                Console.WriteLine("{0} + {1} != {2}", left[firstMismatch], right[firstMismatch], sum[firstMismatch]);
            }
            else
            {
                Console.WriteLine("We did it!");
            }

            // Release every device allocation.
            gpu.FreeAll();
        }
Beispiel #38
0
        /// <summary>
        ///     Sets up 64 x 64 random-number states on the device, runs the generator
        ///     kernel ten times, sums the per-thread results on the host and prints the
        ///     resulting fraction.
        /// </summary>
        public static void Basics()
        {
            CudafyModule cm = CudafyTranslator.Cudafy(CudafyModes.Architecture);

            Console.WriteLine(cm.CompilerOutput);
            GPGPU gpu = CudafyHost.GetDevice();

            gpu.LoadModule(cm);

            // 64 blocks of 64 threads; one state and one result slot per thread.
            const int side        = 64;
            const int threadCount = side * side;
            const int rounds      = 10;

            RandStateXORWOW[] devStates   = gpu.Allocate <RandStateXORWOW>(threadCount);
            int[]             devResults  = gpu.Allocate <int>(threadCount);
            int[]             hostResults = new int[threadCount];

            // NOTE(review): Set presumably clears the result buffer before accumulation - confirm.
            gpu.Set(devResults);
#if !NET35
            gpu.Launch(side, side).setup_kernel(devStates);
            for (int round = 0; round < rounds; round++)
            {
                gpu.Launch(side, side).generate_kernel(devStates, devResults);
            }
#else
            gpu.Launch(side, side, "setup_kernel", devStates);
            for (int round = 0; round < rounds; round++)
            {
                gpu.Launch(side, side, "generate_kernel", devStates, devResults);
            }
#endif

            gpu.CopyFromDevice(devResults, hostResults);

            // Sum the per-thread counters.
            int total = 0;
            foreach (int count in hostResults)
            {
                total += count;
            }
            Console.WriteLine("Fraction with low bit set was {0}", (float)total / (64.0f * 64.0f * 100000.0f * 10.0f));

            gpu.FreeAll();
        }
Beispiel #39
0
        /// <summary>
        ///     Translates and loads the module, allocates the device bitmap and stores
        ///     the launch dimensions (16 x 16 threads, <c>DIM / 16</c> blocks per axis).
        /// </summary>
        /// <param name="bytes">Number of bytes to allocate for the device bitmap.</param>
        public void Initialize(int bytes)
        {
            CudafyModule module = CudafyTranslator.Cudafy();

            _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            _gpu.LoadModule(module);

            _dev_bitmap = _gpu.Allocate<byte>(bytes);

            // Square tiles of 16 threads per side.
            const int tile = 16;
            int blocksPerAxis = DIM / tile;

            _blocks = new dim3(blocksPerAxis, blocksPerAxis);
            _threads = new dim3(tile, tile);
        }
Beispiel #40
0
        /// <summary>
        ///     Creates the device and sparse wrapper, fills the host vectors, derives
        ///     the sparse value/index arrays of X and allocates the matching device buffers.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice();
            _sparse = GPGPUSPARSE.Create(_gpu);

            // Host vectors of length N.
            _hiVectorX = new float[N];
            _hiVectorY = new float[N];
            _hoVectorY = new float[N];

            // X is filled sparsely (NNZ receives the count reported by the helper),
            // then Y is filled.
            FillBufferSparse(_hiVectorX, out NNZ);
            FillBuffer(_hiVectorY);

            // Value and index arrays sized by NNZ.
            _hiValsX = new float[NNZ];
            _hoValsX = new float[NNZ];
            _hiIndicesX = new int[NNZ];

            GetSparseIndex(_hiVectorX, _hiValsX, _hiIndicesX);

            // Device buffers are sized from the host arrays; nothing is uploaded here.
            _diValsX = _gpu.Allocate(_hiValsX);
            _diIndicesX = _gpu.Allocate(_hiIndicesX);
            _diVectorY = _gpu.Allocate(_hiVectorY);
        }
Beispiel #41
0
        /// <summary>
        ///     Uploads <paramref name="A"/> and <paramref name="B"/>, runs the <c>GPU_MA</c>
        ///     kernel and downloads the result into <paramref name="C"/>.
        /// </summary>
        /// <param name="A">First input array.</param>
        /// <param name="B">Second input array.</param>
        /// <param name="C">Output array; overwritten with the kernel result.</param>
        /// <param name="Size">Forwarded unchanged to the kernel.</param>
        /// <param name="Size1d">Number of work items; used to size the launch and forwarded to the kernel.</param>
        /// <param name="gpu">Device with the module containing <c>GPU_MA</c> already loaded.</param>
        /// <param name="max_threadsPerBlock">Upper bound for the number of threads in one block.</param>
        /// <returns>Always 1.</returns>
        public static int MA(int[] A, int[] B, int[] C, int Size, int Size1d, GPGPU gpu, int max_threadsPerBlock)
        {
            // allocate the memory on the GPU
            int[] GPU_A = gpu.Allocate<int>(A);
            int[] GPU_B = gpu.Allocate<int>(B);
            int[] GPU_C = gpu.Allocate<int>(C);

            // copy the arrays 'A' and 'B' to the GPU
            gpu.CopyToDevice(A, GPU_A);
            gpu.CopyToDevice(B, GPU_B);

            int threadsPerBlock;
            int blocksPerGrid;

            if (Size1d < max_threadsPerBlock)
            {
                // Everything fits in a single block.
                threadsPerBlock = Size1d;
                blocksPerGrid = 1;
            }
            else
            {
                // Ceiling division: just enough full-size blocks to cover Size1d, with
                // no extra block when Size1d is an exact multiple.
                threadsPerBlock = max_threadsPerBlock;
                blocksPerGrid = (Size1d + max_threadsPerBlock - 1) / max_threadsPerBlock;
            }

            // Launch takes the grid size (blocks) first and the block size (threads)
            // second. The original passed them the other way round, which breaks as
            // soon as the block count exceeds the per-block thread limit.
            gpu.Launch(blocksPerGrid, threadsPerBlock).GPU_MA(GPU_A, GPU_B, GPU_C, Size, Size1d);

            // copy the array 'C' back from the GPU to the CPU
            gpu.CopyFromDevice(GPU_C, C);

            gpu.Free(GPU_A);
            gpu.Free(GPU_B);
            gpu.Free(GPU_C);
            return 1;
        }
        /// <summary>
        ///     Translates <c>TextInsertion</c> for the CUDA device, launches
        ///     <c>AHybridMethod</c> on a single thread with a 64-element buffer, and prints
        ///     "Failed" if the downloaded data differs from the host data.
        /// </summary>
        public static void Execute()
        {           
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            CudafyModule km = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(TextInsertion));
            Console.WriteLine(km.CompilerOutput);
            _gpu.LoadModule(km);

            int[] data = new int[64];
            int[] data_d = _gpu.CopyToDevice(data);
            int[] res_d = _gpu.Allocate(data);
            int[] res = new int[64];
            _gpu.Launch(1, 1, "AHybridMethod", data_d, res_d);

            // NOTE(review): the input buffer (data_d) is what gets read back and
            // compared; res_d is never downloaded - confirm this is intended.
            _gpu.CopyFromDevice(data_d, res);
            for (int i = 0; i < 64; i++)
            {
                if (data[i] != res[i])
                {
                    Console.WriteLine("Failed");
                    break;
                }
            }

            // Release the two buffers allocated above; the original left them
            // allocated on the shared static device.
            _gpu.Free(data_d);
            _gpu.Free(res_d);
        }
        /// <summary>
        ///     Runs <c>SIMDFunctionTest</c> three times over freshly filled 1024 x 1024
        ///     buffers, printing the measured time of each pass; on the first pass the
        ///     device output is checked against <c>GThread.vadd2</c> computed on the host.
        /// </summary>
        public static void Execute()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(SIMDFunctions));
            _gpu.LoadModule(module);

            const int width = 1024;
            const int height = 1024;
            const int elementCount = width * height;

            for (int pass = 0; pass < 3; pass++)
            {
                // Fresh inputs for every pass, uploaded right after being filled.
                uint[] hostA = new uint[elementCount];
                Fill(hostA);
                uint[] deviceA = _gpu.CopyToDevice(hostA);

                uint[] hostB = new uint[elementCount];
                Fill(hostB);
                uint[] deviceB = _gpu.CopyToDevice(hostB);

                uint[] hostC = new uint[elementCount];
                uint[] deviceC = _gpu.Allocate(hostC);

                // The timed section covers the launch and the download.
                _gpu.StartTimer();
                _gpu.Launch(height, width, "SIMDFunctionTest", deviceA, deviceB, deviceC);
                _gpu.CopyFromDevice(deviceC, hostC);
                float elapsed = _gpu.StopTimer();
                Console.WriteLine("Time: {0}", elapsed);

                // Only the first pass is verified; every element is checked.
                if (loopIsFirst(pass))
                {
                    GThread hostThread = new GThread(1, 1, null);
                    bool passed = true;
                    for (int idx = 0; idx < elementCount; idx++)
                    {
                        uint expected = hostThread.vadd2(hostA[idx], hostB[idx]);
                        passed &= (expected == hostC[idx]);
                    }
                    Console.WriteLine("Test {0}", passed ? "passed. " : "failed!");
                }

                _gpu.FreeAll();
            }

            bool loopIsFirst(int pass)
            {
                return pass == 0;
            }
        }
Beispiel #44
0
        /// <summary>
        ///     Fills two random vectors of length <paramref name="threads"/>, runs the
        ///     <c>Test2</c> kernel with one single-thread block per element, downloads the
        ///     result and frees the three device buffers.
        /// </summary>
        /// <param name="gpu">Device with the module containing <c>Test2</c> already loaded.</param>
        /// <param name="threads">Vector length and number of blocks launched.</param>
        public static void Example1(GPGPU gpu, int threads)
        {
            double[] left = new double[threads];
            double[] right = new double[threads];
            double[] result = new double[threads];

            // One value for each vector per index, drawn alternately from one generator.
            Random generator = new Random();
            int k = 0;
            while (k < threads)
            {
                left[k] = generator.NextDouble();
                right[k] = generator.NextDouble();
                k++;
            }

            double[] deviceLeft = gpu.CopyToDevice(left);
            double[] deviceRight = gpu.CopyToDevice(right);
            double[] deviceResult = gpu.Allocate<double>(result);

            gpu.Launch(threads, 1).Test2(deviceLeft, deviceRight, deviceResult);

            // The downloaded result is not used further by this method.
            gpu.CopyFromDevice(deviceResult, result);

            gpu.Free(deviceLeft);
            gpu.Free(deviceRight);
            gpu.Free(deviceResult);
        }
Beispiel #45
0
        /// <summary>
        ///     Menu handler for the simple linear regression ("SLR") analysis. Shows the
        ///     column-selection dialog, records the chosen column names and, when exactly
        ///     two columns are chosen, computes the regression coefficients on the GPU:
        ///     the slope is stored in <c>ab[0]</c> and the intercept in <c>ab[1]</c>.
        ///     The result dialog is then shown.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void meanToolStripMenuItem_Click(object sender, EventArgs e)
        {
            Form dlg1 = new AnalyzeForm();
            DialogResult dr = dlg1.ShowDialog();

            // Record the name of every chosen column; entries equal to -1 are skipped.
            for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
            {
                if (Data.columnChoosen[ix] != -1)
                {
                    columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
                }
            }
            judul = "SLR";

            if (dr == DialogResult.OK)
            {
                if (columnChoosen.Count == 2)
                {
                    // First chosen column is y, second is x.
                    int columny = Data.columnChoosen[0];
                    int columnx = Data.columnChoosen[1];
                    try
                    {
                        CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
                        _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
                        _gpu.LoadModule(km);
                        GPGPUProperties gpprop = _gpu.GetDeviceProperties(false);
                        var sheet = reoGridControl2.CurrentWorksheet;

                        int Ny = jumlahthread(columny);
                        int Nx = jumlahthread(columnx);
                        int N = Math.Max(Ny, Nx);

                        float[] ay = new float[Ny];
                        float[] by = new float[Ny];
                        float[] ax = new float[Nx];
                        float[] bx = new float[Nx];
                        float[] c = new float[N];

                        // Load each column into its pair of host arrays.
                        int jumlahDatay = jumlahdata(columny, Ny);
                        int jumlahDatax = jumlahdata(columnx, Nx);
                        ay = InitData(1, columny, Ny, ay, by);
                        by = InitData(2, columny, Ny, ay, by);
                        ax = InitData(1, columnx, Nx, ax, bx);
                        bx = InitData(2, columnx, Nx, ax, bx);

                        float temp, temp2;
                        int missingCounty = 0;
                        int missingCountx = 0;

                        // Zero out and count entries equal to one of the column's
                        // declared missing values. Each missing value is parsed once
                        // per outer iteration instead of once per element.
                        for (int b = 0; b < Data.variableView[columny].missing.Count; b++)
                        {
                            float.TryParse(Data.variableView[columny].missing[b], out temp);
                            for (int a = 0; a < Ny; a++)
                            {
                                if (ay[a] == temp)
                                {
                                    ay[a] = 0;
                                    missingCounty++;
                                }
                            }
                        }
                        for (int b = 0; b < Data.variableView[columnx].missing.Count; b++)
                        {
                            float.TryParse(Data.variableView[columnx].missing[b], out temp);
                            for (int a = 0; a < Nx; a++)
                            {
                                if (ax[a] == temp)
                                {
                                    ax[a] = 0;
                                    missingCountx++;
                                }
                            }
                        }

                        // Zero out and count entries inside the column's missing range
                        // (inclusive on both ends).
                        if (Data.variableView[columny].missingRange.Count > 1)
                        {
                            float.TryParse(Data.variableView[columny].missingRange[0], out temp);
                            float.TryParse(Data.variableView[columny].missingRange[1], out temp2);
                            for (int a = 0; a < Ny; a++)
                            {
                                if (ay[a] >= temp && ay[a] <= temp2)
                                {
                                    ay[a] = 0;
                                    missingCounty++;
                                }
                            }
                        }
                        if (Data.variableView[columnx].missingRange.Count > 1)
                        {
                            float.TryParse(Data.variableView[columnx].missingRange[0], out temp);
                            float.TryParse(Data.variableView[columnx].missingRange[1], out temp2);
                            for (int a = 0; a < Nx; a++)
                            {
                                if (ax[a] >= temp && ax[a] <= temp2)
                                {
                                    ax[a] = 0;
                                    // Fixed: the original incremented the y counter here.
                                    missingCountx++;
                                }
                            }
                        }

                        // Fixed: the original wrote "/nx" where a newline was intended.
                        Debug.WriteLine("y  :  " + missingCounty + "\nx  :  " + missingCountx);

                        // Device buffers are re-created for every step and released
                        // together by FreeAll once all sums are computed.
                        // NOTE(review): the product kernels are launched for N = max(Ny, Nx)
                        // elements on buffers of Ny and Nx elements - confirm the columns
                        // always have equal length.

                        // sum(x * y)
                        float[] dev_a = _gpu.CopyToDevice(ay);
                        float[] dev_b = _gpu.CopyToDevice(ax);
                        float[] dev_c = _gpu.Allocate<float>(c);
                        _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, N);
                        float[] save1 = new float[N];
                        _gpu.CopyFromDevice(dev_c, save1);

                        dev_a = _gpu.CopyToDevice(by);
                        dev_b = _gpu.CopyToDevice(bx);
                        dev_c = _gpu.Allocate<float>(c);
                        _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, N);
                        float[] save2 = new float[N];
                        _gpu.CopyFromDevice(dev_c, save2);

                        dev_a = _gpu.CopyToDevice(save1);
                        dev_b = _gpu.CopyToDevice(save2);
                        dev_c = _gpu.Allocate<float>(c);
                        float sumxy = jumlahan(N, dev_a, dev_b, dev_c, c);

                        // sum(x)
                        c = new float[Nx];
                        dev_a = _gpu.CopyToDevice(ax);
                        dev_b = _gpu.CopyToDevice(bx);
                        dev_c = _gpu.Allocate<float>(c);
                        float sumx = jumlahan(Nx, dev_a, dev_b, dev_c, c);

                        // sum(y) - fixed: the original passed Nx as the element count
                        // although every buffer here has Ny elements.
                        c = new float[Ny];
                        dev_a = _gpu.CopyToDevice(ay);
                        dev_b = _gpu.CopyToDevice(by);
                        dev_c = _gpu.Allocate<float>(c);
                        float sumy = jumlahan(Ny, dev_a, dev_b, dev_c, c);

                        // sum(x * x)
                        c = new float[N];
                        dev_a = _gpu.CopyToDevice(ax);
                        dev_b = _gpu.CopyToDevice(ax);
                        dev_c = _gpu.Allocate<float>(c);
                        _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, Nx);
                        save1 = new float[N];
                        _gpu.CopyFromDevice(dev_c, save1);

                        dev_a = _gpu.CopyToDevice(bx);
                        dev_b = _gpu.CopyToDevice(bx);
                        dev_c = _gpu.Allocate<float>(c);
                        _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, Nx);
                        save2 = new float[N];
                        _gpu.CopyFromDevice(dev_c, save2);

                        dev_a = _gpu.CopyToDevice(save1);
                        dev_b = _gpu.CopyToDevice(save2);
                        dev_c = _gpu.Allocate<float>(c);
                        float sumxquad = jumlahan(Nx, dev_a, dev_b, dev_c, c);

                        _gpu.FreeAll();

                        float jumlahData = Math.Max(jumlahDatax, jumlahDatay);

                        // Slope: (n * Sxy - Sx * Sy) / (n * Sxx - Sx^2).
                        float beta = ((jumlahData * sumxy) - (sumx * sumy)) / ((jumlahData * sumxquad) - (sumx * sumx));
                        ab[0] = beta.ToString();

                        // Intercept: mean(y) - slope * mean(x), each mean taken over the
                        // non-missing count. Fixed: the original computed
                        // sumx / jumlahDatax - missingCountx, subtracting the missing
                        // count from the quotient instead of from the divisor.
                        float alpha = (sumy / (jumlahDatay - missingCounty)) - beta * (sumx / (jumlahDatax - missingCountx));
                        ab[1] = alpha.ToString();
                    }
                    catch (CudafyLanguageException cle)
                    {
                        // Report instead of silently swallowing, like the host-exception handler.
                        Console.Write(cle.Message);
                    }
                    catch (CudafyCompileException cce)
                    {
                        Console.Write(cce.Message);
                    }
                    catch (CudafyHostException che)
                    {
                        Console.Write(che.Message);
                    }
                }

                // The result dialog is shown whether or not the computation ran.
                Form dialogResult = new ResultSLR();
                dialogResult.ShowDialog();
            }
            else
            {
                dlg1.Close();
            }
        }
Beispiel #46
0
        /// <summary>
        ///     Menu handler: lets the user pick columns in <c>AnalyzeForm</c>, computes the
        ///     variance of every chosen column on the GPU, stores each value in <c>results</c>
        ///     and finally shows <c>ResultForm</c>.
        /// </summary>
        /// <remarks>
        ///     Cudafy exceptions raised for a column are written to the console and the loop
        ///     continues with the next column.
        /// </remarks>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        private void varianceToolStripMenuItem_Click(object sender, EventArgs e)
        {
            // A form shown with ShowDialog is not disposed when it closes, so dispose it here.
            using (Form dlg1 = new AnalyzeForm())
            {
                DialogResult dr = dlg1.ShowDialog();

                // Record the names of the chosen columns (done regardless of the dialog result).
                for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
                {
                    if (Data.columnChoosen[ix] != -1)
                    {
                        columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
                    }
                }

                judul = "Variance";

                if (dr != DialogResult.OK)
                {
                    dlg1.Close();
                    return;
                }

                for (int index = 0; index < Data.columnChoosen.Length; index++)
                {
                    if (Data.columnChoosen[index] == -1)
                    {
                        continue;
                    }

                    int column = Data.columnChoosen[index];
                    try
                    {
                        CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
                        _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
                        _gpu.LoadModule(km);

                        int N = jumlahthread(column);
                        float[] a = new float[N];
                        float[] b = new float[N];
                        float[] c = new float[N];
                        int jumlahData = jumlahdata(column, N);
                        // NOTE(review): InitData(1, ..) / InitData(2, ..) presumably return the two
                        // halves of the column that are later added pairwise - confirm in InitData.
                        a = InitData(1, column, N, a, b);
                        b = InitData(2, column, N, a, b);

                        int missingCount = 0;

                        // Zero every element of 'a' that equals one of the discrete missing markers.
                        // The marker is parsed once per marker instead of once per element.
                        for (int bx = 0; bx < Data.variableView[column].missing.Count; bx++)
                        {
                            float missingValue;
                            float.TryParse(Data.variableView[column].missing[bx], out missingValue);
                            for (int ax = 0; ax < N; ax++)
                            {
                                if (a[ax] == missingValue)
                                {
                                    a[ax] = 0;
                                    missingCount++;
                                }
                            }
                        }

                        // Zero every element of 'a' that falls inside the inclusive missing range.
                        if (Data.variableView[column].missingRange.Count > 1)
                        {
                            float rangeLow, rangeHigh;
                            float.TryParse(Data.variableView[column].missingRange[0], out rangeLow);
                            float.TryParse(Data.variableView[column].missingRange[1], out rangeHigh);
                            for (int ax = 0; ax < N; ax++)
                            {
                                if (a[ax] >= rangeLow && a[ax] <= rangeHigh)
                                {
                                    a[ax] = 0;
                                    missingCount++;
                                }
                            }
                        }
                        // NOTE(review): only 'a' is scanned for missing values, never 'b' - confirm
                        // whether that is intended.

                        Debug.WriteLine(missingCount);

                        // Pass 1: total of the column -> mean.
                        float[] dev_a = _gpu.CopyToDevice(a);
                        float[] dev_b = _gpu.CopyToDevice(b);
                        float[] dev_c = _gpu.Allocate<float>(c);
                        float hasil = jumlahan(N, dev_a, dev_b, dev_c, c);
                        float mean = hasil / (jumlahData - missingCount);

                        _gpu.FreeAll();

                        // Pass 2: run powerVector over each half with the mean as argument.
                        // NOTE(review): powerVector is assumed to yield (value - mean)^2 - confirm.
                        c = new float[N];
                        dev_a = _gpu.CopyToDevice(a);
                        dev_b = _gpu.CopyToDevice(b);
                        dev_c = _gpu.Allocate<float>(c);
                        _gpu.Launch((N + 127) / 128, 128).powerVector(dev_a, dev_c, mean, N);
                        _gpu.CopyFromDevice(dev_c, c);
                        _gpu.Free(dev_a);
                        _gpu.Free(dev_c);

                        float[] d = new float[N];
                        dev_c = _gpu.Allocate<float>(d);
                        _gpu.Launch((N + 127) / 128, 128).powerVector(dev_b, dev_c, mean, N);
                        _gpu.CopyFromDevice(dev_c, d);
                        _gpu.Free(dev_b);
                        _gpu.Free(dev_c);
                        _gpu.FreeAll();

                        // With an odd number of values the last slot of the second half is cleared
                        // so that it contributes nothing to the sum.
                        if (jumlahData % 2 != 0)
                        {
                            d[N - 1] = 0;
                        }

                        // Pass 3: total of both powerVector outputs -> variance (n - 1 denominator).
                        dev_a = _gpu.CopyToDevice(c);
                        dev_b = _gpu.CopyToDevice(d);
                        dev_c = _gpu.Allocate<float>(c);
                        hasil = jumlahan(N, dev_a, dev_b, dev_c, c);
                        float variance = hasil / (jumlahData - missingCount - 1);
                        results.Add(variance);

                        // Release anything still allocated before the next column is processed.
                        _gpu.FreeAll();
                    }
                    catch (CudafyLanguageException cle)
                    {
                        Console.Write(cle.Message);
                    }
                    catch (CudafyCompileException cce)
                    {
                        Console.Write(cce.Message);
                    }
                    catch (CudafyHostException che)
                    {
                        Console.Write(che.Message);
                    }
                }

                using (Form dialogResult = new ResultForm())
                {
                    dialogResult.ShowDialog();
                }
            }
        }
Beispiel #47
0
        /// <summary>
        ///     Computes the standard deviation of one worksheet column on the GPU and appends
        ///     it to <c>results</c>.
        /// </summary>
        /// <remarks>
        ///     Two pairwise GPU reductions are run: the first sums the column to obtain the mean,
        ///     the second sums the output of the <c>minusMean</c> kernel. The members
        ///     <c>jumlahData</c> and <c>missingCount</c> are overwritten as a side effect.
        ///     Cudafy exceptions are written to the console and are not rethrown.
        /// </remarks>
        /// <param name="column">Index of the worksheet column to analyse.</param>
        public void computeStdv(int column)
        {
            try
            {
                // Translates every [Cudafy] member of the calling type and loads it on the first CUDA device.
                CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
                _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
                _gpu.LoadModule(km);
                var sheet = reoGridControl2.CurrentWorksheet;

                // 'a' receives the first half of the rows and 'b' the second half.
                int N = sheet.RowCount / 2;
                float[] a = new float[N];
                float[] b = new float[N];
                float[] c = new float[N];

                // Fill 'a' and 'b' on the CPU and count the non-empty cells.
                jumlahData = 0;
                for (int i = 0; i < N; i++)
                {
                    if (sheet[i, column] != null && sheet[i, column].ToString() != "")
                    {
                        float.TryParse(sheet[i, column].ToString(), out a[i]);
                        jumlahData++;
                    }

                    if (sheet[i + N, column] != null && sheet[i + N, column].ToString() != "")
                    {
                        float.TryParse(sheet[i + N, column].ToString(), out b[i]);
                        jumlahData++;
                    }
                }

                missingCount = 0;

                // Zero every element of 'a' that equals one of the discrete missing markers.
                for (int bx = 0; bx < Data.variableView[column].missing.Count; bx++)
                {
                    float missingValue;
                    float.TryParse(Data.variableView[column].missing[bx], out missingValue);
                    for (int ax = 0; ax < N; ax++)
                    {
                        if (a[ax] == missingValue)
                        {
                            a[ax] = 0;
                            missingCount++;
                        }
                    }
                }

                // Zero every element of 'a' that falls inside the inclusive missing range.
                if (Data.variableView[column].missingRange.Count > 1)
                {
                    float rangeLow, rangeHigh;
                    float.TryParse(Data.variableView[column].missingRange[0], out rangeLow);
                    float.TryParse(Data.variableView[column].missingRange[1], out rangeHigh);
                    for (int ax = 0; ax < N; ax++)
                    {
                        if (a[ax] >= rangeLow && a[ax] <= rangeHigh)
                        {
                            a[ax] = 0;
                            missingCount++;
                        }
                    }
                }

                Debug.WriteLine(missingCount);

                float[] dev_a = _gpu.CopyToDevice(a);
                float[] dev_b = _gpu.CopyToDevice(b);
                float[] dev_c = _gpu.Allocate<float>(c);

                // Reduction 1: every pass adds the two halves of the previous result, halving N
                // (rounded up) until the total is spread over c[0] and c[1].
                bool first = true;
                while (N > 1)
                {
                    if (!first)
                    {
                        // Upper part of the previous result, zero padded to N elements.
                        float[] baru = new float[N];
                        for (int i = 0; i < (c.Length - N); i++)
                            baru[i] = c[N + i];

                        dev_a = _gpu.CopyToDevice(c.Take(N).ToArray());
                        dev_b = _gpu.CopyToDevice(baru);
                        c = new float[N];
                        dev_c = _gpu.Allocate<float>(c);
                    }

                    _gpu.Launch((N + 127) / 128, 128).addVector(dev_a, dev_b, dev_c, N);
                    _gpu.CopyFromDevice(dev_c, c);

                    _gpu.Free(dev_a);
                    _gpu.Free(dev_b);
                    _gpu.Free(dev_c);

                    N = (N + 1) / 2;
                    first = false;
                }

                float[] mean = new float[1];
                mean[0] = (c[0] + c[1]) / (jumlahData - missingCount);
                float[] dev_mean = _gpu.CopyToDevice(mean);

                float[] temp4 = new float[jumlahData];
                float[] dev_temp4 = _gpu.Allocate<float>(temp4);

                // Parse the first jumlahData rows of the column into one contiguous array.
                // NOTE(review): this assumes the non-empty cells are the first jumlahData rows
                // and does not exclude the missing values zeroed above - confirm.
                float[] data = new float[jumlahData];
                for (int i = 0; i < jumlahData; i++)
                {
                    if (sheet[i, column] != null && sheet[i, column].ToString() != "")
                    {
                        float.TryParse(sheet[i, column].ToString(), out data[i]);
                    }
                }
                float[] dev_data = _gpu.CopyToDevice(data);

                float[] x = new float[jumlahData + 1];
                float[] dev_x = _gpu.Allocate<float>(x);

                // NOTE(review): minusMean is assumed to store (data[i] - mean)^2 in x - confirm.
                _gpu.Launch((jumlahData + 127) / 128, 128).minusMean(dev_mean, dev_data, dev_x, jumlahData, dev_temp4);
                _gpu.CopyFromDevice(dev_x, x);

                _gpu.Free(dev_mean);
                _gpu.Free(dev_data);
                _gpu.Free(dev_x);
                _gpu.Free(dev_temp4);

                // Split the kernel output into two halves for the second reduction. The values
                // are copied directly; a string round trip would only lose precision.
                N = (jumlahData + 1) / 2;
                float[] isi1 = new float[N];
                float[] isi2 = new float[N];
                for (int i = 0; i < N; i++)
                {
                    isi1[i] = x[i];
                    // The kernel is launched for jumlahData elements only, so the spare slot
                    // x[jumlahData] is presumably never written; keep it out of the sum.
                    isi2[i] = (i + N < jumlahData) ? x[i + N] : 0f;
                }

                float[] isic = new float[N];
                float[] dev_isi1 = _gpu.CopyToDevice(isi1);
                float[] dev_isi2 = _gpu.CopyToDevice(isi2);
                float[] dev_isic = _gpu.Allocate<float>(isic);

                // Reduction 2: same scheme as reduction 1, with its own flag and its own
                // result buffer so that no pass launches on buffers freed by the previous one.
                bool first1 = true;
                while (N > 1)
                {
                    if (!first1)
                    {
                        float[] baru = new float[N];
                        for (int i = 0; i < (isic.Length - N); i++)
                            baru[i] = isic[N + i];

                        dev_isi1 = _gpu.CopyToDevice(isic.Take(N).ToArray());
                        dev_isi2 = _gpu.CopyToDevice(baru);
                        isic = new float[N];
                        dev_isic = _gpu.Allocate<float>(isic);
                    }

                    _gpu.Launch((N + 127) / 128, 128).addVector(dev_isi1, dev_isi2, dev_isic, N);
                    _gpu.CopyFromDevice(dev_isic, isic);

                    _gpu.Free(dev_isi1);
                    _gpu.Free(dev_isi2);
                    _gpu.Free(dev_isic);

                    N = (N + 1) / 2;
                    first1 = false;
                }

                // NOTE(review): the denominator ignores missingCount although the mean does not - confirm.
                float temp3 = (float)Math.Sqrt((isic[0] + isic[1]) / (jumlahData - 1));

                Debug.WriteLine("STDV = " + temp3);
                results.Add(temp3);
                _gpu.FreeAll();
            }
            catch (CudafyLanguageException cle)
            {
                Console.Write(cle.Message);
            }
            catch (CudafyCompileException cce)
            {
                Console.Write(cce.Message);
            }
            catch (CudafyHostException che)
            {
                Console.Write(che.Message);
            }
        }
Beispiel #48
0
        /// <summary>
        ///     Menu handler: lets the user pick columns in <c>AnalyzeForm</c>, computes the mean
        ///     of every chosen column with a pairwise GPU reduction, stores each value in
        ///     <c>results</c> and finally shows <c>FormResultsMean</c>.
        /// </summary>
        /// <remarks>
        ///     A CPU mean is computed as well and written to the debug output for comparison.
        ///     Cudafy exceptions raised for a column are written to the console and the loop
        ///     continues with the next column.
        /// </remarks>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        private void meanPararelToolStripMenuItem_Click(object sender, EventArgs e)
        {
            // A form shown with ShowDialog is not disposed when it closes, so dispose it here.
            using (Form dlg1 = new AnalyzeForm())
            {
                DialogResult dr = dlg1.ShowDialog();

                // Record the names of the chosen columns (done regardless of the dialog result).
                for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
                {
                    if (Data.columnChoosen[ix] != -1)
                    {
                        columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
                    }
                }

                if (dr != DialogResult.OK)
                {
                    dlg1.Close();
                    return;
                }

                for (int index = 0; index < Data.columnChoosen.Length; index++)
                {
                    if (Data.columnChoosen[index] == -1)
                    {
                        continue;
                    }

                    int column = Data.columnChoosen[index];
                    try
                    {
                        // Translates every [Cudafy] member of the calling type and loads it on the first CUDA device.
                        CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
                        _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
                        _gpu.LoadModule(km);
                        var sheet = reoGridControl2.CurrentWorksheet;

                        // 'a' receives the first half of the rows and 'b' the second half.
                        int N = sheet.RowCount / 2;
                        float[] a = new float[N];
                        float[] b = new float[N];
                        float[] c = new float[N];

                        // Fill 'a' and 'b' on the CPU and count the non-empty cells.
                        int jumlahData = 0;
                        for (int i = 0; i < N; i++)
                        {
                            if (sheet[i, column] != null && sheet[i, column].ToString() != "")
                            {
                                float.TryParse(sheet[i, column].ToString(), out a[i]);
                                jumlahData++;
                            }
                            if (sheet[i + N, column] != null && sheet[i + N, column].ToString() != "")
                            {
                                float.TryParse(sheet[i + N, column].ToString(), out b[i]);
                                jumlahData++;
                            }
                        }

                        int missingCount = 0;

                        // Zero every element of 'a' that equals one of the discrete missing markers.
                        // The marker is parsed once per marker instead of once per element.
                        for (int bx = 0; bx < Data.variableView[column].missing.Count; bx++)
                        {
                            float missingValue;
                            float.TryParse(Data.variableView[column].missing[bx], out missingValue);
                            for (int ax = 0; ax < N; ax++)
                            {
                                if (a[ax] == missingValue)
                                {
                                    a[ax] = 0;
                                    missingCount++;
                                }
                            }
                        }

                        // Zero every element of 'a' that falls inside the inclusive missing range.
                        if (Data.variableView[column].missingRange.Count > 1)
                        {
                            float rangeLow, rangeHigh;
                            float.TryParse(Data.variableView[column].missingRange[0], out rangeLow);
                            float.TryParse(Data.variableView[column].missingRange[1], out rangeHigh);
                            for (int ax = 0; ax < N; ax++)
                            {
                                if (a[ax] >= rangeLow && a[ax] <= rangeHigh)
                                {
                                    a[ax] = 0;
                                    missingCount++;
                                }
                            }
                        }

                        Debug.WriteLine(missingCount);

                        // CPU reference value, only used in the debug output below.
                        float meanSequential = 0;
                        for (int i = 0; i < N; i++)
                            meanSequential += a[i] + b[i];
                        meanSequential = meanSequential / (jumlahData - missingCount);

                        float[] dev_a = _gpu.CopyToDevice(a);
                        float[] dev_b = _gpu.CopyToDevice(b);
                        float[] dev_c = _gpu.Allocate<float>(c);

                        // Pairwise reduction: every pass adds the two halves of the previous
                        // result, halving N (rounded up) until the total is in c[0] and c[1].
                        bool first = true;
                        while (N > 1)
                        {
                            if (!first)
                            {
                                // Upper part of the previous result, zero padded to N elements.
                                float[] baru = new float[N];
                                for (int i = 0; i < (c.Length - N); i++)
                                    baru[i] = c[N + i];

                                dev_a = _gpu.CopyToDevice(c.Take(N).ToArray());
                                dev_b = _gpu.CopyToDevice(baru);
                                c = new float[N];
                                dev_c = _gpu.Allocate<float>(c);
                            }

                            _gpu.Launch((N + 127) / 128, 128).addVector(dev_a, dev_b, dev_c, N);
                            _gpu.CopyFromDevice(dev_c, c);

                            _gpu.Free(dev_a);
                            _gpu.Free(dev_b);
                            _gpu.Free(dev_c);

                            N = (N + 1) / 2;
                            first = false;
                        }

                        float meanParallel = (c[0] + c[1]) / (jumlahData - missingCount);
                        Debug.WriteLine("mean-nya adalah " + meanParallel + " mean dari sequensial adalah " + meanSequential);
                        results.Add(meanParallel);
                        _gpu.FreeAll();
                    }
                    catch (CudafyLanguageException cle)
                    {
                        Console.Write(cle.Message);
                    }
                    catch (CudafyCompileException cce)
                    {
                        Console.Write(cce.Message);
                    }
                    catch (CudafyHostException che)
                    {
                        Console.Write(che.Message);
                    }
                }

                using (Form dialogResult = new FormResultsMean())
                {
                    dialogResult.ShowDialog();
                }
            }
        }
Beispiel #49
0
        /// <summary>
        ///     Builds two <c>ArrayView</c> objects over one host array, creates their device
        ///     counterparts, launches the <c>Test2</c> kernel on a single thread and prints
        ///     "Pass" or "Fail" depending on the first two result values.
        /// </summary>
        /// <param name="gpu">Device on which the objects are created and the kernel is launched.</param>
        public static void Example2(GPGPU gpu)
        {
            ArrayView firstView = new ArrayView();
            ArrayView secondView = new ArrayView();

            // Host array holding 0, 1, ..., 999.
            float[] hostData = Enumerable.Range(0, 1000).Select(n => (float)n).ToArray();

            // Both views are created over the same array with different offsets
            // (per the original author's note, a slice could be used instead).
            firstView.CreateView(hostData, 100);
            secondView.CreateView(hostData, 200);

            // Scale the data after the views exist.
            for (int n = 0; n < 1000; n++)
            {
                hostData[n] = hostData[n] * 10f;
            }

            // Per the original author's note, the large array should reach the device only
            // once and be referenced by each ArrayView instance.
            var deviceFirst = DeviceClassHelper.CreateDeviceObject(gpu, firstView);
            var deviceSecond = DeviceClassHelper.CreateDeviceObject(gpu, secondView);

            var deviceResult = gpu.Allocate<float>(5);
            var result = new float[5];

            gpu.Launch(1, 1).Test2(deviceFirst, deviceSecond, deviceResult);
            gpu.CopyFromDevice(deviceResult, result);

            if (result[0] == 1050f && result[1] == 7f)
            {
                Console.WriteLine("Pass");
            }
            else
            {
                Console.WriteLine("Fail");
            }
        }
Beispiel #50
0
        /// <summary>
        ///     Sums all elements of <paramref name="array"/> with a pairwise GPU reduction.
        /// </summary>
        /// <remarks>
        ///     The reduction size is derived from the array itself, so arrays of any length
        ///     (including odd lengths) are summed completely. Arrays with at most two elements
        ///     are summed on the host because the reduction needs at least two pairs.
        /// </remarks>
        /// <param name="array">Values to sum.</param>
        /// <returns>
        ///     The sum of the elements, or 0 when a Cudafy exception occurred (its message is
        ///     written to the console).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
        public float computeSum2(float[] array)
        {
            if (array == null)
            {
                throw new ArgumentNullException("array");
            }

            // Nothing to reduce on the device for 0, 1 or 2 elements.
            if (array.Length <= 2)
            {
                float small = 0;
                for (int i = 0; i < array.Length; i++)
                    small += array[i];
                return small;
            }

            try
            {
                // Translates every [Cudafy] member of the calling type and loads it on the first CUDA device.
                CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
                _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
                _gpu.LoadModule(km);

                // 'a' holds the first half and 'b' the (zero padded) second half of the input.
                int N = (array.Length + 1) / 2;
                float[] a = new float[N];
                float[] b = new float[N];
                float[] c = new float[N];
                Array.Copy(array, 0, a, 0, N);
                Array.Copy(array, N, b, 0, array.Length - N);

                float[] dev_a = _gpu.CopyToDevice(a);
                float[] dev_b = _gpu.CopyToDevice(b);
                float[] dev_c = _gpu.Allocate<float>(c);

                // Pairwise reduction: every pass adds the two halves of the previous result,
                // halving N (rounded up) until the total is spread over c[0] and c[1].
                bool first = true;
                while (N > 1)
                {
                    if (!first)
                    {
                        // Upper part of the previous result, zero padded to N elements.
                        float[] baru = new float[N];
                        for (int i = 0; i < (c.Length - N); i++)
                            baru[i] = c[N + i];

                        dev_a = _gpu.CopyToDevice(c.Take(N).ToArray());
                        dev_b = _gpu.CopyToDevice(baru);
                        c = new float[N];
                        dev_c = _gpu.Allocate<float>(c);
                    }

                    _gpu.Launch((N + 127) / 128, 128).addVector(dev_a, dev_b, dev_c, N);
                    _gpu.CopyFromDevice(dev_c, c);

                    _gpu.Free(dev_a);
                    _gpu.Free(dev_b);
                    _gpu.Free(dev_c);

                    N = (N + 1) / 2;
                    first = false;
                }

                _gpu.FreeAll();
                // The last pass always runs with N == 2, so c has exactly two elements here.
                return (c[0] + c[1]);
            }
            catch (CudafyLanguageException cle)
            {
                Console.Write(cle.Message);
            }
            catch (CudafyCompileException cce)
            {
                Console.Write(cce.Message);
            }
            catch (CudafyHostException che)
            {
                Console.Write(che.Message);
            }
            return 0;
        }
        /// <summary>
        ///     Runs two worker threads, one per GPU, ten times in a row and asserts that both
        ///     threads finished within ten seconds on the last executed round.
        /// </summary>
        /// <remarks>
        ///     The loop stops at the first round in which a thread fails to join in time.
        ///     Multithreading is disabled and device memory is freed in a <c>finally</c> block
        ///     so that an exception inside the loop does not leave the devices configured for
        ///     this test.
        /// </remarks>
        public void Test_TwoThreadTwoGPUVer2()
        {
            eArchitecture arch = CudafyModes.Target == eGPUType.OpenCL ? eArchitecture.OpenCL : eArchitecture.sm_11;

            _gpu0 = CudafyHost.GetDevice(CudafyModes.Target, 0);
            var cm = CudafyTranslator.Cudafy(arch, typeof(MultiGPUTests));
            _gpu0.SetCurrentContext();
            _gpu0.LoadModule(cm);
            _gpuuintBufferIn0 = _gpu0.Allocate(_uintBufferIn0);

            _gpu1 = CudafyHost.GetDevice(CudafyModes.Target, 1);
            // Cannot load same module to two devices, therefore need to clone.
            var cm1 = cm.Clone();
            _gpu1.SetCurrentContext();
            _gpu1.LoadModule(cm1);
            _gpuuintBufferIn1 = _gpu1.Allocate(_uintBufferIn1);

            _gpu0.EnableMultithreading();
            _gpu1.EnableMultithreading();
            bool j1 = false;
            bool j2 = false;
            try
            {
                for (int i = 0; i < 10; i++)
                {
                    Console.WriteLine(i);
                    Thread t1 = new Thread(Test_TwoThreadTwoGPU_Thread0V2);
                    Thread t2 = new Thread(Test_TwoThreadTwoGPU_Thread1V2);
                    t1.Start();
                    t2.Start();
                    // Each join waits at most ten seconds; a timeout ends the test early.
                    j1 = t1.Join(10000);
                    j2 = t2.Join(10000);
                    if (!j1 || !j2)
                        break;
                }
            }
            finally
            {
                _gpu0.DisableMultithreading();
                _gpu0.FreeAll();
                _gpu1.DisableMultithreading();
                _gpu1.FreeAll();
            }
            Assert.IsTrue(j1);
            Assert.IsTrue(j2);
        }
        /// <summary>
        ///     Runs two copy worker threads against one CUDA device ten times in a row and
        ///     asserts that both threads finished within ten seconds on the last executed round.
        /// </summary>
        /// <remarks>
        ///     Inputs are set and outputs cleared before every round. The loop stops at the
        ///     first round in which a thread fails to join in time. Multithreading is disabled
        ///     and device memory is freed in a <c>finally</c> block so that an exception inside
        ///     the loop does not leave the device configured for this test.
        /// </remarks>
        public void Test_TwoThreadCopy()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            _gpuuintBufferIn3 = _gpu.Allocate(_uintBufferIn1);
            _gpuuintBufferIn4 = _gpu.Allocate(_uintBufferIn1);
            _gpu.EnableMultithreading();
            bool j1 = false;
            bool j2 = false;
            try
            {
                for (int i = 0; i < 10; i++)
                {
                    Console.WriteLine(i);
                    SetInputs();
                    ClearOutputs();
                    Thread t1 = new Thread(Test_TwoThreadCopy_Thread1);
                    Thread t2 = new Thread(Test_TwoThreadCopy_Thread2);
                    t1.Start();
                    t2.Start();
                    // Each join waits at most ten seconds; a timeout ends the test early.
                    j1 = t1.Join(10000);
                    j2 = t2.Join(10000);
                    if (!j1 || !j2)
                        break;
                }
            }
            finally
            {
                _gpu.DisableMultithreading();
                _gpu.FreeAll();
            }
            Assert.IsTrue(j1);
            Assert.IsTrue(j2);
        }