/// <summary>
        ///     Вызов и исполнение одной элементарной функции по имени функции
        /// </summary>
        /// <param name="function"></param>
        public static void Execute(string function)
        {
            Debug.Assert(_indexes1.Last() == _sequencies1.Length);
            Debug.Assert(_indexes2.Last() == _sequencies2.Length);

            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice();

            gpu.LoadModule(km);

            // copy the arrays 'a' and 'b' to the GPU
            int[] devIndexes1    = gpu.CopyToDevice(_indexes1);
            int[] devIndexes2    = gpu.CopyToDevice(_indexes2);
            int[] devSequencies1 = gpu.CopyToDevice(_sequencies1);
            int[] devSequencies2 = gpu.CopyToDevice(_sequencies2);
            int[,] devMatrix = gpu.Allocate(_matrix);

            int rows    = _matrix.GetLength(0);
            int columns = _matrix.GetLength(1);

            dim3 gridSize  = Math.Min(15, (int)Math.Pow((double)rows * columns, 0.33333333333));
            dim3 blockSize = Math.Min(15, (int)Math.Pow((double)rows * columns, 0.33333333333));

            gpu.Launch(gridSize, blockSize, function,
                       devSequencies1, devIndexes1,
                       devSequencies2, devIndexes2,
                       devMatrix);

            // copy the array 'c' back from the GPU to the CPU
            gpu.CopyFromDevice(devMatrix, _matrix);

            // free the memory allocated on the GPU
            gpu.FreeAll();
        }
        /// <summary>
        ///     Выполнение сортировки слияниями
        ///     Пример использования:
        ///     CudafySequencies.SetSequencies(arrayOfArray,arrayOfArray);
        ///     CudafySequencies.Execute("Compare");
        ///     var compare = CudafySequencies.GetMartix();
        ///     CudafyArray.SetArray(Enumerable.Range(0,n).ToArray());
        ///     CudafyArray.SetCompare(compare);
        ///     CudafyArray.MergeSort();
        ///     var indexesOfSorted = CudafyArray.GetArray();
        /// </summary>
        public static void MergeSort(int direction = 1)
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice();

            gpu.LoadModule(km);

            int[] devA = gpu.Allocate(_a);
            int[] devB = gpu.Allocate(_b);

            gpu.CopyToDevice(_a, devA);

            for (int i = 0; i < _ceiling; i++)
            {
                int gridSize  = Math.Min(15, (int)Math.Pow((_length >> i) + i, 0.333333333333));
                int blockSize = Math.Min(15, (int)Math.Pow((_length >> i) + i, 0.333333333333));
                gpu.Launch(gridSize, blockSize)
                .MergeLinear(((i & 1) == 0) ? devA : devB, ((i & 1) == 0) ? devB : devA, i, 0,
                             _length,
                             direction);
            }
            gpu.CopyFromDevice(((_ceiling & 1) == 0) ? devA : devB, _a);

            // free the memory allocated on the GPU
            gpu.FreeAll();
        }
示例#3
0
        static void Main(string[] args)
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            int numFrames = numberOfSeconds * framesPerSecond;

            InitializeParticles();

            File.WriteAllText("length.txt", numFrames.ToString());

            for (int i = 0; i < numFrames; i++)
            {
                DateTime frameStart = DateTime.Now;

                Simulate(gpu);
                Bitmap frame = Render(gpu, i);

                TimeSpan frameTime = DateTime.Now - frameStart;
                Console.WriteLine("Frame " + i + " complete. Time: " + frameTime.TotalMilliseconds + "ms");
            }
        }
示例#4
0
        public static float[] CallGPU()
        {
            CudafyModes.Target        = eGPUType.OpenCL;
            CudafyModes.DeviceId      = 0;
            CudafyTranslator.Language = eLanguage.OpenCL;
            CudafyModule km  = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.OpenCL, typeof(GPU));
            GPGPU        gpu = CudafyHost.GetDevice(eGPUType.OpenCL, 0);

            gpu.LoadModule(km);
            km.Serialize();

            float[] input = Utils.GenerateRandomVector();
            float[,,] NN = Utils.GenerateRandomMatrix().AsSingleDimension();
            float[] output = new float[Utils.N];

            Stopwatch gpuSW = new Stopwatch();

            gpuSW.Start();
            float[] dev_output = gpu.Allocate <float>(output);
            float[] dev_input  = gpu.CopyToDevice(input);
            float[,,] dev_NN = gpu.CopyToDevice(NN);
            gpu.Launch(Utils.GRID_SIZE, Utils.BLOCK_SIZE).CalculateNeuralNetwork(dev_input, dev_NN, dev_output);
            gpu.CopyFromDevice(dev_output, output);
            gpu.FreeAll();
            gpuSW.Stop();
            Console.WriteLine("GPU: " + gpuSW.ElapsedMilliseconds);
            return(output);
        }
        public void SetUp()
        {
            //CudafyModes.Architecture = eArchitecture.sm_30;
            _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
            Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

            _cm = CudafyModule.TryDeserialize();
            if (_cm == null || !_cm.TryVerifyChecksums())
            {
                _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);

            inputIntArray = new int[] { 0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0, 0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
                                        0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49, 0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb}; // arbitrary values
            d_inputIntArray = _gpu.CopyToDevice(inputIntArray);
            d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);
            gpuIntResult = new int[WARP_SIZE];
            cpuIntResult = new int[WARP_SIZE];

            inputFloatArray = new float[] { 1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
                                            1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
                                            7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
                                            377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f}; // arbitrary values
            d_inputFloatArray = _gpu.CopyToDevice(inputFloatArray);
            d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
            gpuFloatResult = new float[WARP_SIZE];
            cpuFloatResult = new float[WARP_SIZE];
        }
示例#6
0
        //
        // http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
        //
        private static void BlasSample(int deviceId)
        {
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, deviceId);

            CudafyModes.DeviceId = deviceId;
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            GPGPUBLAS blas = GPGPUBLAS.Create(gpu);

            const int N = 100;

            float[] a     = new float[N];
            float[] b     = new float[N];
            float[] c     = new float[N];
            float   alpha = -1;
            float   beta  = 0;

            float[] device_a = gpu.CopyToDevice(a);
            float[] device_b = gpu.CopyToDevice(b);
            float[] device_c = gpu.CopyToDevice(c);

            int             m  = 10;
            int             n  = 10;
            int             k  = 10;
            cublasOperation Op = cublasOperation.N;

            blas.GEMM(m, k, n, alpha, device_a, device_b, beta, device_c, Op);

            throw new NotImplementedException();
        }
示例#7
0
        public static uint[] Evaluate(ulong[] hands, int numCards)
        {
            // Translates this class to CUDA C and then compliles
            CudafyModule km = CudafyTranslator.Cudafy();//eArchitecture.sm_20);

            // Get the first GPU and load the module
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            int blockSize = 256;
            int blockx    = hands.Length / blockSize;

            if (hands.Length % blockSize != 0)
            {
                blockx++;
            }

            ulong[] dev_hands = gpu.Allocate <ulong>(hands.Length);
            uint[]  dev_ranks = gpu.Allocate <uint>(hands.Length);

            gpu.CopyToDevice(hands, dev_hands);

            gpu.StartTimer();
            gpu.Launch(blockx, blockSize).evaluate(dev_hands, numCards, hands.Length, dev_ranks);
            var ts = gpu.StopTimer();

            uint[] toReturn = new uint[hands.Length];
            gpu.CopyFromDevice(dev_ranks, toReturn);

            return(toReturn);
        }
示例#8
0
        public void SetUp()
        {
            //CudafyModes.Architecture = eArchitecture.sm_30;
            _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
            Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

            _cm = CudafyModule.TryDeserialize();
            if (_cm == null || !_cm.TryVerifyChecksums())
            {
                _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);

            inputIntArray = new int[] { 0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0, 0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
                                        0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49, 0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb }; // arbitrary values
            d_inputIntArray  = _gpu.CopyToDevice(inputIntArray);
            d_outputIntArray = _gpu.Allocate <int>(WARP_SIZE);
            gpuIntResult     = new int[WARP_SIZE];
            cpuIntResult     = new int[WARP_SIZE];

            inputFloatArray = new float[] { 1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
                                            1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
                                            7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
                                            377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f }; // arbitrary values
            d_inputFloatArray  = _gpu.CopyToDevice(inputFloatArray);
            d_outputFloatArray = _gpu.Allocate <float>(WARP_SIZE);
            gpuFloatResult     = new float[WARP_SIZE];
            cpuFloatResult     = new float[WARP_SIZE];
        }
示例#9
0
        public static void eksekusi()
        {
            CudafyModule kernel_modul = CudafyTranslator.Cudafy();
            GPGPU        vga          = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            vga.LoadModule(kernel_modul);
            Stopwatch waktu = new Stopwatch();

            waktu.Start();
            int[] array_vga   = vga.Allocate <int>(KONSTANTA_THREAD);
            int[] array_hasil = new int[KONSTANTA_THREAD];

            //long[] matriks1 = vga.Allocate<long>(KONSTANTA_THREAD);
            //long[] matriks2 = vga.Allocate<long>(KONSTANTA_THREAD);//new int[KONSTANTA_THREAD];
            //long[] matriks3 = vga.Allocate<long>(KONSTANTA_THREAD); //[KONSTANTA_THREAD];

            vga.Launch(KONSTANTA_THREAD, 1).fungsiAtomic(array_vga);
            vga.CopyFromDevice(array_vga, array_hasil);
            vga.FreeAll();

            //for(int z = 0; z < array_hasil.Length; z++)
            //{
            //    Console.WriteLine("Hasil Ekstrak----" + array_hasil[z]);
            //}
            vga.FreeAll();
            waktu.Stop();
            TimeSpan ts    = waktu.Elapsed;
            String   total = ts.Milliseconds.ToString();

            Console.WriteLine("Total VGA ------ > " + total);
        }
示例#10
0
        /// <summary>
        /// Translates and compiles the given types against specified compilation properties.
        /// </summary>
        /// <param name="props">The settings.</param>
        /// <param name="types">Types to search and translate.</param>
        /// <returns></returns>
        public static CudafyModule Cudafy(IEnumerable <CompileProperties> props, params Type[] types)
        {
            CudafyModule km = null;

            //var uniqueLanguages = new List<eLanguage>();
            //if (props.Any(p => p.Language == eLanguage.Cuda))
            //    uniqueLanguages.Add(eLanguage.Cuda);
            //if (props.Any(p => p.Language == eLanguage.OpenCL))
            //    uniqueLanguages.Add(eLanguage.OpenCL);
            //foreach (var lang in uniqueLanguages)
            foreach (var p in props)
            {
                CudafyTranslator.Language      = p.Language;
                _architecture                  = p.Architecture;
                CUDALanguage.ComputeCapability = GetComputeCapability(p.Architecture);

                km = DoCudafy(km, types);
                if (km == null)
                {
                    throw new CudafyFatalException(CudafyFatalException.csUNEXPECTED_STATE_X, "CudafyModule km = null");
                }
            }
            km.WorkingDirectory = WorkingDirectory;
            km.Compile(props.ToArray());

            Type lastType = types.Last(t => t != null);

            if (lastType != null)
            {
                km.Name = lastType.Name;
            }
            return(km);
        }
示例#11
0
        public static bool TestGpuDoublePrecision(int DeviceId)
        {
            if (DeviceId > CudafyHost.GetDeviceCount(eGPUType.OpenCL))
            {
                return(false);
            }

            try
            {
                CudafyModes.Target        = eGPUType.OpenCL;
                CudafyTranslator.Language = eLanguage.OpenCL;
                CudafyModule km  = CudafyTranslator.Cudafy();
                GPGPU        gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
                gpu.LoadModule(km);

                double   c;
                double[] dev_c = gpu.Allocate <double>();
                gpu.Launch().add_double(2.5d, 7.5d, dev_c);
                gpu.CopyFromDevice(dev_c, out c);
                gpu.Free(dev_c);
                return(c == 10.0d);
            }
            catch
            { return(false); }
        }
示例#12
0
        /// <summary>
        ///     Вызов и исполнение функции проверки что массив отсортирован
        /// </summary>
        public static void ExecuteSorted(int direction = 1)
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice();

            gpu.LoadModule(km);


            int[] devA = gpu.Allocate(_a);
            int[] devB = gpu.Allocate(_b);
            int[] devC = gpu.Allocate(_c);
            int[] devD = gpu.Allocate(D);

            gpu.CopyToDevice(_a, devA);

            gpu.Launch(1, 1).Split(devA, devB, devC, _middle);
            gpu.Launch(_gridSize, _blockSize).Sorted(devA, devB, devC, devD, 0, direction);
            gpu.Launch(1, 1).Sorted(devA, devB, devC, devD, 1, direction);

            gpu.CopyFromDevice(devD, D);

            // free the memory allocated on the GPU
            gpu.FreeAll();
        }
示例#13
0
        public static void primaGPU()
        {
            CudafyModule modul_kernel = CudafyTranslator.Cudafy();
            GPGPU        vga          = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            vga.LoadModule(modul_kernel);

            Stopwatch waktu = new Stopwatch();

            waktu.Start();
            int[] list_cpu = new int[KONSTANTA_THREAD];
            int[] list_cpy = new int[KONSTANTA_THREAD];
            int[] list     = vga.Allocate <int>(KONSTANTA_THREAD);
            vga.Launch(KONSTANTA_THREAD, 1).ModulAtomic(list);
            vga.CopyFromDevice(list, list_cpy);
            vga.FreeAll();

            int index = 0;

            for (int z = 0; z < list_cpy.Length; z++)
            {
                if (list_cpy[z] != -1)
                {
                    list_cpu[index] = list_cpy[z];
                    //Console.WriteLine(list_cpu[index]);
                    index++;
                }
            }
            waktu.Stop();
            TimeSpan ts    = waktu.Elapsed;
            String   total = ts.Seconds.ToString();

            Console.WriteLine("Total GPU ------ {0} detik> ", total);
        }
示例#14
0
        public static void Execute()
        {
            CudafyModule km = CudafyModule.TryDeserialize();

            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy(typeof(Generic <ushort, ushort>), typeof(SimpleGeneric));
                km.Serialize();
            }

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);

            gpu.LoadModule(km);

            var input = new Generic <ushort, ushort>();

            input.A = 187;

            int[] devoutput = gpu.Allocate <int>(1);
            gpu.Launch(1, 1, "Kernel", input, devoutput);

            int output;

            gpu.CopyFromDevice(devoutput, out output);

            Console.WriteLine("Simple Generic: " + ((output == 1) ? "PASSED" : "FAILED"));
        }
        public void ExeTestKernel()
        {
            GPGPU         gpu  = CudafyHost.GetDevice(CudafyModes.Target, 0);
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            int[] host_results = new int[N];

            // Either assign a new block of memory to hold results on device
            var dev_results = gpu.Allocate <int>(N);

            // Or fill your array with values first and then
            for (int i = 0; i < N; i++)
            {
                host_results[i] = i * 3;
            }

            // Copy array with ints to device
            var dev_filled_results = gpu.CopyToDevice(host_results);

            // 64*16 = 1024 threads per block (which is max for sm_30)
            dim3 threadsPerBlock = new dim3(64, 16);

            // 8*8 = 64 blocks per grid , just for show so you get varying numbers
            dim3 blocksPerGrid = new dim3(8, 8);

            //var threadsPerBlock = 1024; // this will only give you blockDim.x = 1024, .y = 0, .z = 0
            //var blocksPerGrid = 1;      // just for show

            gpu.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", dev_results, dev_filled_results);

            gpu.CopyFromDevice(dev_results, host_results);
        }
示例#16
0
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));

            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _gpu.LoadModule(km);

            ParamsStruct[] host_par = new ParamsStruct[1];
            ParamsStruct[] result   = new ParamsStruct[1];
            host_par[0].OP     = 96.95;
            host_par[0].Price  = 1332.24;
            host_par[0].Strike = 1235;
            host_par[0].TD     = 31;
            host_par[0].R      = 0.0001355;
            host_par[0].Q      = 0.0166;
            host_par[0].N      = 100;// 1000;
            host_par[0].kind   = 1;

            ParamsStruct[] dev_par = _gpu.CopyToDevice(host_par);
            float[]        PA      = _gpu.Allocate <float>(1001);
            _gpu.Launch(1, 1, "impliedVolatile", dev_par, PA);

            _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);

            Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
            //Console.ReadKey();
        }
示例#17
0
        public void Initialize(int DeviceId)
        {
            CudafyModes.Target        = eGPUType.OpenCL;
            CudafyTranslator.Language = eLanguage.OpenCL;
            CudafyModule km = CudafyTranslator.Cudafy();

            Gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
            Gpu.LoadModule(km);
        }
示例#18
0
        public static void Execute(byte[] bitmap)
        {
            DateTime     dt = DateTime.Now;
            CudafyModule km = CudafyModule.TryDeserialize(csFILENAME);

            // Check the module exists and matches the .NET modules, else make new
            if (km == null || !km.TryVerifyChecksums())
            {
                Console.WriteLine("There was no cached module available so we make a new one.");
                km = CudafyModule.Deserialize(typeof(ray_serialize).Name);
                km.Serialize(csFILENAME);
            }

            GPGPU gpu = CudafyHost.GetGPGPU(CudafyModes.Target, 1);

            gpu.LoadModule(km);

            Console.WriteLine("Time taken to load module: {0}ms", DateTime.Now.Subtract(dt).Milliseconds);

            // capture the start time
            gpu.StartTimer();

            // allocate memory on the GPU for the bitmap (same size as ptr)
            byte[] dev_bitmap = gpu.Allocate(bitmap);

            // allocate temp memory, initialize it, copy to constant memory on the GPU
            Sphere[] temp_s = new Sphere[SPHERES];
            for (int i = 0; i < SPHERES; i++)
            {
                temp_s[i].r = rnd(1.0f);
                temp_s[i].g = rnd(1.0f);
                temp_s[i].b = rnd(1.0f);

                temp_s[i].x      = rnd(1000.0f) - 500;
                temp_s[i].y      = rnd(1000.0f) - 500;
                temp_s[i].z      = rnd(1000.0f) - 500;
                temp_s[i].radius = rnd(100.0f) + 20;
            }

            gpu.CopyToConstantMemory(temp_s, s);

            // generate a bitmap from our sphere data
            dim3 grids   = new dim3(DIM / 16, DIM / 16);
            dim3 threads = new dim3(16, 16);

            gpu.Launch(grids, threads, "kernel", dev_bitmap);

            // copy our bitmap back from the GPU for display
            gpu.CopyFromDevice(dev_bitmap, bitmap);

            // get stop time, and display the timing results
            float elapsedTime = gpu.StopTimer();

            Console.WriteLine("Time to generate: {0} ms", elapsedTime);

            gpu.DeviceFreeAll();
        }
示例#19
0
        public static void Execute(byte[] bitmap)
        {
            CudafyModule km = CudafyModule.TryDeserialize();

            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy(typeof(SphereOpenCL), typeof(ray_opencl_const));
                km.TrySerialize();
            }

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            // capture the start time
            gpu.StartTimer();

            // allocate memory on the GPU for the bitmap (same size as ptr)
            byte[] dev_bitmap = gpu.Allocate(bitmap);

            // allocate memory for the Sphere dataset
            //SphereOpenCL[] s = gpu.Allocate<SphereOpenCL>(SPHERES);

            // allocate temp memory, initialize it, copy to constant memory on the GPU
            SphereOpenCL[] temp_s = new SphereOpenCL[SPHERES];
            for (int i = 0; i < SPHERES; i++)
            {
                temp_s[i].r = rnd(1.0f);
                temp_s[i].g = rnd(1.0f);
                temp_s[i].b = rnd(1.0f);

                temp_s[i].x      = rnd(1000.0f) - 500;
                temp_s[i].y      = rnd(1000.0f) - 500;
                temp_s[i].z      = rnd(1000.0f) - 500;
                temp_s[i].radius = rnd(100.0f) + 20;
            }
            //gpu.CopyToDevice(temp_s, s);
            gpu.CopyToConstantMemory(temp_s, spheres);

            // generate a bitmap from our sphere data
            dim3 grids   = new dim3(ray_gui.DIM / 16, ray_gui.DIM / 16);
            dim3 threads = new dim3(16, 16);

            //gpu.Launch(grids, threads).kernel(s, dev_bitmap); // Dynamic
            gpu.Launch(grids, threads, ((Action <GThread, byte[]>)thekernel), dev_bitmap); // Strongly typed

            // copy our bitmap back from the GPU for display
            gpu.CopyFromDevice(dev_bitmap, bitmap);

            // get stop time, and display the timing results
            float elapsedTime = gpu.StopTimer();

            Console.WriteLine("Time to generate: {0} ms", elapsedTime);

            gpu.FreeAll();
        }
示例#20
0
        public GPULiquidSystem()
        {
            CudafyTranslator.Language = eLanguage.OpenCL;
            CudafyModule km = CudafyTranslator.Cudafy(typeof(GPULiquidSystem));

            System.IO.File.WriteAllText("LiquidSystem.cl", km.SourceCode);

            _gpu = CudafyHost.GetDevice(eGPUType.OpenCL, 0);
            _gpu.LoadModule(km);
        }
示例#21
0
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);
            gpu.Launch().thekernel(); // or gpu.Launch(1, 1, "kernel");
            Console.WriteLine("Sample kernel started successfully!");
        }
示例#22
0
        ///// <summary>
        ///// Initializes a new instance of the <see cref="KernelMethodInfo"/> class.
        ///// </summary>
        ///// <param name="type">The type.</param>
        ///// <param name="method">The method.</param>
        ///// <param name="gpuMethodType">Type of the gpu method.</param>
        ///// <param name="noDummyInclude"></param>
        ///// <param name="parentModule"></param>
        //public KernelMethodInfo(Type type, MethodInfo method, eKernelMethodType gpuMethodType, bool noDummyInclude, CudafyModule parentModule)
        //    : this(type, method, gpuMethodType, false, false, parentModule)
        //{
        //}

        /// <summary>
        /// Initializes a new instance of the <see cref="KernelMethodInfo"/> class.
        /// </summary>
        /// <param name="type">The type.</param>
        /// <param name="method">The method.</param>
        /// <param name="gpuMethodType">Type of the gpu method.</param>
        /// <param name="isDummy">if set to <c>true</c> is dummy.</param>
        /// <param name="behaviour"></param>
        /// <param name="parentModule">Module of which this is a part.</param>
        public KernelMethodInfo(Type type, MethodInfo method, eKernelMethodType gpuMethodType, bool isDummy, eCudafyDummyBehaviour behaviour, CudafyModule parentModule)
        {
            Type                 = type;
            Method               = method;
            MethodType           = gpuMethodType;
            DeserializedChecksum = 0;
            IsDummy              = isDummy;
            Behaviour            = behaviour;
            ParentModule         = parentModule;
        }
 public void SetUp()
 {
     //var x = CompilerHelper.Create(ePlatform.x64, eArchitecture.OpenCL, eCudafyCompileMode.Default);
     var y = CompilerHelper.Create(ePlatform.x64, CudafyModes.Architecture, eCudafyCompileMode.DynamicParallelism); 
     _cm = CudafyTranslator.Cudafy(new CompileProperties[] {y}, this.GetType());
     Console.WriteLine(_cm.CompilerOutput);
     _cm.Serialize();
     _gpu = CudafyHost.GetDevice(y.Architecture, CudafyModes.DeviceId);
     _gpu.LoadModule(_cm);
 }
示例#24
0
        public static void Execute()
        {
            // Translates this class to CUDA C and then compliles
            CudafyModule km = CudafyTranslator.Cudafy();

            // Get the first GPU and load the module
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            // Create some arrays on the host
            int[] a = new int[N];
            int[] b = new int[N];
            int[] c = new int[N];

            // allocate the memory on the GPU
            int[] dev_c = gpu.Allocate <int>(c);

            // fill the arrays 'a' and 'b' on the CPU
            for (int i = 0; i < N; i++)
            {
                a[i] = i;
                b[i] = 2 * i;
            }

            // copy the arrays 'a' and 'b' to the GPU
            int[] dev_a = gpu.CopyToDevice(a);
            int[] dev_b = gpu.CopyToDevice(b);

            // Launch 128 blocks of 128 threads each
            gpu.Launch(128, 128).add(dev_a, dev_b, dev_c);

            // copy the array 'c' back from the GPU to the CPU
            gpu.CopyFromDevice(dev_c, c);

            // verify that the GPU did the work we requested
            bool success = true;

            for (int i = 0; i < N; i++)
            {
                if ((a[i] + b[i]) != c[i])
                {
                    Console.WriteLine("{0} + {1} != {2}", a[i], b[i], c[i]);
                    success = false;
                    break;
                }
            }
            if (success)
            {
                Console.WriteLine("We did it!");
            }

            // free the memory allocated on the GPU
            gpu.FreeAll();
        }
示例#25
0
        public void SetUp()
        {
            //var x = CompilerHelper.Create(ePlatform.x64, eArchitecture.OpenCL, eCudafyCompileMode.Default);
            var y = CompilerHelper.Create(ePlatform.x64, CudafyModes.Architecture, eCudafyCompileMode.DynamicParallelism);

            _cm = CudafyTranslator.Cudafy(new CompileProperties[] { y }, this.GetType());
            Console.WriteLine(_cm.CompilerOutput);
            _cm.Serialize();
            _gpu = CudafyHost.GetDevice(y.Architecture, CudafyModes.DeviceId);
            _gpu.LoadModule(_cm);
        }
示例#26
0
        public void LoadModule()
        {
            CudafyModule km = CudafyModule.TryDeserialize();    // Look for cdfy module file before generating

            // Ensure if using Cuda, use 2.0 architecture for Atomics compatibility
            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy();
            }
            gpu.LoadModule(km);
        }
示例#27
0
        public double[] transpose(double[] inputArray)
        {
            GPGPU        gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            CudafyModule km  = CudafyTranslator.Cudafy(eArchitecture.sm_35);

            gpu.LoadModule(km);
            dim3 grid = new dim3(1000);

            gpu.Launch();
            return(new double[1]);
        }
        public static void RunTest()
        {
            CudafyModule km  = CudafyTranslator.Cudafy();
            GPGPU        gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            gpu.LoadModule(km);


            gpu.Launch().thekernel(); // or gpu.Launch(1, 1, "kernel");
            Console.WriteLine("Sample kernel started successfully!");
        }
示例#29
0
 public override void UnloadModule(CudafyModule module)
 {
     if (!_modules.Remove(module))
     {
         throw new CudafyHostException(CudafyHostException.csMODULE_NOT_FOUND);
     }
     if (_module == module)
     {
         _module = null;
     }
 }
示例#30
0
 public void SetUp()
 {
     CudafyTranslator.GenerateDebug = true;
     _cm = CudafyModule.TryDeserialize();
     _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);
     if (_cm == null || !_cm.TryVerifyChecksums())
     {
         _cm = CudafyTranslator.Cudafy(_gpu.GetArchitecture(), this.GetType(), (_gpu is OpenCLDevice) ? null  : typeof(StringConstClass));              
         _cm.TrySerialize();
     }
     _gpu.LoadModule(_cm);
 }
示例#31
0
文件: dot.cs 项目: rblenis/cudafy
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            float c;

            // allocate memory on the cpu side
            float[] a         = new float[N];
            float[] b         = new float[N];
            float[] partial_c = new float[blocksPerGrid];

            // allocate the memory on the GPU
            float[] dev_a         = gpu.Allocate <float>(N);
            float[] dev_b         = gpu.Allocate <float>(N);
            float[] dev_partial_c = gpu.Allocate <float>(blocksPerGrid);

            float[] dev_test = gpu.Allocate <float>(blocksPerGrid * blocksPerGrid);

            // fill in the host memory with data
            for (int i = 0; i < N; i++)
            {
                a[i] = i;
                b[i] = i * 2;
            }

            // copy the arrays 'a' and 'b' to the GPU
            gpu.CopyToDevice(a, dev_a);
            gpu.CopyToDevice(b, dev_b);

            gpu.Launch(blocksPerGrid, threadsPerBlock).Dot(dev_a, dev_b, dev_partial_c);

            // copy the array 'c' back from the GPU to the CPU
            gpu.CopyFromDevice(dev_partial_c, partial_c);

            // finish up on the CPU side
            c = 0;
            for (int i = 0; i < blocksPerGrid; i++)
            {
                c += partial_c[i];
            }

            Console.WriteLine("Does GPU value {0} = {1}?\n", c, 2 * sum_squares((float)(N - 1)));

            // free memory on the gpu side
            gpu.FreeAll();

            // free memory on the cpu side
            // No worries...
        }
示例#32
0
        public void Initialize(int bytes)
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            _gpu.LoadModule(km);

            _dev_bitmap = _gpu.Allocate <byte>(bytes);

            _blocks  = new dim3(DIM / 16, DIM / 16);
            _threads = new dim3(16, 16);
        }
示例#33
0
 public void SetUp()
 {
     CudafyTranslator.GenerateDebug = true;
     _cm  = CudafyModule.TryDeserialize();
     _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);
     if (_cm == null || !_cm.TryVerifyChecksums())
     {
         _cm = CudafyTranslator.Cudafy(_gpu.GetArchitecture(), this.GetType(), (_gpu is OpenCLDevice) ? null  : typeof(StringConstClass));
         _cm.TrySerialize();
     }
     _gpu.LoadModule(_cm);
 }
示例#34
0
        public virtual void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);
            var types = new List<Type>();
            types.Add(this.GetType());
            types.Add(typeof(MathSingleTest));
            SupportsDouble = _gpu.GetDeviceProperties().SupportsDoublePrecision;
            if (SupportsDouble)
                types.Add(typeof(MathDoubleTest));

            _cm = CudafyTranslator.Cudafy(CudafyModes.Architecture, types.ToArray());
            Debug.WriteLine(_cm.SourceCode);
            _gpu.LoadModule(_cm);
        }
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

            _cm = CudafyModule.TryDeserialize();
            if (_cm == null || !_cm.TryVerifyChecksums())
            {
                _cm = CudafyTranslator.Cudafy(CudafyModes.Architecture);//typeof(PrimitiveStruct), typeof(BasicFunctionTests));
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);

        }
        public void SetUp()
        {
            //var cm = CudafyTranslator.Cudafy(typeof(PrimitiveStruct), GetType());
            //
            if (CudafyModule.HasCudafyModuleInAssembly())   // Post-build event command line was: cudafycl.exe $(TargetPath)
            {                                               // Do this for Release
                _cm = GetType().Assembly.GetCudafyModule();
            }
            else // Post-build event command line was: cudafycl.exe $(TargetPath) -cdfy
            {    // Do this for Debug
                string name = this.GetType().Assembly.GetName().Name;
                _cm = CudafyModule.TryDeserialize(name);
            }
            Assert.IsFalse(_cm == null);
            Assert.IsTrue(_cm.TryVerifyChecksums());

            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            if(_cm != null)
                _gpu.LoadModule(_cm);
        }
 public void SetUp()
 {
     _cm = CudafyTranslator.Cudafy(eArchitecture.sm_20);//typeof(RelectorAddInFunctionsTests));
 }
 public void SetUp()
 {
     _cm = CudafyTranslator.Cudafy(typeof(Sphere), typeof(RelectorAddInTypeTests));
 }