Пример #1
0
        /// <summary>
        /// Translates the kernel types, loads the resulting module onto device 0 of the
        /// configured target and prepares the device-side buffers used by this instance.
        /// </summary>
        private void initGPU()
        {
            // Translate every Cudafy-attributed member of the listed types and compile them into one module.
            CudafyModule module = CudafyTranslator.Cudafy(
                typeof(Population),
                typeof(UserUpdate),
                typeof(Fitness),
                typeof(FitnessParameter),
                typeof(PredictionPerformances),
                typeof(Experiment),
                typeof(SimOptions));

            // Pick device 0 of the configured target type and load the module onto it.
            gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
            gpu.LoadModule(module);

            // Buffers that are only allocated here; no host data is uploaded for them.
            dev_fitnesses = gpu.Allocate<float>(fs);
            dev_fitnessParams = gpu.Allocate<FitnessParameter>(options.NumberOfIndividuals);

            // Buffers that are created from (and filled with) the research data arrays.
            dev_groundTruth = gpu.CopyToDevice(researchData.GroundTruth);
            dev_userTrust = gpu.CopyToDevice(researchData.UserTrusts);
            dev_updates = gpu.CopyToDevice(researchData.Updates);
        }
Пример #2
0
        /// <summary>
        /// Returns a module for <paramref name="types"/>, reusing the serialized copy in the
        /// kernel cache directory when it exists and its checksums verify, and otherwise
        /// translating the types for OpenCL and writing the result to the cache.
        /// </summary>
        /// <param name="types">Types to translate; also stored in the module's <c>Tag</c>.</param>
        /// <returns>The cached or freshly translated module.</returns>
        private static CudafyModule GetCudafyModule(Type[] types)
        {
            // The cache file name is the comma-joined list of type names plus ".cdfy".
            var typeList = string.Join(",", types.Select(t => t.ToString()));
            var cacheFile = Path.Combine(AudioKernelCacheRoot, typeList) + ".cdfy";

            var module = CudafyModule.TryDeserialize(cacheFile);
            var cacheIsUsable = module != null && module.TryVerifyChecksums();

            if (!cacheIsUsable)
            {
                // Missing or stale cache entry: translate again and persist the result.
                module = CudafyTranslator.Cudafy(eArchitecture.OpenCL, types);
                Directory.CreateDirectory(AudioKernelCacheRoot);
                module.Serialize(cacheFile);
            }

            module.Tag = types;
            return module;
        }
Пример #3
0
        /// <summary>
        /// Runs the "SIMDFunctionTest" kernel three times on 1024x1024 element buffers,
        /// prints the measured time of each run and, on the first run only, compares the
        /// device output against <c>GThread.vadd2</c> evaluated on the host.
        /// </summary>
        public static void Execute()
        {
            const int width = 1024;
            const int height = 1024;
            const int elementCount = width * height;

            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, Program.testArchitecture, typeof(SIMDFunctions));
            _gpu.LoadModule(module);

            for (int run = 0; run < 3; run++)
            {
                // Host inputs are refilled and re-uploaded on every run.
                uint[] lhs = new uint[elementCount];
                Fill(lhs);
                uint[] devLhs = _gpu.CopyToDevice(lhs);

                uint[] rhs = new uint[elementCount];
                Fill(rhs);
                uint[] devRhs = _gpu.CopyToDevice(rhs);

                uint[] result = new uint[elementCount];
                uint[] devResult = _gpu.Allocate(result);

                // The timed region covers the launch and the copy back to the host.
                _gpu.StartTimer();
                _gpu.Launch(height, width, "SIMDFunctionTest", devLhs, devRhs, devResult);
                _gpu.CopyFromDevice(devResult, result);
                float elapsed = _gpu.StopTimer();
                Console.WriteLine("Time: {0}", elapsed);

                if (loopIsFirst(run))
                {
                    // Every element is checked; a single mismatch fails the test.
                    GThread hostThread = new GThread(1, 1, null);
                    bool allMatch = true;
                    for (int idx = 0; idx < elementCount; idx++)
                    {
                        if (hostThread.vadd2(lhs[idx], rhs[idx]) != result[idx])
                        {
                            allMatch = false;
                        }
                    }
                    Console.WriteLine("Test {0}", allMatch ? "passed. " : "failed!");
                }

                _gpu.FreeAll();
            }

            // Verification is only performed on the first iteration.
            bool loopIsFirst(int iteration)
            {
                return iteration == 0;
            }
        }
Пример #4
0
        /// <summary>
        /// Unloads any loaded modules from the device, translates
        /// <c>MandelComputerCUDA</c> with the translator language set to OpenCL, loads the
        /// new module and clears the "needs recompile" flag. Progress is written to the console.
        /// </summary>
        private void recompileCUDAModule()
        {
            Console.Write("(Re)compiling OpenCL module... ");
            if (_cudaModule == null)
            {
                _cudaModule = new CudafyModule();
            }
            _cudaDevice.UnloadModules();
            _cudaModule.Reset();
            CudafyTranslator.Language = eLanguage.OpenCL;
            _cudaModule = CudafyTranslator.Cudafy(typeof(MandelComputerCUDA));
            _cudaDevice.LoadModule(_cudaModule);
            _cudaNeedsRecompile = false;

            // Restore whatever colour the console had before, instead of forcing Gray,
            // so callers that print in a different colour are not silently overridden.
            ConsoleColor previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("DONE!");
            Console.ForegroundColor = previousColor;
        }
Пример #5
0
        /// <summary>
        /// Obtains a module for <c>Program</c> (from the serialized copy when its checksums
        /// verify, otherwise by translating and serializing it), then requests a device,
        /// trying CUDA first, then OpenCL, then the emulator, and loads the module onto it.
        /// Returns without loading anything when no device is obtained.
        /// </summary>
        private static void SimInit()
        {
            Console.WriteLine("Deserializing class");
            CudafyModule km = CudafyModule.TryDeserialize(typeof(Program).Name);

            Console.WriteLine("Got: " + km);
            bool tvc = km != null && km.TryVerifyChecksums();

            Console.WriteLine("TVC: " + tvc);

            // tvc can only be true when km is non-null, so this single test covers both cases.
            if (!tvc)
            {
                Console.WriteLine("Serializing");
                km = CudafyTranslator.Cudafy(typeof(Program));
                km.Serialize();
            }

            Console.WriteLine("Requesting device");
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            if (_gpu != null)
            {
                Console.WriteLine("Got CUDA Device: " + _gpu.DeviceId);
            }
            else
            {
                _gpu = CudafyHost.GetDevice(eGPUType.OpenCL);
                if (_gpu != null)
                {
                    Console.WriteLine("Got OpenCL Device: " + _gpu.DeviceId);
                }
                else
                {
                    // Last resort: the emulator. Nothing is logged when it is picked.
                    _gpu = CudafyHost.GetDevice(eGPUType.Emulator);
                    if (_gpu == null)
                    {
                        Console.WriteLine("No device found!");
                        return;
                    }
                }
            }

            Console.WriteLine("Loading module");
            _gpu.LoadModule(km);
        }
Пример #6
0
        /// <summary>
        /// Loads (or translates and caches) the <c>OpenCLTestClass</c> module, prints its
        /// source, runs the <c>popVect</c> kernel over the values 0..N-1 and copies the
        /// result back to the host. The results are not printed.
        /// </summary>
        static void popcTest()
        {
            var module = CudafyModule.TryDeserialize(typeof(OpenCLTestClass).Name);

            bool cacheIsUsable = module != null && module.TryVerifyChecksums();
            if (!cacheIsUsable)
            {
                module = CudafyTranslator.Cudafy(CudafyModes.Architecture, typeof(OpenCLTestClass));
                module.TrySerialize();
            }
            Console.WriteLine(module.SourceCode);

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);
            gpu.LoadModule(module);

            uint[] values = new uint[N];
            int[] counts = new int[N];

            // Device-side output buffer, sized like 'counts'.
            int[] devCounts = gpu.Allocate<int>(counts);

            // Input is simply the index sequence 0, 1, ..., N-1.
            for (int idx = 0; idx < N; idx++)
            {
                values[idx] = (uint)idx;
            }

            // Upload the input and launch with grid size 1 and block size N.
            uint[] devValues = gpu.CopyToDevice(values);
            gpu.Launch(1, N).popVect(devValues, devCounts);

            // Bring the output back to the host.
            gpu.CopyFromDevice(devCounts, counts);

            // Release everything allocated on the device.
            gpu.FreeAll();
        }
Пример #7
0
 /// <summary>
 /// Selects the Cudafy target from <paramref name="archi"/>, obtains a module (by
 /// translating, by compiling "cuda.cu" with nvcc, or by deserializing a stored module),
 /// makes sure the module lists a "calc_r" function, and loads it onto device 0.
 /// </summary>
 /// <param name="archi">Architecture to build or load for; defaults to sm_20.</param>
 /// <param name="hasSdk">When true the module is built here; when false it is deserialized.</param>
 /// <param name="generate">When true (and <paramref name="hasSdk"/> is true) the module is translated and serialized.</param>
 public static void init(eArchitecture archi = eArchitecture.sm_20, bool hasSdk = false, bool generate = false)
 {
     // Emulator and OpenCL architectures override the target; for any other value
     // CudafyModes.Target is left at whatever it was before this call.
     if (archi == eArchitecture.Emulator)
     {
         CudafyModes.Target = eGPUType.Emulator;
     }
     else if (archi >= eArchitecture.OpenCL)
     {
         CudafyModes.Target = eGPUType.OpenCL;
     }
     if (hasSdk)
     {
         // Build the module
         if (generate || CudafyModes.Target != eGPUType.Cuda)
         {
             // NOTE(review): this sets the translator language to OpenCL when the target is
             // Cuda, which looks inverted -- confirm whether eGPUType.OpenCL was intended.
             if (CudafyModes.Target == eGPUType.Cuda)
             {
                 CudafyTranslator.Language = eLanguage.OpenCL;
             }
             km = CudafyTranslator.Cudafy(archi);
             // NOTE(review): serialized as "bespoke_<archi>", while the load branch below
             // deserializes "<archi>" -- confirm the two names are meant to differ.
             km.Serialize("bespoke_" + archi);
         }
         else
         {
             // Cuda target without regeneration: compile the source file "cuda.cu" with nvcc.
             km            = new CudafyModule();
             km.SourceCode = System.IO.File.ReadAllText("cuda.cu");
             km.Compile(eGPUCompiler.CudaNvcc);
         }
     }
     else
     {
         // Load the previously serialized module named after the architecture.
         km = CudafyModule.Deserialize(archi.ToString());
     }
     // Register "calc_r" by hand when the module's function table does not list it
     // (skipped when the module was just generated).
     if (!generate && !km.Functions.ContainsKey("calc_r"))
     {
         km.Functions.Add("calc_r", new KernelMethodInfo(typeof(RuneCalc), typeof(RuneCalc).GetMethod("calc_r"), eKernelMethodType.Global, false, eCudafyDummyBehaviour.Default, km));
     }
     // Device 0 of the selected target receives the module.
     gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
     gpu.LoadModule(km);
 }
Пример #8
0
        /// <summary>
        /// Translates the calling type, runs <c>setup_kernel</c> once and
        /// <c>generate_kernel</c> ten times with a 64x64 launch configuration, sums the
        /// per-thread results on the host and prints the resulting fraction.
        /// </summary>
        public static void Basics()
        {
            const int elementCount = 64 * 64;

            CudafyModule module = CudafyTranslator.Cudafy(CudafyModes.Architecture);
            Console.WriteLine(module.CompilerOutput);

            GPGPU gpu = CudafyHost.GetDevice();
            gpu.LoadModule(module);

            // One random-generator state and one result slot per launched thread.
            RandStateXORWOW[] devStates = gpu.Allocate<RandStateXORWOW>(elementCount);
            int[] devResults = gpu.Allocate<int>(elementCount);
            int[] hostResults = new int[elementCount];

            // presumably clears the device result buffer before accumulation -- TODO confirm
            gpu.Set(devResults);
#if !NET35
            gpu.Launch(64, 64).setup_kernel(devStates);
            for (int pass = 0; pass < 10; pass++)
            {
                gpu.Launch(64, 64).generate_kernel(devStates, devResults);
            }
#else
            gpu.Launch(64, 64, "setup_kernel", devStates);
            for (int pass = 0; pass < 10; pass++)
            {
                gpu.Launch(64, 64, "generate_kernel", devStates, devResults);
            }
#endif

            gpu.CopyFromDevice(devResults, hostResults);

            // Accumulate all per-thread counters on the host.
            int total = 0;
            foreach (int threadResult in hostResults)
            {
                total += threadResult;
            }
            Console.WriteLine("Fraction with low bit set was {0}", (float)total / (64.0f * 64.0f * 100000.0f * 10.0f));

            gpu.FreeAll();
        }
Пример #9
0
        /// <summary>
        /// Launches the <c>add</c> and <c>sub</c> kernels with the operands 2 and 7,
        /// reading each single-int result back from the device and printing it.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            // A single Int32 on the device receives the result of each kernel in turn.
            int[] devResult = gpu.Allocate<int>();
            int result;

            gpu.Launch().add(2, 7, devResult);
            gpu.CopyFromDevice(devResult, out result);
            Console.WriteLine("2 + 7 = {0}", result);

            gpu.Launch().sub(2, 7, devResult);
            gpu.CopyFromDevice(devResult, out result);
            Console.WriteLine("2 - 7 = {0}", result);

            gpu.Free(devResult);
        }
        /// <summary>
        /// Forces the CUDA target, creates and activates a CUDA context on device 0 with the
        /// <c>MapHost</c> flag, then loads the kernel module, translating and serializing it
        /// first when no valid serialized copy is found.
        /// </summary>
        private void InitCudaModule()
        {
            cufy.CudafyModes.Target = cufy.eGPUType.Cuda;

            gpu = CudafyHost.GetDevice(CudafyModes.Target);

            // Reach through to the underlying CUDA.NET object to create the context directly.
            cuGPU = (CUDA)((CudaGPU)gpu).CudaDotNet;
            cuGPU.SetCurrentContext(cuGPU.CreateContext(0, CUCtxFlags.MapHost));

            module = CudafyModule.TryDeserialize(moduleName);
            bool cachedModuleIsValid = module != null && module.TryVerifyChecksums();
            if (!cachedModuleIsValid)
            {
                module = CudafyTranslator.Cudafy(typeof(CudafyRBFSlicedEllpackKernel));
                module.Serialize();
            }

            gpu.LoadModule(module);
        }
Пример #11
0
        /// <summary>
        /// Loads the translated module onto the configured device, uploads the precursor
        /// values and allocates host and device output buffers sized by the device's
        /// maximum grid width times the number of distinct peptide lengths.
        /// </summary>
        /// <param name="potentialPrecursors">Precursor values copied to the device.</param>
        /// <param name="maxPeptideLength">Largest peptide length considered.</param>
        /// <param name="minPeptideLength">Smallest peptide length considered.</param>
        public ProteinDigest(double[] potentialPrecursors, int maxPeptideLength, int minPeptideLength)
        {
            // GPU set-up: translate, pick the device, load the module, upload the input.
            CudafyModule module = CudafyTranslator.Cudafy();

            gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);
            dev_prec = gpu.CopyToDevice(potentialPrecursors);

            GPGPUProperties deviceProperties = gpu.GetDeviceProperties();

            this.maxGridSize = deviceProperties.MaxGridSize.x;
            this.maxPeptideSize = maxPeptideLength;
            this.peptideArraySize = maxPeptideLength - minPeptideLength + 1;
            this.minPeptideSize = minPeptideLength;

            // NOTE(review): the product below can be very large when the device reports a
            // large MaxGridSize.x -- confirm it cannot overflow for the supported devices.
            this.outputStart = new int[maxGridSize * peptideArraySize];

            // Device buffer that receives the results; same element count as outputStart.
            dev_outputStart = gpu.Allocate<int>(maxGridSize * peptideArraySize);
        }
Пример #12
0
        /// <summary>
        /// Adds two N-element vectors (i and i*i) with the <c>add</c> kernel, launched with
        /// grid size 1 and block size N, and prints every element-wise sum.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            int[] left = new int[N];
            int[] right = new int[N];
            int[] sum = new int[N];

            // Device-side output buffer.
            int[] devSum = gpu.Allocate<int>(sum);

            // Host-side inputs: left[i] = i, right[i] = i squared.
            for (int idx = 0; idx < N; idx++)
            {
                left[idx] = idx;
                right[idx] = idx * idx;
            }

            // Upload both inputs, run the kernel, download the result.
            int[] devLeft = gpu.CopyToDevice(left);
            int[] devRight = gpu.CopyToDevice(right);

            gpu.Launch(1, N).add(devLeft, devRight, devSum);

            gpu.CopyFromDevice(devSum, sum);

            for (int idx = 0; idx < N; idx++)
            {
                Console.WriteLine("{0} + {1} = {2}", left[idx], right[idx], sum[idx]);
            }

            // Release all device allocations made above.
            gpu.FreeAll();
        }
Пример #13
0
        /// <summary>
        /// Creates the host-side network arrays for the given input size, output size and
        /// depth (depth + 2 layers in total), initialises them, translates this type for
        /// sm_30 with debug generation enabled and copies every array to the device.
        /// </summary>
        /// <param name="rozmiarWejsca">Number of network inputs.</param>
        /// <param name="rozmiarWyjscia">Number of network outputs.</param>
        /// <param name="glebokosc">Depth; two is added to obtain the layer count.</param>
        public BEP_CUDA(int rozmiarWejsca, int rozmiarWyjscia, int glebokosc)
        {
            // Dimensions.
            iloscWejsc = rozmiarWejsca;
            iloscWyjsc = rozmiarWyjscia;
            iloscWarstw = glebokosc + 2;

            // Host arrays; the inner dimensions are fixed at 32.
            neurony = new float[iloscWarstw, 32, 32];
            macierzDelt = new float[iloscWarstw, 32];
            macierzWyjsc = new float[iloscWarstw + 1, 32];
            macierzWejsc = new float[iloscWarstw + 1, 32];
            macierzSum = new float[iloscWarstw, 32];
            iloscNeuronowWWarstwie = new int[iloscWarstw];
            wyjscia = new int[iloscWarstw + 1];
            odpowiedz = new float[rozmiarWyjscia];

            numerWarstwy = new int[1];
            stala = new float[1];
            iloscWejscWWarstwie = new int[iloscWarstw];

            // Fill the arrays before anything is uploaded.
            TworzNeurony();
            CzyscMacierze();

            // Translate with debug information and load onto the configured device.
            CudafyTranslator.GenerateDebug = true;
            km = CudafyTranslator.Cudafy(eArchitecture.sm_30, typeof(BEP_CUDA));

            gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(km);

            // Mirror every host array on the device.
            neuronyGPU = gpu.CopyToDevice(neurony);
            macierzDeltGPU = gpu.CopyToDevice(macierzDelt);
            macierzWejscGPU = gpu.CopyToDevice(macierzWejsc);
            macierzWyjscGPU = gpu.CopyToDevice(macierzWyjsc);
            macierzSumGPU = gpu.CopyToDevice(macierzSum);
            wyjsciaGPU = gpu.CopyToDevice(wyjscia);
            warstwyGPU = gpu.CopyToDevice(iloscNeuronowWWarstwie);
            iloscWejscWWarstwieGPU = gpu.CopyToDevice(iloscWejscWWarstwie);
        }
Пример #14
0
        /// <summary>
        /// Runs "SyncThreadCountKernel" over 128 random values in [0, 16) and compares the
        /// single int it returns with the number of inputs equal to 1, counted on the host.
        /// </summary>
        public static void Execute()
        {
            const int count = 128;

            CudafyModule module = CudafyTranslator.Cudafy(Program.testArchitecture);
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
            gpu.LoadModule(module);

            var rng = new Random();
            var input = new int[count];
            int expectedOutput = 0;

            // Generate the input and count the ones in the same pass.
            for (var idx = 0; idx < count; idx++)
            {
                input[idx] = rng.Next(16);
                if (input[idx] == 1)
                {
                    expectedOutput += 1;
                }
            }

            var devInput = gpu.Allocate<int>(count);
            var devOutput = gpu.Allocate<int>(1);

            gpu.CopyToDevice(input, devInput);

            gpu.Launch(1, count, "SyncThreadCountKernel", devInput, devOutput);

            // Read the single result value back from the device.
            int output;
            gpu.CopyFromDevice(devOutput, out output);

            gpu.Free(devInput);
            gpu.Free(devOutput);

            Console.WriteLine("SyncThreadCount: {0}", output);
            Console.WriteLine("Expected: {0} \t{1}", expectedOutput, expectedOutput == output ? "PASSED" : "FAILED");
        }
Пример #15
0
        /// <summary>
        /// Translates and loads the module for the device's own architecture, launches
        /// <c>GpuFindPathDistance</c>, and returns the shortest distance among the per-block
        /// answers together with the load and run times in milliseconds.
        /// </summary>
        /// <returns>An <c>AnswerBetter</c> describing the best path found and the timings.</returns>
        internal override Answer GetAnswer()
        {
            var loadTimer = Stopwatch.StartNew();

            using (var gpu = CudafyHost.GetDevice())
            {
                // Load phase: translate for whatever architecture the device reports.
                var deviceArch = gpu.GetDeviceProperties().Capability.GetArchitecture();
                gpu.LoadModule(CudafyTranslator.Cudafy(ePlatform.x64, deviceArch));
                LoadTime = loadTimer.ElapsedMilliseconds;

                // Run phase: upload input, launch, download one answer per block.
                var runTimer = Stopwatch.StartNew();
                var devLatLong = gpu.CopyToDevice(_latLong.ToArray());
                var blockAnswers = new AnswerStruct[_blocksPerGrid];
                var devAnswers = gpu.Allocate(blockAnswers);

                gpu.SafeLaunch(_blocksPerGrid, _threadsPerBlock,
                               GpuFindPathDistance, (int)_permutations, devLatLong, devAnswers);

                gpu.Synchronize();
                gpu.CopyFromDevice(devAnswers, blockAnswers);
                gpu.FreeAll();

                // Reduce on the host: keep the first answer with the smallest distance.
                var shortest = float.MaxValue;
                var shortestPath = 0;
                foreach (var candidate in blockAnswers)
                {
                    if (candidate.distance < shortest)
                    {
                        shortest = candidate.distance;
                        shortestPath = candidate.pathNo;
                    }
                }

                return new AnswerBetter
                {
                    Distance = shortest,
                    Permutation = shortestPath,
                    msLoadTime = LoadTime,
                    msRunTime = runTimer.ElapsedMilliseconds
                };
            }
        }
    /// <summary>
    /// Uploads a small int array, stores its device address inside a <c>TestStruct</c>,
    /// passes both to <c>kernelTest</c> and prints the <c>value</c> field the kernel
    /// leaves in the struct. Waits for a key press before exiting.
    /// </summary>
    private static unsafe void Main(string[] args)
    {
        GPGPU gpuCuda = CudafyHost.GetDevice(eGPUType.Cuda, 0);
        CudafyModule module = CudafyTranslator.Cudafy();
        gpuCuda.LoadModule(module);

        TestStruct[] hostStructs = new TestStruct[1];
        hostStructs[0] = new TestStruct();

        // Upload the ints and look up the raw device address of that allocation.
        int[] hostInts = new[] { 1, 8, 3 };
        int[] devInts = gpuCuda.CopyToDevice(hostInts);
        DevicePtrEx devIntsMemory = gpuCuda.GetDeviceMemory(devInts);
        IntPtr devIntsAddress = devIntsMemory.Pointer;

        // The struct carries the device address as a 64-bit integer.
        hostStructs[0].dataPointer = devIntsAddress.ToInt64();

        TestStruct[] devStructs = gpuCuda.Allocate(hostStructs);
        gpuCuda.CopyToDevice(hostStructs, devStructs);
        gpuCuda.Launch().kernelTest(devStructs, devInts);

        gpuCuda.CopyFromDevice(devStructs, hostStructs);
        Console.WriteLine(hostStructs[0].value);
        Console.ReadKey();
    }
Пример #17
0
        /// <summary>
        /// Translates the module, selects the first target type (CUDA, then OpenCL, then
        /// the emulator) that reports at least one device, and loads the module onto the
        /// first device of that type that accepts it.
        /// </summary>
        /// <returns>Always 0 when a device accepted the module.</returns>
        /// <exception cref="CtkCudafyCannotUseException">No target type reports any device.</exception>
        /// <exception cref="Exception">
        /// Every device of the selected type failed with a compile error; the last
        /// compile error is attached as the inner exception.
        /// </exception>
        public int Init()
        {
            // NOTE(review): translation happens before the target type is chosen below --
            // confirm the module does not depend on which target ends up selected.
            this.m_km = CudafyTranslator.Cudafy();

            // Fall through the target types in order of preference. CudafyModes.Target is
            // left at the last type tried, exactly as before.
            var tgCount = 0;
            foreach (var candidate in new[] { eGPUType.Cuda, eGPUType.OpenCL, eGPUType.Emulator })
            {
                CudafyModes.Target = candidate;
                tgCount = CudafyHost.GetDeviceCount(CudafyModes.Target);
                if (tgCount > 0)
                {
                    break;
                }
            }

            if (tgCount <= 0)
            {
                throw new CtkCudafyCannotUseException("無法使用Cudafy");
            }

            // Remember the most recent compile failure so that it is not lost when every
            // device turns out to be unusable.
            Cudafy.CudafyCompileException lastFailure = null;

            for (int idx = 0; idx < tgCount; idx++)
            {
                try
                {
                    this.m_gpu = CudafyHost.GetDevice(CudafyModes.Target, idx);
                    this.m_gpu.LoadModule(Km);
                    return(0);
                }
                catch (Cudafy.CudafyCompileException ex)
                {
                    // Try the next device, but keep the reason this one failed.
                    lastFailure = ex;
                }
            }

            throw new Exception("Cudafy building failed.", lastFailure);
        }
Пример #18
0
        /// <summary>
        /// Translates and loads the module, launches <c>GpuFindPathDistance</c> over the
        /// city coordinates and returns the shortest distance among the per-block answers
        /// together with the load and run times in milliseconds.
        /// </summary>
        /// <returns>An <c>Answer</c> describing the best path found and the timings.</returns>
        internal static Answer GpuTsp()
        {
            var stopWatchLoad = Stopwatch.StartNew();

            using (var gpu = CudafyHost.GetDevice())
            {
                gpu.LoadModule(CudafyTranslator.Cudafy());
                LoadTime = stopWatchLoad.ElapsedMilliseconds;

                var stopWatchRun = Stopwatch.StartNew();
                var gpuLatitudes = gpu.CopyToDevice(_latitudes.ToArray());
                var gpuLongitudes = gpu.CopyToDevice(_longitudes.ToArray());
                var answer = new AnswerStruct[_blocksPerGrid];
                var gpuAnswer = gpu.Allocate(answer);

                gpu.SafeLaunch(_blocksPerGrid, _threadsPerBlock,
                               GpuFindPathDistance, (int)_permutations, _cities, gpuLatitudes, gpuLongitudes, gpuAnswer);

                gpu.Synchronize();
                gpu.CopyFromDevice(gpuAnswer, answer);

                // Release the three device buffers explicitly once the results are on the
                // host, rather than relying on disposal of the device object to do it.
                gpu.FreeAll();

                // Host-side reduction: keep the first answer with the smallest distance.
                var bestDistance = float.MaxValue;
                var bestPermutation = 0;
                for (var i = 0; i < _blocksPerGrid; i++)
                {
                    if (answer[i].distance < bestDistance)
                    {
                        bestDistance = answer[i].distance;
                        bestPermutation = answer[i].pathNo;
                    }
                }

                return(new Answer {
                    Distance = bestDistance,
                    Permutation = bestPermutation,
                    msLoadTime = LoadTime,
                    msRunTime = stopWatchRun.ElapsedMilliseconds
                });
            }
        }
Пример #19
0
        /// <summary>
        /// Stores the viewport, selects OpenCL device 0, translates the calling type and
        /// loads the module, then initialises the device pointers.
        /// </summary>
        /// <param name="passedViewport">Picture box kept in the <c>viewport</c> field.</param>
        /// <returns>
        /// <c>true</c> when the device was set up; <c>false</c> when no device of the
        /// target type exists or any step of the set-up threw.
        /// </returns>
        public static bool InitGPU(PictureBox passedViewport)
        {
            viewport = passedViewport;

            CudafyModes.Target        = eGPUType.OpenCL; // To use OpenCL, change this enum
            CudafyModes.DeviceId      = 0;
            CudafyTranslator.Language = CudafyModes.Target == eGPUType.OpenCL ? eLanguage.OpenCL : eLanguage.Cuda;

            CudafyModule km = null;

            try
            {
                int deviceCount = CudafyHost.GetDeviceCount(CudafyModes.Target);
                if (deviceCount == 0)
                {
                    Console.WriteLine("No suitable {0} devices found.", CudafyModes.Target);
                    return(false);
                }

                gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
                Console.WriteLine("Device Name: {0}", gpu.GetDeviceProperties(false).Name);

                // Queried for diagnostics only; the returned value is not used.
                gpu.GetDeviceProperties(true);

                km = CudafyTranslator.Cudafy();
                gpu.LoadModule(km);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);

                // km is still null when the failure happened before or during translation;
                // dereferencing it here would replace the real error with a
                // NullReferenceException thrown from inside the handler.
                if (km != null)
                {
                    Console.WriteLine(km.SourceCode);
                }

                Debugger.Break();
                return(false);
            }

            InitDevicePointers();

            return(true);
        }
Пример #20
0
        /// <summary>
        ///     Invokes and executes a single elementary function, selected by its name.
        /// </summary>
        /// <param name="function">Name of the kernel function to launch.</param>
        public static void Execute(string function)
        {
            CudafyModule module = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice();
            gpu.LoadModule(module);

            // Device buffers shaped like the four host arrays; only _a is uploaded.
            int[] deviceA = gpu.Allocate(_a);
            int[] deviceB = gpu.Allocate(_b);
            int[] deviceC = gpu.Allocate(_c);
            int[] deviceD = gpu.Allocate(D);

            gpu.CopyToDevice(_a, deviceA);

            // Two launches of the same function: first with the configured grid and block
            // sizes and trailing argument 1, then on a single thread with trailing argument 2.
            gpu.Launch(_gridSize, _blockSize, function, deviceA, deviceB, deviceC, deviceD, 1);
            gpu.Launch(1, 1, function, deviceA, deviceB, deviceC, deviceD, 2);

            gpu.CopyFromDevice(deviceD, D);

            // Release everything allocated on the device.
            gpu.FreeAll();
        }
Пример #21
0
 /// <summary>
 /// Translates the calling type for <c>ARCH</c> and loads it onto the configured device.
 /// On success the <c>enabled</c> flag is set and <c>busy</c> is cleared; on failure the
 /// exception text is stored in <c>errorMessage</c>.
 /// </summary>
 /// <returns><c>true</c> when the module was loaded, otherwise <c>false</c>.</returns>
 public static bool cudaEnable()
 {
     bool loaded = false;

     if (isCudaAvailable())
     {
         try
         {
             CudafyModule module = CudafyTranslator.Cudafy(ARCH);
             Console.WriteLine("Translator OK");

             gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
             Console.WriteLine("GPU OK");

             gpu.LoadModule(module);
             enabled = true;
             busy = false;
             loaded = true;
         }
         catch (Exception ex)
         {
             // Any failure is reported through errorMessage rather than rethrown.
             errorMessage = ex.ToString();
         }
     }

     return loaded;
 }
Пример #22
0
        /// <summary>
        /// Translates the calling type for OpenCL 1.2, derives a square block size from the
        /// first OpenCL device's thread limit, and loads the module onto the OpenCL device.
        /// </summary>
        /// <exception cref="Exception">No OpenCL device is available.</exception>
        public GpuRenderer()
        {
            var openClDevices = CudafyHost.GetDeviceProperties(eGPUType.OpenCL);
            if (!openClDevices.Any())
            {
                throw new Exception("No OpenCL devices found...");
            }
            var firstDevice = openClDevices.First();

            Module = CudafyTranslator.Cudafy(eArchitecture.OpenCL12);

            // Largest side in 1..15 whose square still fits within the per-block thread limit.
            var blockSide = Enumerable
                .Range(1, 15)
                .Last(side => side * side <= firstDevice.MaxThreadsPerBlock);

            BlockSize = new dim3(blockSide, blockSide);

            // Load once here so the module is not reloaded on every use.
            gpu = CudafyHost.GetDevice(eGPUType.OpenCL);
            gpu.LoadModule(Module);
        }
        /// <summary>
        /// Translates the calling type for the device's architecture and launches
        /// "GenerateRipples" with an 8x8 grid of 64x16-thread blocks, passing one empty and
        /// one pre-filled device array, then copies the first array back to the host.
        /// </summary>
        public void ExeTestKernel()
        {
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
            CudafyModule module = CudafyTranslator.Cudafy(gpu.GetArchitecture());
            gpu.LoadModule(module);

            int[] hostResults = new int[N];

            // First device array: allocated only, left for the kernel to write.
            var devResults = gpu.Allocate<int>(N);

            // Second device array: created from host data, here the multiples of three.
            for (int idx = 0; idx < N; idx++)
            {
                hostResults[idx] = idx * 3;
            }
            var devFilledResults = gpu.CopyToDevice(hostResults);

            // 64 * 16 = 1024 threads per block and 8 * 8 = 64 blocks, i.e. 65536 threads.
            // assumes N matches that thread count (one thread per element) -- TODO confirm
            dim3 blockShape = new dim3(64, 16);
            dim3 gridShape = new dim3(8, 8);

            gpu.Launch(gridShape, blockShape, "GenerateRipples", devResults, devFilledResults);

            // Overwrites the host array with what the kernel produced.
            gpu.CopyFromDevice(devResults, hostResults);
        }
Пример #24
0
        /// <summary>
        /// Runs the <c>VectorAdd</c> kernel on two random input vectors plus a constant
        /// vector of twos, and asserts that every output element equals
        /// (input1 + input2) * input3.
        /// </summary>
        static void TempOpenCLVectorAddTest()
        {
            int[] first = new int[N];
            int[] second = new int[N];
            int[] factor = new int[N];
            int[] output = new int[N];
            Random rng = new Random();

            // Two random operands in [0, 128) and a constant multiplier of 2 per element.
            for (int idx = 0; idx < N; idx++)
            {
                first[idx] = rng.Next(128);
                second[idx] = rng.Next(128);
                factor[idx] = 2;
            }

            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda, 0);
            Console.WriteLine(gpu.GetDeviceProperties().Name);

            CudafyTranslator.Language = eLanguage.Cuda;
            var module = CudafyTranslator.Cudafy(CudafyModes.Architecture, typeof(OpenCLTestClass));
            Console.WriteLine(module.SourceCode);
            gpu.LoadModule(module);

            // Operands go to ordinary device memory, the multiplier to constant memory.
            int[] devFirst = gpu.CopyToDevice(first);
            int[] devSecond = gpu.CopyToDevice(second);
            gpu.CopyToConstantMemory(factor, OpenCLTestClass.ConstantMemory);
            int[] devOutput = gpu.Allocate<int>(N);
#warning Work group and local size mess! http://stackoverflow.com/questions/7996537/cl-invalid-work-group-size-error-should-be-solved-though
            gpu.Launch(2, 512).VectorAdd(devFirst, devSecond, devOutput);
            gpu.CopyFromDevice(devOutput, 0, output, 0, N);

            for (int idx = 0; idx < N; idx++)
            {
                int expected = (first[idx] + second[idx]) * factor[idx];
                Assert.AreEqual(expected, output[idx], string.Format("Error at {0}", idx));
            }
        }
Пример #25
0
        /// <summary>
        /// Passes a <c>ValueA</c> whose nested <c>ValueB.value</c> is 56 to the
        /// "StructTestKernel" kernel and prints whether the single int read back
        /// from the device equals 56.
        /// </summary>
        /// <remarks>
        /// The module is taken from the serialized cache when it deserializes and its
        /// checksums verify; otherwise it is re-translated for sm_20 and re-serialized.
        /// </remarks>
        public static void Execute()
        {
            const int expected = 56;

            // Reuse the cached module if possible, otherwise rebuild and cache it.
            CudafyModule km = CudafyModule.TryDeserialize();
            bool cacheUsable = km != null && km.TryVerifyChecksums();

            if (!cacheUsable)
            {
                km = CudafyTranslator.Cudafy(
                    ePlatform.Auto,
                    eArchitecture.sm_20,
                    typeof(ValueB),
                    typeof(ValueA),
                    typeof(StructTest));
                km.Serialize();
            }

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
            gpu.LoadModule(km);

            // Host-side argument: outer value wrapping an inner value set to the expected number.
            var value = new ValueA { valueB = new ValueB { value = expected } };

            // One-element device buffer that receives the kernel's result.
            int[] devOutput = gpu.Allocate <int>(1);

            gpu.Launch(1, 1, "StructTestKernel", value, devOutput);

            // Read the single result element back to the host, then release the buffer.
            int output;
            gpu.CopyFromDevice(devOutput, out output);
            gpu.Free(devOutput);

            string verdict = expected == output ? "PASSED" : "FAILED";
            Console.WriteLine("Expected: {0} \t{1}", expected, verdict);
        }
Пример #26
0
        /// <summary>
        /// Translates and loads <c>TextInsertion</c>, launches the "AHybridMethod" kernel with a
        /// 64-element input buffer and a 64-element result buffer, reads the result buffer back
        /// and prints "Failed" at the first element that differs from the host input.
        /// </summary>
        /// <remarks>
        /// The original code read back the input buffer (<c>data_d</c>) instead of the result
        /// buffer (<c>res_d</c>), so the comparison could never detect a kernel failure.
        /// NOTE(review): assumes the kernel writes its output into its second array argument —
        /// confirm against the AHybridMethod definition.
        /// </remarks>
        public static void Execute()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            CudafyModule km = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(TextInsertion));

            Console.WriteLine(km.CompilerOutput);
            _gpu.LoadModule(km);

            int[] data   = new int[64];
            int[] data_d = _gpu.CopyToDevice(data);
            int[] res_d  = _gpu.Allocate(data);
            int[] res    = new int[64];

            try
            {
                _gpu.Launch(1, 1, "AHybridMethod", data_d, res_d);

                // Read back the kernel's output buffer, not the untouched input buffer.
                _gpu.CopyFromDevice(res_d, res);
            }
            finally
            {
                // Release both device buffers even if the launch or the copy throws.
                _gpu.Free(res_d);
                _gpu.Free(data_d);
            }

            for (int i = 0; i < 64; i++)
            {
                if (data[i] != res[i])
                {
                    Console.WriteLine("Failed");
                    break;
                }
            }
        }
Пример #27
0
        /// <summary>
        /// Walks every device reported for the CUDA, OpenCL and Emulator back-ends and, for each
        /// device, tries to translate and load the <c>Primitives</c> module once per target
        /// language (CUDA, OpenCL), enabling multithreading on the device after a successful load.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method: <c>CudafyModes.Target</c> and
        /// <c>CudafyTranslator.Language</c> are overwritten as the loops advance, and a randomly
        /// named temp file is deleted after every compile attempt. Back-ends or languages that are
        /// unavailable are skipped silently via the catch blocks.
        /// </remarks>
        private void InitializeGPUs()
        {
            eGPUType[]  candidateTypes     = { eGPUType.Cuda, eGPUType.OpenCL, eGPUType.Emulator };
            eLanguage[] candidateLanguages = { eLanguage.Cuda, eLanguage.OpenCL };

            foreach (eGPUType candidateType in candidateTypes)
            {
                try
                {
                    int deviceCount = CudafyHost.GetDeviceCount(candidateType);

                    for (int deviceIndex = 0; deviceIndex < deviceCount; deviceIndex++)
                    {
                        GPGPU           device           = CudafyHost.GetDevice(candidateType, deviceIndex);
                        GPGPUProperties deviceProperties = device.GetDeviceProperties(true);

                        // Global switch: subsequent Cudafy calls target this back-end.
                        CudafyModes.Target = candidateType;

                        foreach (eLanguage candidateLanguage in candidateLanguages)
                        {
                            string tempInputFile = Path.GetRandomFileName();

                            try
                            {
                                CudafyTranslator.Language = candidateLanguage;

                                CompileProperties settings = CompilerHelper.Create(
                                    ePlatform.Auto,
                                    eArchitecture.Unknown,
                                    eCudafyCompileMode.Default,
                                    CudafyTranslator.WorkingDirectory,
                                    CudafyTranslator.GenerateDebug);

                                // A random input file name avoids clashes on the default temp file
                                // when several threads (unit tests) compile at the same time.
                                settings.InputFile = tempInputFile;

                                // If this call fails under NCrunch / unit tests, a newer Cudafy.NET is probably
                                // installed and must be registered in the GAC: gacutil -i Cudafy.NET.dll
                                CudafyModule module = CudafyTranslator.Cudafy(settings, typeof(Primitives));

                                bool alreadyLoaded = device.IsModuleLoaded(module.Name);

                                if (!alreadyLoaded)
                                {
                                    device.LoadModule(module);
                                }

                                device.EnableMultithreading();

                                // Display key "<device name> - <back-end> - <language>"; the registrations
                                // that would consume it are currently commented out.
                                string gpuName = deviceProperties.Name.Trim() + " - " + candidateType.ToString() + " - " + candidateLanguage.ToString();

                                ////this.gpgpus.Add(gpuName, device);
                                ////this.gpgpuProperties.Add(gpuName, deviceProperties);
                                ////this.gpuTypes.Add(gpuName, candidateType);
                            }
                            catch (CudafyCompileException)
                            {
                                // Language not supported
                            }
                            finally
                            {
                                File.Delete(tempInputFile);

                                // ncrunch: no coverage start
                            }
                        }
                    }
                }
                catch (DllNotFoundException)
                {
                }
                catch (InvalidOperationException)
                {
                    // Language not supported
                }
                catch (Cloo.ComputeException)
                {
                    // Language not supported
                } // ncrunch: no coverage end
            }
        }
        /// <summary>
        /// Runs <c>histo_kernel</c> over a random byte buffer of <c>SIZE</c> bytes, prints the
        /// elapsed GPU time and the sum of the 256 returned counters, then subtracts a CPU-side
        /// count from the counters and reports every bin that does not end at zero.
        /// </summary>
        /// <returns>
        /// -1 when the device is a CUDA GPU with compute capability below 1.2; otherwise 0
        /// (also when bin mismatches were printed).
        /// </returns>
        public static int Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            // The capability check only applies to real CUDA devices.
            bool isCudaDevice = gpu is CudaGPU;

            if (isCudaDevice && gpu.GetDeviceProperties().Capability < new Version(1, 2))
            {
                Console.WriteLine("Compute capability 1.2 or higher required for atomics.");
                return -1;
            }

            gpu.LoadModule(km);

            byte[] buffer = big_random_block(SIZE);

            // Prefer the extended property query (needs cudart.dll); fall back to the
            // basic one when that DLL cannot be found.
            GPGPUProperties prop;

            try
            {
                prop = gpu.GetDeviceProperties(true);
            }
            catch (DllNotFoundException)
            {
                prop = gpu.GetDeviceProperties(false);
            }

            // The timer starts before the transfers on purpose: the reported time covers
            // the upload, the kernel and the download, not the kernel alone.
            gpu.StartTimer();

            // Upload the input bytes and prepare the 256 device-side counters.
            byte[] dev_buffer = gpu.CopyToDevice(buffer);
            uint[] dev_histo  = gpu.Allocate <uint>(256);
            gpu.Set(dev_histo);

            // Grid size is twice the multiprocessor count; 16 is the fallback
            // when the count is reported as 0.
            int reportedCount = prop.MultiProcessorCount;
            int blocks        = reportedCount != 0 ? reportedCount : 16;

            Console.WriteLine("Processors: {0}", blocks);
            gpu.Launch(blocks * 2, 256).histo_kernel(dev_buffer, SIZE, dev_histo);

            uint[] histo = new uint[256];
            gpu.CopyFromDevice(dev_histo, histo);

            // Stop timing and report.
            float elapsedTime = gpu.StopTimer();
            Console.WriteLine("Time to generate: {0} ms", elapsedTime);

            long histoCount = 0;

            foreach (uint binCount in histo)
            {
                histoCount += binCount;
            }

            Console.WriteLine("Histogram Sum:  {0}", histoCount);

            // CPU cross-check: decrement the bin of every input byte ...
            for (int pos = 0; pos < SIZE; pos++)
            {
                histo[buffer[pos]]--;
            }

            // ... after which every bin must be back at zero.
            for (int bin = 0; bin < 256; bin++)
            {
                if (histo[bin] != 0)
                {
                    Console.WriteLine("Failure at {0}!", bin);
                }
            }

            gpu.FreeAll();

            return 0;
        }
Пример #29
0
 /// <summary>
 /// One-time GPU setup: translates the Cudafy-marked code into a module, obtains the
 /// device selected by <c>CudafyModes.Target</c> / <c>CudafyModes.DeviceId</c> and loads
 /// the module onto it. The results are stored in the <c>km</c> and <c>gpu</c> members,
 /// which are declared outside this method.
 /// </summary>
 public static void prepareGPU()
 {
     // Translate first, then fetch the device, then load: the load needs both.
     km  = CudafyTranslator.Cudafy();
     gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
     gpu.LoadModule(km);
 }
Пример #30
0
        /// <summary>
        /// Builds two lookup tables from <paramref name="keys"/> via <c>StringSearch</c>, uploads
        /// them together with the characters of <paramref name="I"/> to the GPU, runs the
        /// <c>Dot</c> kernel with one thread per input character (rounded up to whole blocks of
        /// <c>N</c> threads) and returns the per-character result buffer.
        /// </summary>
        /// <param name="keys">Keys handed to the <c>StringSearch</c> constructor.</param>
        /// <param name="I">Input text; its characters are copied to the device.</param>
        /// <param name="n">Not referenced anywhere in this method.</param>
        /// <returns>A char array of the same length as <paramref name="I"/>, as filled by the kernel.</returns>
        public static char[] Execute(String[] keys, string I, int n)
        {
            GPGPU        gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
            CudafyModule km  = CudafyTranslator.Cudafy(gpu.GetArchitecture());

            gpu.LoadModule(km);

            // Times the host-side table construction; the measurement is not reported.
            Stopwatch hostTimer = new Stopwatch();
            hostTimer.Start();

            StringSearch searcher = new StringSearch(keys);
            string       alphabet = "ABCDEFGHI*KLMN*PQRST*VWXYZ";
            int          alpha    = alphabet.Length;

            // First table: one row per node, one column per alphabet symbol, pre-filled with -1.
            int[,] hostTable1 = new int[StringSearch.nodeCount, alpha];
            int    rowCount   = hostTable1.GetLength(0);

            for (int row = 0; row < rowCount; row++)
            {
                for (int col = 0; col < alpha; col++)
                {
                    hostTable1[row, col] = -1;
                }
            }

            searcher.build_table1(hostTable1, searcher._root);

            char[] hostInput = I.ToCharArray();
            int    length    = I.Length;

            // Drop the local reference to the input string; only the char copy is used from here on.
            I = "";

            // Second table: one entry per node.
            int[] hostOutputTable = new int[StringSearch.nodeCount];
            searcher.build_tableO(hostOutputTable, searcher._root);

            // Replace the populated searcher with a fresh instance now that both tables exist.
            searcher = new StringSearch();

            char[] hostMatches = new char[length];
            hostTimer.Stop();

            gpu.SetCurrentContext();

            // Host arrays used only as size/shape templates for the device allocations.
            int[]  shape1D = new int[StringSearch.nodeCount];
            int[,] shape2D = new int[StringSearch.nodeCount, alpha];

            int[,] devTable1      = gpu.Allocate <int>(shape2D);
            int[]  devOutputTable = gpu.Allocate <int>(shape1D);
            char[] devMatches     = gpu.Allocate <char>(length);
            char[] devInput       = gpu.Allocate <char>(length);
            int[]  devLength      = gpu.Allocate <int>(1);
            int[]  hostLength     = { length };

            gpu.CopyToDevice(hostTable1, devTable1);
            gpu.CopyToDevice(hostOutputTable, devOutputTable);
            gpu.CopyToDevice(hostMatches, devMatches);
            gpu.CopyToDevice(hostInput, devInput);
            gpu.CopyToDevice(hostLength, devLength);

            // Enough blocks of N threads to cover every input character.
            int blockCount = (int)Math.Ceiling((double)length / N);

            gpu.Launch(blockCount, N).Dot(devTable1, devOutputTable, devMatches, devInput, devLength);
            gpu.CopyFromDevice(devMatches, hostMatches);
            gpu.FreeAll();

            return hostMatches;
        }