Beispiel #1
0
        /// <summary>
        /// Flattens a stack of 2-D matrices, uploads it to the GPU, runs the
        /// <c>matrixSumCude</c> kernel over it and returns the kernel's output.
        /// </summary>
        /// <param name="matrix">
        /// Stack of matrices. The dimensions of <c>matrix[0]</c> are used for the
        /// compute size and the result length, so all slices are presumably expected
        /// to share that shape (not verified here).
        /// </param>
        /// <returns>
        /// A flat array of <c>Y * X</c> doubles copied back from the device. The exact
        /// reduction performed is defined by the kernel, which is not visible here.
        /// </returns>
        static double[] SumMatrixManagedCuda(double[][,] matrix)
        {
            int Z = matrix.Length;
            int Y = matrix[0].GetLength(0);
            int X = matrix[0].GetLength(1);

            var result = new double[Y * X];
            var lm     = ToLinearArray(matrix);

            matrixSumCude.SetComputeSize((uint)X, (uint)Y);

            // Device buffers are released in finally blocks so that neither a failed
            // copy nor a failed kernel launch leaks GPU memory.
            var da = cntxt.AllocateMemory(lm.Length * sizeof(double));
            try
            {
                var db = cntxt.AllocateMemory(result.Length * sizeof(double));
                try
                {
                    cntxt.CopyToDevice(da, lm);
                    // Uploading the freshly created (all-zero) host array initialises the output buffer.
                    cntxt.CopyToDevice(db, result);

                    // Invoke kernel: output buffer first, then input buffer and the three dimensions.
                    matrixSumCude.Run(db, da, X, Y, Z);

                    cntxt.CopyToHost <double>(result, db);
                }
                finally
                {
                    cntxt.FreeMemory(db);
                }
            }
            finally
            {
                cntxt.FreeMemory(da);
            }

            return(result);
        }
        /// <summary>
        /// Returns a device memory block of at least <paramref name="byteCount"/> bytes,
        /// reusing a previously released block of the same padded size when one is queued.
        /// </summary>
        /// <param name="byteCount">Requested size in bytes; padded via <c>PadToAlignment</c> before lookup.</param>
        /// <returns>
        /// A pooled block, or a newly allocated <c>BasicDeviceMemory</c> whose release callback
        /// puts it back into the queue for its padded size.
        /// </returns>
        public IDeviceMemory Allocate(long byteCount)
        {
            long paddedSize = PadToAlignment(byteCount, MemoryAlignment);

            lock (locker)
            {
                Queue <IDeviceMemory> bucket;
                bool bucketKnown = pools.TryGetValue(paddedSize, out bucket);

                if (!bucketKnown)
                {
                    // First request for this size: register an empty queue for later reuse.
                    bucket = new Queue <IDeviceMemory>();
                    pools.Add(paddedSize, bucket);
                }
                else if (bucket.Count > 0)
                {
                    IDeviceMemory recycled = bucket.Dequeue();

                    // HACK  bizarrely, Queue.Dequeue appears to sometimes return null, even when there are many elements in the queue,
                    // and when the queue is only ever accessed from one thread.
                    if (recycled != null)
                    {
                        return(recycled);
                    }
                }

                // Nothing reusable was found, so a fresh device allocation is needed.
                // Up to three attempts are made: plain, after FreeMemory(false), and after
                // FreeMemory(true). What the flag changes is defined elsewhere in this class.
                CUdeviceptr devicePtr;
                try
                {
                    try
                    {
                        context.SetCurrent();
                        devicePtr = context.AllocateMemory(paddedSize);
                    }
                    catch (ManagedCuda.CudaException)
                    {
                        FreeMemory(false);
                        devicePtr = context.AllocateMemory(paddedSize);
                    }
                }
                catch (ManagedCuda.CudaException)
                {
                    FreeMemory(true);
                    devicePtr = context.AllocateMemory(paddedSize);
                }

                // The callback needs to reference the object being constructed, hence the
                // declare-then-assign dance.
                BasicDeviceMemory block = null;
                block = new BasicDeviceMemory(devicePtr, () =>
                {
                    lock (locker)
                    {
                        bucket.Enqueue(block);
                    }
                });

                return(block);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Runs the <c>_multiEuclidean</c> kernel for <paramref name="vector"/> against every
        /// device pointer in <paramref name="compareTo"/>.
        /// </summary>
        /// <param name="vector">Device memory holding the reference vector.</param>
        /// <param name="compareTo">Device pointers of the vectors to compare against.</param>
        /// <param name="size">Per-vector element count passed to the kernel.</param>
        /// <returns>Device memory of <c>size * compareTo.Length</c> elements written by the kernel.</returns>
        internal IDeviceMemoryPtr MultiEuclideanDistance(IDeviceMemoryPtr vector, CUdeviceptr[] compareTo, int size)
        {
            int vectorCount = compareTo.Length;

            // Temporary device-side table holding the pointers themselves (8 bytes reserved per entry).
            var pointerTable = _cuda.AllocateMemory(8 * vectorCount);

            try
            {
                _cuda.CopyToDevice(pointerTable, compareTo);

                IDeviceMemoryPtr distances = Allocate(size * vectorCount);
                _Use(_multiEuclidean, size, vectorCount, k => k.Run(0, vector.DevicePointer, pointerTable, distances.DevicePointer, size, vectorCount));

                return(distances);
            }
            finally
            {
                // The pointer table is only needed for the duration of the launch.
                _cuda.FreeMemory(pointerTable);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Runs the <c>_multiEuclidean</c> kernel for <paramref name="vector"/> against every
        /// device pointer in <paramref name="compareTo"/>.
        /// </summary>
        /// <param name="vector">Device variable holding the reference vector.</param>
        /// <param name="compareTo">Device pointers of the vectors to compare against.</param>
        /// <param name="size">Per-vector element count passed to the kernel.</param>
        /// <returns>
        /// A new device variable of <c>size * compareTo.Length</c> floats written by the kernel.
        /// The caller owns it and is responsible for disposing it.
        /// </returns>
        internal CudaDeviceVariable <float> MultiEuclideanDistance(CudaDeviceVariable <float> vector, CUdeviceptr[] compareTo, int size)
        {
            CudaDeviceVariable <float> ret = null;

            // Temporary device-side table holding the pointers themselves (8 bytes reserved per entry).
            var buffer = _cuda.AllocateMemory(8 * compareTo.Length);

            try {
                _cuda.CopyToDevice(buffer, compareTo);
                ret = new CudaDeviceVariable <float>(size * compareTo.Length);
                _Use(_multiEuclidean, size, compareTo.Length, k => k.Run(0, vector.DevicePointer, buffer, ret.DevicePointer, size, compareTo.Length));
            }
            catch {
                // The result never reaches the caller on failure, so release it here
                // instead of leaking the device allocation.
                if (ret != null) {
                    ret.Dispose();
                }
                throw;
            }
            finally {
                _cuda.FreeMemory(buffer);
            }
            return(ret);
        }
        /// <summary>
        /// Returns a device memory block of at least <paramref name="byteCount"/> bytes,
        /// reusing a previously released block of the same padded size when one is queued.
        /// </summary>
        /// <param name="byteCount">Requested size in bytes; padded via <c>PadToAlignment</c> before lookup.</param>
        /// <returns>
        /// A pooled block, or a newly allocated <c>BasicDeviceMemory</c> whose release callback
        /// puts it back into the queue for its padded size.
        /// </returns>
        public IDeviceMemory Allocate(long byteCount)
        {
            var paddedSize = PadToAlignment(byteCount, MemoryAlignment);

            Queue<IDeviceMemory> bucket;
            var bucketKnown = pools.TryGetValue(paddedSize, out bucket);

            if (!bucketKnown)
            {
                // First request for this size: register an empty queue for later reuse.
                bucket = new Queue<IDeviceMemory>();
                pools.Add(paddedSize, bucket);
            }
            else if (bucket.Count > 0)
            {
                var recycled = bucket.Dequeue();

                // HACK  bizarrely, Queue.Dequeue appears to sometimes return null, even when there are many elements in the queue,
                // and when the queue is only ever accessed from one thread.
                if (recycled != null)
                {
                    return recycled;
                }
            }

            // Nothing reusable was found, so allocate a fresh block on the device.
            var devicePtr = context.AllocateMemory(paddedSize);

            // The callback needs to reference the object being constructed, hence the
            // declare-then-assign dance.
            BasicDeviceMemory block = null;
            block = new BasicDeviceMemory(devicePtr, () => { bucket.Enqueue(block); });

            return block;
        }
Beispiel #6
0
        /// <summary>
        /// Allocates a scratch buffer on the device, sized as
        /// <c>ScratchSpacePerSMStream</c> multiplied by the device's multiprocessor count.
        /// </summary>
        /// <param name="context">Context used to perform the allocation.</param>
        /// <param name="deviceProps">Device properties supplying <c>MultiProcessorCount</c>.</param>
        /// <returns>A <c>ScratchSpace</c> carrying the buffer pointer and its size.</returns>
        private static ScratchSpace AllocScratchSpace(CudaContext context, CudaDeviceProperties deviceProps)
        {
            var totalBytes = ScratchSpacePerSMStream * deviceProps.MultiProcessorCount;
            var devicePtr  = context.AllocateMemory(totalBytes);

            return new ScratchSpace()
            {
                size   = totalBytes,
                buffer = devicePtr,
            };
        }
Beispiel #7
0
        /// <summary>
        /// Allocates a scratch buffer on the device, sized as
        /// <c>ScratchSpacePerSMStream</c> multiplied by the device's multiprocessor count.
        /// </summary>
        /// <param name="context">Context used to perform the allocation.</param>
        /// <param name="deviceProps">Device properties supplying <c>MultiProcessorCount</c>.</param>
        /// <returns>A <c>ScratchSpace</c> carrying the buffer pointer and its size.</returns>
        private static ScratchSpace AllocScratchSpace(CudaContext context, CudaDeviceProperties deviceProps)
        {
            int totalBytes = ScratchSpacePerSMStream * deviceProps.MultiProcessorCount;
            ManagedCuda.BasicTypes.CUdeviceptr devicePtr = context.AllocateMemory(totalBytes);

            return new ScratchSpace()
            {
                size   = totalBytes,
                buffer = devicePtr,
            };
        }
Beispiel #8
0
        /// <summary>
        /// Creates the allocator and reserves one contiguous device block covering
        /// <paramref name="memoryUsageRatio"/> of the memory the context currently reports as free.
        /// </summary>
        /// <param name="context">CUDA context to allocate from; it is made current first.</param>
        /// <param name="memoryUsageRatio">Fraction of the free device memory to reserve (default 0.9).</param>
        public PoolingDeviceAllocator(CudaContext context, float memoryUsageRatio = 0.9f)
        {
            m_context = context;
            context.SetCurrent();

            // Size of the pool: a share of whatever is free right now.
            ulong freeBytes = (ulong)context.GetFreeDeviceMemorySize();
            m_ulAvailMemByteInTotal = (ulong)(freeBytes * memoryUsageRatio);

            // Reserve the whole pool up front and remember its address range.
            m_memPoolPtr   = context.AllocateMemory(m_ulAvailMemByteInTotal);
            m_startMemAddr = m_memPoolPtr.Pointer;
            m_endMemAddr   = m_startMemAddr + m_ulAvailMemByteInTotal;

            // Bookkeeping of handed-out ranges; starts empty.
            m_usedAddr2Size = new SortedDictionary <ulong, ulong>();

            Logger.WriteLine($"Allocated Cuda memory: {m_ulAvailMemByteInTotal}, address from '{m_startMemAddr}' to '{m_endMemAddr}'");
        }
        /// <summary>
        /// Loads the Buddhabrot PTX module, initialises the per-thread random state and the
        /// settings on the device, then launches the <c>RunBuddha</c> kernel repeatedly until
        /// <c>IsStopping</c> is set or <c>maxSamples</c> is reached.
        /// </summary>
        /// <returns>The accumulated <c>Width * Height</c> array copied back from the device.</returns>
        public uint[] Run()
        {
            // NOTE(review): hard-coded absolute path; only works on a machine with this layout.
            var ptx = @"C:\Src\_Tree\SmallPrograms\Buddhabrot\Buddhabrot.Cuda70\x64\Release\Buddhabrot.ptx";

            // Dispose the context when done so the CUDA context and every device
            // allocation made through it are released instead of leaking per call.
            using (var context = new CudaContext())
            {
                var module = new CudaModuleHelper(context, ptx);

                var init = module.GetKernel("Init");
                var setSettings = module.GetKernel("SetSettings");
                var runBuddha = module.GetKernel("RunBuddha");

                var nBlocks = 4196;
                var nThreads = 256;

                var dSettings = context.AllocateMemoryFor(settings);
                context.CopyToDevice(dSettings, settings);

                var array = new uint[settings.Width * settings.Height];
                // One random-generator state per launched thread.
                var dState = context.AllocateMemory(nThreads * nBlocks * SizeOfCurandState);
                var dArray = context.AllocateMemoryFor(array);
                context.CopyToDevice(dArray, array);

                init.Launch(nBlocks, nThreads, dState);
                setSettings.Launch(1, 1, dSettings);

                Console.WriteLine("Starting...");
                var sw = Stopwatch.StartNew();
                long i = 0;

                while (!IsStopping)
                {
                    runBuddha.Launch(nBlocks, nThreads, dArray, dState);

                    // Launched-thread total so far: iterations * blocks * threads per block.
                    double count = (++i * nBlocks * nThreads);
                    if (i % 5 == 0)
                    {
                        Console.WriteLine("Generated {0:0.0} Million samples in {1:0.000} sec", count / 1000000.0, sw.ElapsedMilliseconds / 1000.0);
                    }

                    if (maxSamples.HasValue && count >= maxSamples)
                    {
                        break;
                    }
                }

                // Copy the result to the host before the context is torn down.
                context.CopyToHost(array, dArray);
                return array;
            }
        }
Beispiel #10
0
 /// <summary>
 /// Loads the kernel named "kernel" from the given PTX image, runs it over a 16-element
 /// int buffer pre-filled with <c>i - 2</c>, and prints the buffer contents afterwards.
 /// </summary>
 /// <param name="ptxFile">PTX module image containing an entry point called "kernel".</param>
 static void Test(byte[] ptxFile)
 {
     const int size = 16;

     // Dispose the context when done so the CUDA context and the device
     // allocation made through it are released.
     using (var context = new CudaContext())
     {
         var kernel = context.LoadKernelPTX(ptxFile, "kernel");
         var memory = context.AllocateMemory(4 * size);
         // Typed view over the raw allocation, used for the host/device copies.
         var gpuMemory = new CudaDeviceVariable<int>(memory);
         var cpuMemory = new int[size];
         for (var i = 0; i < size; i++)
         {
             cpuMemory[i] = i - 2;
         }
         gpuMemory.CopyToDevice(cpuMemory);
         // 4 blocks of 4 threads: 16 threads for the 16 elements.
         kernel.BlockDimensions = 4;
         kernel.GridDimensions = 4;
         kernel.Run(memory);
         gpuMemory.CopyToHost(cpuMemory);
         for (var i = 0; i < size; i++)
         {
             Console.WriteLine("{0} = {1}", i, cpuMemory[i]);
         }
     }
 }
Beispiel #11
0
        /// <summary>
        /// Allocates <paramref name="byteCount"/> bytes directly from the context, without pooling.
        /// </summary>
        /// <param name="byteCount">Number of bytes to allocate on the device.</param>
        /// <returns>
        /// A <c>BasicDeviceMemory</c> wrapping the allocation; its callback frees the
        /// underlying device memory through the same context.
        /// </returns>
        public IDeviceMemory Allocate(long byteCount)
        {
            var devicePtr = context.AllocateMemory(byteCount);

            return new BasicDeviceMemory(devicePtr, () =>
            {
                context.FreeMemory(devicePtr);
            });
        }