Example 1
        /// <summary>
        /// The GPUParams constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="root_solver">Specifies the root Solver.</param>
        /// <param name="nDeviceID">Specifies the device ID to use for this instance.</param>
        public GPUParams(CudaDnn<T> cuda, Log log, Solver<T> root_solver, int nDeviceID)
            : base(root_solver)
        {
            m_cuda = cuda;
            m_log  = log;

            m_nDeviceID = m_cuda.GetDeviceID();

            if (nDeviceID != m_nDeviceID)
            {
                m_cuda.SetDeviceID(nDeviceID);
            }

            // Allocate device buffers
            m_hData = m_cuda.AllocMemory(m_lCount);

            // Copy blob values
            BlobCollection<T> net = root_solver.net.learnable_parameters;

            apply_buffers(net, m_hData, m_lCount, Op.copy);

            m_hDiff = m_cuda.AllocMemory(m_lCount);
            m_cuda.set((int)m_lCount, m_hDiff, 0);

            m_hStream = m_cuda.CreateStream();

            if (m_nDeviceID != nDeviceID)
            {
                m_cuda.SetDeviceID(m_nDeviceID);
            }
        }
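The constructor above temporarily switches to the requested device before allocating, then restores the caller's device. A minimal sketch of that same pattern, using only the CudaDnn calls that appear in these examples (the helper name AllocZeroedOnDevice is hypothetical, not part of the library):

        static long AllocZeroedOnDevice(CudaDnn<float> cuda, int nTargetDeviceID, long lCount)
        {
            int nOriginalDeviceID = cuda.GetDeviceID();

            // Switch to the requested GPU only if it differs from the current one.
            if (nTargetDeviceID != nOriginalDeviceID)
                cuda.SetDeviceID(nTargetDeviceID);

            // Allocate and zero-fill on the target device (as done for m_hDiff above).
            long hMem = cuda.AllocMemory(lCount);
            cuda.set((int)lCount, hMem, 0);

            // Restore the caller's device before returning the handle.
            if (nTargetDeviceID != nOriginalDeviceID)
                cuda.SetDeviceID(nOriginalDeviceID);

            return hMem;
        }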
Example 2
 /// <summary>
 /// Allocate a number of items in GPU memory and save the handle.
 /// </summary>
 /// <param name="lCount">Specifies the number of items.</param>
 public void Allocate(long lCount)
 {
     free();
     m_nDeviceID = m_cuda.GetDeviceID();
     m_hGpuData  = m_cuda.AllocMemory(lCount);
     m_lCapacity = lCount;
     m_lCount    = 0;
     m_bOwnData  = true;
     return;
 }
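A hypothetical call site for this overload, assuming Allocate is the SyncedMemory<T> member implied by the shared m_hGpuData/m_lCapacity fields, and that the Log constructor takes a source-name string:

 CudaDnn<float> cuda = new CudaDnn<float>(0);
 Log log = new Log("AllocateTest");
 SyncedMemory<float> mem = new SyncedMemory<float>(cuda, log);

 // Reserve room for 1000 items; capacity becomes 1000 and the count resets to 0.
 mem.Allocate(1000);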
Example 3
        /// <summary>
        /// The SyncedMemory constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="lCapacity">Optionally, specifies the capacity of the SyncedMemory (in items).</param>
        /// <param name="tag">Optionally, specifies a tag used for debugging (the default = <i>null</i>).</param>
        /// <param name="bUseHalfSize">Optionally, specifies to use half size (FP16) for both data and diff.  This option is only available when using the <i>float</i> base type 'T'.</param>
        public SyncedMemory(CudaDnn <T> cuda, Log log, long lCapacity = 0, object tag = null, bool bUseHalfSize = false)
        {
            if (bUseHalfSize && typeof(T) != typeof(float))
            {
                bUseHalfSize = false;

                if (log != null)
                {
                    log.WriteLine("Half size disabled for non 'float' basetypes!");
                }
            }

            m_bHalfSize = bUseHalfSize;
            m_cuda      = cuda;
            m_log       = log;
            m_tag       = tag;

            if (lCapacity > 0)
            {
                m_nDeviceID = m_cuda.GetDeviceID();
                m_hGpuData  = m_cuda.AllocMemory(lCapacity);
                m_lCapacity = lCapacity;
                m_lCount    = lCapacity;
            }
        }
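A hypothetical construction with a pre-allocated capacity and FP16 storage enabled. The Log source name is illustrative; with a non-float base type the half-size flag would simply be reset, as the guard above shows:

            CudaDnn<float> cuda = new CudaDnn<float>(0);
            Log log = new Log("SyncedMemoryTest");

            // Capacity is in items; FP16 storage is honored because T is 'float'.
            SyncedMemory<float> mem = new SyncedMemory<float>(cuda, log, 1024, null, true);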
Example 4
        /// <summary>
        /// Allocate a number of items in GPU memory and save the handle.
        /// </summary>
        /// <param name="lCount">Specifies the number of items.</param>
        /// <param name="bUseHalfSize">Optionally, specifis to use half sized memory (default = false).  This only applies to the 'float' base type.</param>
        public void Allocate(long lCount, bool bUseHalfSize = false)
        {
            if (bUseHalfSize && typeof(T) != typeof(float))
            {
                bUseHalfSize = false;

                if (m_log != null)
                {
                    m_log.WriteLine("Half size disabled for non 'float' basetypes!");
                }
            }

            free();
            m_nDeviceID = m_cuda.GetDeviceID();
            m_bHalfSize = bUseHalfSize;
            m_hGpuData  = m_cuda.AllocMemory(lCount, m_bHalfSize);
            m_lCapacity = lCount;
            m_lCount    = 0;
            m_bOwnData  = true;
            return;
        }
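For illustration, a hypothetical call with a 'double' base type: the half-size request is ignored and a warning is written to the log, matching the guard at the top of the method (this again assumes Allocate is a SyncedMemory<T> member):

            CudaDnn<double> cuda = new CudaDnn<double>(0);
            Log log = new Log("AllocateHalfTest");
            SyncedMemory<double> mem = new SyncedMemory<double>(cuda, log);

            // bUseHalfSize is reset to false internally because T is not 'float'.
            mem.Allocate(256, true);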
Example 5
        /// <summary>
        /// The SyncedMemory constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="lCapacity">Optionally, specifies the capacity of the SyncedMemory (in items).</param>
        public SyncedMemory(CudaDnn<T> cuda, Log log, long lCapacity = 0)
        {
            m_cuda = cuda;
            m_log  = log;

            if (lCapacity > 0)
            {
                m_nDeviceID = m_cuda.GetDeviceID();
                m_hGpuData  = m_cuda.AllocMemory(lCapacity);
                m_lCapacity = lCapacity;
                m_lCount    = lCapacity;
            }
        }
Example 6
        static void Main(string[] args)
        {
            // This memory will reside on the GPU.
            long hGpuMem = 0;

            Console.WriteLine("Creating CudaCuDnn...");
            CudaDnn<float> cuda = new CudaDnn<float>(0);

            try
            {
                string strDeviceInfo = cuda.GetDeviceName(0);
                Console.WriteLine(strDeviceInfo);
                strDeviceInfo = cuda.GetDeviceP2PInfo(0);
                Console.WriteLine(strDeviceInfo);
                strDeviceInfo = cuda.GetDeviceInfo(0, true);
                Console.WriteLine(strDeviceInfo);

                List<long> rghGpuMem = new List<long>();
                long       lOffset   = 0;

                // You must first allocate the GPU memory to use.
                // Below we will allocate an array of 1000 float values.
                Console.WriteLine("Allocate 1000 items...");
                hGpuMem = cuda.AllocMemory(1000);
                cuda.set(1000, hGpuMem, 0.0);

                Console.WriteLine("Create memory pointers...");
                for (int i = 0; i < 10; i++)
                {
                    long hMem1 = cuda.CreateMemoryPointer(hGpuMem, lOffset, 100);
                    cuda.set(100, hMem1, (double)(i + 1));
                    rghGpuMem.Add(hMem1);
                    lOffset += 100;
                }

                Console.WriteLine("Test memory...");
                for (int i = 0; i < 10; i++)
                {
                    long    hMem1  = rghGpuMem[i];
                    float[] rgData = cuda.GetMemoryFloat(hMem1);

                    if (rgData.Length != 100)
                    {
                        throw new Exception("The data length should = 100!");
                    }

                    for (int j = 0; j < 100; j++)
                    {
                        if (rgData[j] != (float)(i + 1))
                        {
                            throw new Exception("The data at index " + j.ToString() + " is not correct!");
                        }
                    }
                }

                Console.WriteLine("Memory test passed successfully!");
            }
            catch (Exception excpt)
            {
                Console.WriteLine("ERROR: " + excpt.Message);
            }
            finally
            {
                // Clean-up and release all GPU memory used.
                if (hGpuMem != 0)
                {
                    cuda.FreeMemory(hGpuMem);
                    hGpuMem = 0;
                }

                cuda.Dispose();
            }

            Console.WriteLine("Press any key to exit.");
            Console.Read();
        }
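As a follow-up to the pointer test above, a hypothetical sketch (the MemoryPointerViewCheck name is illustrative) showing that handles returned by CreateMemoryPointer are views into the base allocation, so a value written through the base handle can be read back through a pointer created over it:

        static void MemoryPointerViewCheck()
        {
            CudaDnn<float> cuda = new CudaDnn<float>(0);
            long hGpuMem = cuda.AllocMemory(1000);

            try
            {
                // Create a 100-item view starting at offset 0 of the base allocation.
                long hView = cuda.CreateMemoryPointer(hGpuMem, 0, 100);

                // Write through the base handle, then read back through the view.
                cuda.set(1000, hGpuMem, 7.0);
                float[] rgView = cuda.GetMemoryFloat(hView);

                Console.WriteLine(rgView[0]);   // expected: 7
            }
            finally
            {
                // Free the base allocation and release the CudaDnn instance.
                cuda.FreeMemory(hGpuMem);
                cuda.Dispose();
            }
        }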