/// <summary>
/// The GPUParams constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="root_solver">Specifies the root Solver.</param>
/// <param name="nDeviceID">Specifies the device ID to use for this instance.</param>
public GPUParams(CudaDnn<T> cuda, Log log, Solver<T> root_solver, int nDeviceID)
    : base(root_solver)
{
    m_cuda = cuda;
    m_log = log;

    // Remember the device that is currently active so it can be restored
    // before returning; all allocations below must land on nDeviceID.
    m_nDeviceID = m_cuda.GetDeviceID();

    if (nDeviceID != m_nDeviceID)
    {
        m_cuda.SetDeviceID(nDeviceID);
    }

    // Allocate device buffers
    // NOTE(review): m_lCount is presumably set by the base(root_solver) call
    // from the learnable parameter count — confirm against the base class.
    m_hData = m_cuda.AllocMemory(m_lCount);

    // Copy blob values
    BlobCollection<T> net = root_solver.net.learnable_parameters;
    apply_buffers(net, m_hData, m_lCount, Op.copy);

    // Gradient buffer starts zeroed.  NOTE(review): the (int) cast narrows
    // m_lCount — overflows for counts > int.MaxValue; verify upstream limits.
    m_hDiff = m_cuda.AllocMemory(m_lCount);
    m_cuda.set((int)m_lCount, m_hDiff, 0);

    m_hStream = m_cuda.CreateStream();

    // Restore the device that was active when this constructor was entered.
    if (m_nDeviceID != nDeviceID)
    {
        m_cuda.SetDeviceID(m_nDeviceID);
    }
}
/// <summary>
/// Allocate a number of items in GPU memory and save the handle.
/// </summary>
/// <param name="lCount">Specifies the number of items.</param>
public void Allocate(long lCount)
{
    // Release any allocation currently held before acquiring a new one.
    free();

    // Record the device that the new allocation lives on; the buffer is
    // owned by this instance and starts out logically empty.
    m_nDeviceID = m_cuda.GetDeviceID();
    m_hGpuData = m_cuda.AllocMemory(lCount);
    m_bOwnData = true;
    m_lCapacity = lCount;
    m_lCount = 0;
}
/// <summary>
/// The SyncedMemory constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="lCapacity">Optionally, specifies the capacity of the SyncedMemory (in items).</param>
/// <param name="tag">Optionally, specifies a tag used for debugging (the default = <i>null</i>).</param>
/// <param name="bUseHalfSize">Optionally, specifies to use half size (FP16) for both data and diff. This option is only available when using the <i>float</i> base type 'T'.</param>
public SyncedMemory(CudaDnn<T> cuda, Log log, long lCapacity = 0, object tag = null, bool bUseHalfSize = false)
{
    // Half precision storage is only supported for the 'float' base type.
    if (bUseHalfSize && typeof(T) != typeof(float))
    {
        bUseHalfSize = false;

        if (log != null)
        {
            log.WriteLine("Half size disabled for non 'float' basetypes!");
        }
    }

    m_bHalfSize = bUseHalfSize;
    m_cuda = cuda;
    m_log = log;
    m_tag = tag;

    if (lCapacity > 0)
    {
        m_nDeviceID = m_cuda.GetDeviceID();
        // BUGFIX: pass m_bHalfSize so the initial allocation honors the
        // half-size setting, consistent with Allocate(lCount, bUseHalfSize)
        // which uses the same AllocMemory(long, bool) overload.  Previously
        // the flag was recorded but ignored here, allocating full-size memory.
        m_hGpuData = m_cuda.AllocMemory(lCapacity, m_bHalfSize);
        m_lCapacity = lCapacity;
        m_lCount = lCapacity;
    }
}
/// <summary>
/// Allocate a number of items in GPU memory and save the handle.
/// </summary>
/// <param name="lCount">Specifies the number of items.</param>
/// <param name="bUseHalfSize">Optionally, specifies to use half sized memory (default = false). This only applies to the 'float' base type.</param>
public void Allocate(long lCount, bool bUseHalfSize = false)
{
    // Half precision storage is only supported for the 'float' base type.
    if (bUseHalfSize && typeof(T) != typeof(float))
    {
        bUseHalfSize = false;
        m_log?.WriteLine("Half size disabled for non 'float' basetypes!");
    }

    // Release any allocation currently held before acquiring a new one.
    free();

    // Record the device the new buffer lives on, then allocate using the
    // (possibly downgraded) half-size setting.  The buffer is owned by this
    // instance and starts out logically empty.
    m_nDeviceID = m_cuda.GetDeviceID();
    m_bHalfSize = bUseHalfSize;
    m_hGpuData = m_cuda.AllocMemory(lCount, m_bHalfSize);
    m_bOwnData = true;
    m_lCapacity = lCount;
    m_lCount = 0;
}
/// <summary>
/// The SyncedMemory constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="lCapacity">Optionally, specifies the capacity of the SyncedMemory (in items).</param>
public SyncedMemory(CudaDnn<T> cuda, Log log, long lCapacity = 0)
{
    m_cuda = cuda;
    m_log = log;

    // No up-front capacity requested — leave the GPU buffer unallocated.
    if (lCapacity <= 0)
        return;

    // Pre-allocate on the current device; the memory is treated as fully
    // populated (count == capacity).
    m_nDeviceID = m_cuda.GetDeviceID();
    m_hGpuData = m_cuda.AllocMemory(lCapacity);
    m_lCapacity = lCapacity;
    m_lCount = lCapacity;
}
/// <summary>
/// Sample entry point: allocates a 1000-item float buffer on the GPU,
/// carves it into 10 memory pointers of 100 items each, fills each slice
/// with a distinct value, reads everything back and verifies it.
/// </summary>
/// <param name="args">Command line arguments (unused).</param>
static void Main(string[] args)
{
    // This memory will reside on the GPU.
    long hGpuMem = 0;

    // Hoisted out of the try block so the finally block can release the
    // pointer handles (previously declared inside try and leaked).
    List<long> rghGpuMem = new List<long>();

    Console.WriteLine("Creating CudaCuDnn...");
    CudaDnn<float> cuda = new CudaDnn<float>(0);

    try
    {
        string strDeviceInfo = cuda.GetDeviceName(0);
        Console.WriteLine(strDeviceInfo);

        strDeviceInfo = cuda.GetDeviceP2PInfo(0);
        Console.WriteLine(strDeviceInfo);

        strDeviceInfo = cuda.GetDeviceInfo(0, true);
        Console.WriteLine(strDeviceInfo);

        long lOffset = 0;

        // You must first allocate the GPU memory to use.
        // Below we will allocate an array of 1000 float values.
        Console.WriteLine("Allocate 1000 items...");
        hGpuMem = cuda.AllocMemory(1000);
        cuda.set(1000, hGpuMem, 0.0);

        Console.WriteLine("Create memory pointers...");

        // Each pointer is a 100-item view into the parent allocation,
        // filled with the value i+1 so slices are distinguishable below.
        for (int i = 0; i < 10; i++)
        {
            long hMem1 = cuda.CreateMemoryPointer(hGpuMem, lOffset, 100);
            cuda.set(100, hMem1, (double)(i + 1));
            rghGpuMem.Add(hMem1);
            lOffset += 100;
        }

        Console.WriteLine("Test memory...");

        for (int i = 0; i < 10; i++)
        {
            long hMem1 = rghGpuMem[i];
            float[] rgData = cuda.GetMemoryFloat(hMem1);

            if (rgData.Length != 100)
                throw new Exception("The data length should = 100!");

            for (int j = 0; j < 100; j++)
            {
                if (rgData[j] != (float)(i + 1))
                    throw new Exception("The data at index " + j.ToString() + " is not correct!");
            }
        }

        Console.WriteLine("Memory test passed successfully!");
    }
    catch (Exception excpt)
    {
        Console.WriteLine("ERROR: " + excpt.Message);
    }
    finally
    {
        // Clean-up and release all GPU memory used.
        // BUGFIX: release each memory-pointer handle before freeing the
        // parent allocation — these handles were previously never freed.
        foreach (long hMem in rghGpuMem)
        {
            cuda.FreeMemoryPointer(hMem);
        }
        rghGpuMem.Clear();

        if (hGpuMem != 0)
        {
            cuda.FreeMemory(hGpuMem);
            hGpuMem = 0;
        }

        cuda.Dispose();
    }

    Console.WriteLine("Press any key to exit.");
    Console.Read();
}