/// <summary>
/// Allocates GPU memory for the requested number of items and stores the resulting handle.
/// </summary>
/// <remarks>
/// free() is invoked before the new allocation is made. On return the capacity equals
/// <paramref name="lCount"/>, the item count is reset to zero and this instance is flagged
/// as owning its data.
/// </remarks>
/// <param name="lCount">Specifies the number of items to allocate.</param>
public void Allocate(long lCount)
{
    // Let go of whatever this instance currently holds before allocating again.
    free();

    // Record the device that is current at allocation time, then allocate on it.
    m_nDeviceID = m_cuda.GetDeviceID();
    m_hGpuData = m_cuda.AllocMemory(lCount);

    // A new allocation has capacity but no items in use yet, and is owned by this instance.
    m_bOwnData = true;
    m_lCount = 0;
    m_lCapacity = lCount;
}
/// <summary>
/// The SyncedMemory constructor.
/// </summary>
/// <remarks>
/// When <paramref name="lCapacity"/> is greater than zero, GPU memory is allocated immediately
/// and both the capacity and the count are set to <paramref name="lCapacity"/>. Otherwise no
/// GPU memory is allocated by the constructor.
/// </remarks>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output (may be <i>null</i>).</param>
/// <param name="lCapacity">Optionally, specifies the capacity of the SyncedMemory (in items).</param>
/// <param name="tag">Optionally, specifies a tag used for debugging (the default = <i>null</i>).</param>
/// <param name="bUseHalfSize">Optionally, specifies to use half size (FP16) for both data and diff.  This option is only available when using the <i>float</i> base type 'T'; for any other base type the request is ignored and a message is written to the log.</param>
public SyncedMemory(CudaDnn<T> cuda, Log log, long lCapacity = 0, object tag = null, bool bUseHalfSize = false)
{
    // Half size is only supported for the 'float' base type - silently fall back otherwise.
    if (bUseHalfSize && typeof(T) != typeof(float))
    {
        bUseHalfSize = false;

        if (log != null)
        {
            log.WriteLine("Half size disabled for non 'float' basetypes!");
        }
    }

    m_bHalfSize = bUseHalfSize;
    m_cuda = cuda;
    m_log = log;
    m_tag = tag;

    if (lCapacity > 0)
    {
        m_nDeviceID = m_cuda.GetDeviceID();

        // Allocate with the same half-size setting that was just recorded in m_bHalfSize,
        // matching what Allocate(long, bool) does, so the flag and the actual allocation agree.
        m_hGpuData = m_cuda.AllocMemory(lCapacity, m_bHalfSize);
        m_lCapacity = lCapacity;
        m_lCount = lCapacity;
    }
}
/// <summary>
/// The GPUParams constructor.
/// </summary>
/// <remarks>
/// The constructor temporarily makes <paramref name="nDeviceID"/> the current device (when it
/// differs from the device that is current on entry), allocates the data and diff buffers,
/// copies the root solver's learnable parameters into the data buffer, zeroes the diff buffer
/// and creates a stream.  The device that was current on entry is restored before returning,
/// including when one of these steps throws.
/// </remarks>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="root_solver">Specifies the root Solver.</param>
/// <param name="nDeviceID">Specifies the device ID to use for this instance.</param>
public GPUParams(CudaDnn<T> cuda, Log log, Solver<T> root_solver, int nDeviceID)
    : base(root_solver)
{
    m_cuda = cuda;
    m_log = log;

    // Remember the device that is current on entry so that it can be restored afterwards.
    // NOTE(review): m_nDeviceID therefore holds the caller's device, not nDeviceID - confirm this is intended.
    m_nDeviceID = m_cuda.GetDeviceID();

    if (nDeviceID != m_nDeviceID)
    {
        m_cuda.SetDeviceID(nDeviceID);
    }

    try
    {
        // Allocate device buffers
        // (m_lCount is not assigned here; presumably the base constructor sets it - TODO confirm.)
        m_hData = m_cuda.AllocMemory(m_lCount);

        // Copy blob values
        BlobCollection<T> net = root_solver.net.learnable_parameters;
        apply_buffers(net, m_hData, m_lCount, Op.copy);

        // The diff buffer starts out zeroed.
        m_hDiff = m_cuda.AllocMemory(m_lCount);
        m_cuda.set((int)m_lCount, m_hDiff, 0);

        m_hStream = m_cuda.CreateStream();
    }
    finally
    {
        // Always switch back to the original device, even when an allocation or copy fails,
        // so that a failed construction does not leave the caller on the wrong device.
        if (m_nDeviceID != nDeviceID)
        {
            m_cuda.SetDeviceID(m_nDeviceID);
        }
    }
}
/// <summary>
/// Allocates GPU memory for the requested number of items, optionally as half-sized memory,
/// and stores the resulting handle.
/// </summary>
/// <remarks>
/// free() is invoked before the new allocation is made. On return the capacity equals
/// <paramref name="lCount"/>, the item count is reset to zero and this instance is flagged
/// as owning its data.
/// </remarks>
/// <param name="lCount">Specifies the number of items to allocate.</param>
/// <param name="bUseHalfSize">Optionally, specifies to use half sized memory (default = false).  This only applies to the 'float' base type; for any other base type the request is ignored and a message is written to the log (when a log is available).</param>
public void Allocate(long lCount, bool bUseHalfSize = false)
{
    bool bIsFloatBase = (typeof(T) == typeof(float));

    // A half-size request is only honored for the 'float' base type.
    if (bUseHalfSize && !bIsFloatBase)
    {
        bUseHalfSize = false;

        if (m_log != null)
        {
            m_log.WriteLine("Half size disabled for non 'float' basetypes!");
        }
    }

    // Let go of whatever this instance currently holds before allocating again.
    free();

    // Record the device and half-size setting, then allocate with that setting.
    m_nDeviceID = m_cuda.GetDeviceID();
    m_bHalfSize = bUseHalfSize;
    m_hGpuData = m_cuda.AllocMemory(lCount, m_bHalfSize);

    // A new allocation has capacity but no items in use yet, and is owned by this instance.
    m_bOwnData = true;
    m_lCount = 0;
    m_lCapacity = lCount;
}
/// <summary>
/// The SyncedMemory constructor.
/// </summary>
/// <remarks>
/// When <paramref name="lCapacity"/> is greater than zero, GPU memory is allocated immediately
/// and both the capacity and the count are set to <paramref name="lCapacity"/>. Otherwise no
/// GPU memory is allocated by the constructor.
/// </remarks>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="lCapacity">Optionally, specifies the capacity of the SyncedMemory (in items).</param>
public SyncedMemory(CudaDnn<T> cuda, Log log, long lCapacity = 0)
{
    m_cuda = cuda;
    m_log = log;

    // Nothing to allocate up front when no capacity was requested.
    if (lCapacity <= 0)
    {
        return;
    }

    m_nDeviceID = m_cuda.GetDeviceID();
    m_hGpuData = m_cuda.AllocMemory(lCapacity);

    // The constructor treats the full capacity as in use.
    m_lCount = lCapacity;
    m_lCapacity = lCapacity;
}
/// <summary>
/// Decides whether this instance shares the given net (and its Cuda connection) or uses the
/// supplied Cuda connection on its own.
/// </summary>
/// <remarks>
/// The net is shared only when it is non-null and its Cuda connection reports the same device ID
/// as <paramref name="cuda"/>.  In that case the net is stored, the net's Cuda connection is used
/// and the shared-net flag is set.  In every other case only <paramref name="cuda"/> is stored.
/// </remarks>
/// <param name="net">Specifies the net to share, or <i>null</i> when there is none.</param>
/// <param name="cuda">Specifies the Cuda connection to fall back to when the net is not shared.</param>
private void setupNetShare(Net<T> net, CudaDnn<T> cuda)
{
    // Short-circuit keeps the original evaluation order: the null test first, then the
    // net's device ID, then the supplied connection's device ID.
    if (net == null || net.Cuda.GetDeviceID() != cuda.GetDeviceID())
    {
        m_cuda = cuda;
        return;
    }

    // Same device: reuse the net together with its own Cuda connection.
    m_netShare = net;
    m_cuda = net.Cuda;
    m_bUsingSharedNet = true;
}