/// <summary>
/// Transfers <paramref name="nCount"/> items from GPU memory into this object's host buffer.
/// When the current host buffer capacity is smaller than <paramref name="nCount"/>, the existing
/// buffer is released and a new one sized to <paramref name="nCount"/> is allocated first.
/// </summary>
/// <param name="nCount">Specifies the number of items to copy.</param>
/// <param name="hGpu">Specifies the handle of the source GPU data to copy.</param>
public void CopyFromGpu(int nCount, long hGpu)
{
    bool bNeedsLargerBuffer = m_lCapacity < nCount;

    if (bNeedsLargerBuffer)
    {
        // Release the undersized buffer before allocating its replacement.
        Free();

        m_hBuffer = m_cuda.AllocHostBuffer(nCount);
        m_lCapacity = nCount;
    }

    m_cuda.CopyDeviceToHost(nCount, hGpu, m_hBuffer);
}
/// <summary>
/// Constructs the DecodeLayer, setting its layer type to DECODE and creating the internal
/// blobs (distance-squared, summer vector, data and work) along with the two host buffers
/// (min and max) that are each sized by <c>decode_param.k</c>.
/// </summary>
/// <param name="cuda">Cuda engine.</param>
/// <param name="log">General log.</param>
/// <param name="p">Provides the generic parameter for the DecodeLayer.</param>
public DecodeLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    m_type = LayerParameter.LayerType.DECODE;

    // Internal blobs named after the layer so they can be told apart.
    var strLayerName = m_param.name;

    m_blobDistSq = new Blob<T>(cuda, log, false) { Name = strLayerName + " distsq" };
    m_blobSummerVec = new Blob<T>(cuda, log, false) { Name = strLayerName + " sum" };
    m_blobData = new Blob<T>(cuda, log) { Name = strLayerName + " data" };

    // Host-side buffers, one entry per 'k'.
    m_hMin = cuda.AllocHostBuffer(m_param.decode_param.k);
    m_hMax = cuda.AllocHostBuffer(m_param.decode_param.k);

    // NOTE(review): unlike the blobs above, the work blob name carries no layer-name prefix.
    m_blobWork = new Blob<T>(cuda, log) { Name = "work" };
}
/// <summary>
/// Copies the contents of another SyncedMemory into this one, growing this memory first
/// when its capacity is smaller than the source item count.
/// </summary>
/// <remarks>
/// When both memories share the same kernel handle, the copy is made directly between the
/// GPU data handles.  Otherwise the copy is routed through KernelCopy using a host buffer,
/// which is allocated when none is supplied and replaced when the supplied one is too small.
/// A null source sets this memory's count to zero.
/// </remarks>
/// <param name="src">Specifies the SyncedMemory to copy.</param>
/// <param name="hDstHostBuffer">Optionally, specifies a host buffer used to copy between kernels (default = 0, not used).</param>
/// <returns>The host buffer handle is returned: the one passed in when it was not replaced, otherwise the newly allocated one.</returns>
public long Copy(SyncedMemory<T> src, long hDstHostBuffer = 0)
{
    // Nothing to copy from: this memory becomes empty.
    if (src == null)
    {
        m_lCount = 0;
        return hDstHostBuffer;
    }

    if (src.m_lCount > m_lCapacity)
    {
        Allocate(src.m_lCount);
    }

    m_lCount = src.m_lCount;

    if (m_lCount <= 0)
    {
        return hDstHostBuffer;
    }

    // Same kernel: copy directly between the two GPU data handles.
    if (m_cuda.KernelHandle == src.m_cuda.KernelHandle)
    {
        check_device();
        m_cuda.copy((int)m_lCount, src.m_hGpuData, m_hGpuData);
        return hDstHostBuffer;
    }

    // Different kernels: make sure a host buffer of sufficient capacity is available.
    bool bAllocate = (hDstHostBuffer == 0);

    if (!bAllocate)
    {
        long lHostCapacity = m_cuda.GetHostBufferCapacity(hDstHostBuffer);

        if (lHostCapacity < m_lCount)
        {
            // The supplied buffer is too small - release it and allocate a replacement.
            m_cuda.FreeHostBuffer(hDstHostBuffer);
            bAllocate = true;
        }
    }

    if (bAllocate)
    {
        hDstHostBuffer = m_cuda.AllocHostBuffer(m_lCount);
    }

    src.m_cuda.KernelCopy((int)m_lCount, src.m_hGpuData, 0, m_cuda.KernelHandle, m_hGpuData, 0, hDstHostBuffer, m_cuda.KernelHandle);

    return hDstHostBuffer;
}