/// <summary>
        /// Transfers items from the GPU into this object's host buffer, replacing the host buffer with a
        /// larger one first whenever the requested item count exceeds the current capacity.
        /// </summary>
        /// <param name="nCount">Specifies the number of items to copy.</param>
        /// <param name="hGpu">Specifies the source GPU data to copy from.</param>
        public void CopyFromGpu(int nCount, long hGpu)
        {
            bool bTooSmall = (m_lCapacity < nCount);

            if (bTooSmall)
            {
                // The current buffer cannot hold nCount items: call Free() on it, then
                // allocate a replacement of nCount items and record that as the new capacity.
                Free();
                m_hBuffer   = m_cuda.AllocHostBuffer(nCount);
                m_lCapacity = nCount;
            }

            // The host buffer capacity is now at least nCount, so perform the device-to-host transfer.
            m_cuda.CopyDeviceToHost(nCount, hGpu, m_hBuffer);
        }
Beispiel #2
0
        /// <summary>
        /// Constructs the DecodeLayer: sets the layer type to DECODE, creates the internal blobs
        /// (distance-squared, summer vector, data and work) and allocates the min/max host buffers.
        /// </summary>
        /// <param name="cuda">Cuda engine.</param>
        /// <param name="log">General log.</param>
        /// <param name="p">Provides the generic parameter for the DecodeLayer.</param>
        public DecodeLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.DECODE;

            // Internal blobs named after the layer; the first two pass 'false' as the third Blob constructor argument.
            m_blobDistSq = new Blob<T>(cuda, log, false)
            {
                Name = m_param.name + " distsq"
            };
            m_blobSummerVec = new Blob<T>(cuda, log, false)
            {
                Name = m_param.name + " sum"
            };
            m_blobData = new Blob<T>(cuda, log)
            {
                Name = m_param.name + " data"
            };

            // Host buffers for the min and max values, each sized by decode_param.k.
            m_hMin = cuda.AllocHostBuffer(m_param.decode_param.k);
            m_hMax = cuda.AllocHostBuffer(m_param.decode_param.k);

            // Work blob; its name is the fixed string "work" (not prefixed with the layer name).
            m_blobWork = new Blob<T>(cuda, log)
            {
                Name = "work"
            };
        }
Beispiel #3
0
        /// <summary>
        /// Copies the GPU data of another SyncedMemory into this one, growing this memory when its
        /// capacity is smaller than the source count. When both memories share the same kernel handle
        /// a direct copy is used; otherwise the data is moved with a kernel-to-kernel copy that goes
        /// through a host buffer.
        /// </summary>
        /// <param name="src">Specifies the SyncedMemory to copy. When null, the count of this memory is set to 0 and nothing is copied.</param>
        /// <param name="hDstHostBuffer">Optionally, specifies a host buffer used to copy between kernels (default = 0, not used).</param>
        /// <returns>The dst host buffer handle is returned; this differs from the handle passed in when a buffer was
        /// allocated here (input was 0) or re-allocated here (input capacity was too small).</returns>
        public long Copy(SyncedMemory<T> src, long hDstHostBuffer = 0)
        {
            // No source: this memory becomes empty and the host buffer is handed back untouched.
            if (src == null)
            {
                m_lCount = 0;
                return hDstHostBuffer;
            }

            // Grow this memory when it cannot hold the source item count.
            if (src.m_lCount > m_lCapacity)
            {
                Allocate(src.m_lCount);
            }

            m_lCount = src.m_lCount;

            // Nothing to transfer.
            if (m_lCount <= 0)
            {
                return hDstHostBuffer;
            }

            // Same kernel on both sides: a direct copy suffices and no host buffer is touched.
            if (m_cuda.KernelHandle == src.m_cuda.KernelHandle)
            {
                check_device();
                m_cuda.copy((int)m_lCount, src.m_hGpuData, m_hGpuData);
                return hDstHostBuffer;
            }

            // Different kernels: make sure a host buffer of sufficient capacity is available.
            if (hDstHostBuffer == 0)
            {
                hDstHostBuffer = m_cuda.AllocHostBuffer(m_lCount);
            }
            else
            {
                long lHostCapacity = m_cuda.GetHostBufferCapacity(hDstHostBuffer);

                if (lHostCapacity < m_lCount)
                {
                    // The supplied buffer is too small: free it and allocate a replacement.
                    m_cuda.FreeHostBuffer(hDstHostBuffer);
                    hDstHostBuffer = m_cuda.AllocHostBuffer(m_lCount);
                }
            }

            src.m_cuda.KernelCopy((int)m_lCount, src.m_hGpuData, 0, m_cuda.KernelHandle, m_hGpuData, 0, hDstHostBuffer, m_cuda.KernelHandle);

            return hDstHostBuffer;
        }