        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">bottom input blob (length 2)
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the scores @f$ x \in [-\infty, +\infty] @f$,
        ///     which this layer maps to probability predictions @f$
        ///     \hat{p}_n = \sigma(x_n) \in [0,1]
        ///     @f$
        ///     using the sigmoid function @f$ \sigma(.) @f$ (see SigmoidLayer).
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the targets @f$ y \in [0,1] @f$.
        /// </param>
        /// <param name="colTop">top output blob vector (length 1)
        ///  -# @f$ (1 \times 1 \times 1 \times 1) @f$
        ///     the computed cross-entropy loss: @f$
        ///       E = \frac{-1}{N} \sum\limits_{n=1}^{N} \left[
        ///               y_n \log \hat{p}_n + (1 - y_n) \log(1 - \hat{p}_n)
        ///           \right]
        ///     @f$
        /// </param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // The forward pass computes the sigmoid outputs.
            m_colSigmoidBottomVec[0] = colBottom[0];
            m_sigmoidLayer.Forward(m_colSigmoidBottomVec, m_colSigmoidTopVec);

            // Compute the loss (negative log likelihood)
            int nCount = colBottom[0].count();

            // Stable version of loss computation for input data.
            long hInputData = colBottom[0].gpu_data;
            long hTarget    = colBottom[1].gpu_data;

            // Since this memory is not used for anything until it is overwritten
            // on the backward pass, we use it here to avoid having to allocate new GPU
            // memory to accumulate intermediate results in the kernel.
            long hLossData  = colBottom[0].mutable_gpu_diff;
            long hCountData = colBottom[1].mutable_gpu_diff;

            m_cuda.sigmoid_cross_entropy_fwd(nCount, hInputData, hTarget, hLossData, m_nIgnoreLabel.HasValue, m_nIgnoreLabel.GetValueOrDefault(-1), hCountData);

            double dfValidCount = nCount;

            // Only launch another CUDA kernel if we actually need the valid count.
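            // hCountData is expected to hold a 1 for each element whose target is not the ignore label,
            // so its absolute sum gives the number of valid (non-ignored) elements.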
            if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
            {
                dfValidCount = m_cuda.asum_double(nCount, hCountData);
            }

            double dfLoss = m_cuda.asum_double(nCount, hLossData);

            m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);

            colTop[0].SetData(dfLoss / m_dfNormalizer, 0);
        }
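
        // For reference: the "stable version" noted above avoids evaluating log(sigmoid(x)) directly,
        // which underflows for large negative scores.  The helper below is a minimal CPU sketch of the
        // per-element loss a kernel such as sigmoid_cross_entropy_fwd typically computes; the method
        // name and this exact formulation are illustrative assumptions, not part of the MyCaffe API.
        private static double stable_sigmoid_cross_entropy(double dfX, double dfY)
        {
            // Equivalent to -[y*log(sigmoid(x)) + (1-y)*log(1-sigmoid(x))], rewritten so that
            // Math.Exp is only ever called on a non-positive argument.
            return Math.Max(dfX, 0) - dfX * dfY + Math.Log(1.0 + Math.Exp(-Math.Abs(dfX)));
        }
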
        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">input Blob vector (length 1)
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the inputs @f$ x @f$
        ///  </param>
        /// <param name="colTop">top output Blob vector (length 1)
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the computed outputs @f$
        ///         y = x \sigma (\beta x)
        ///     @f$.
        /// </param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long   hBottomData       = colBottom[0].gpu_data;
            long   hSigmoidInputData = m_blobSigmoidInput.mutable_gpu_data;
            long   hTopData          = colTop[0].mutable_gpu_data;
            int    nCount            = colBottom[0].count();
            double dfBeta            = m_param.swish_param.beta;

            m_cuda.copy(nCount, hBottomData, hSigmoidInputData);
            m_cuda.scal(nCount, dfBeta, hSigmoidInputData);
            m_sigmoidLayer.Forward(m_colSigmoidBottom, m_colSigmoidTop);
            m_cuda.mul(nCount, hBottomData, m_blobSigmoidOutput.gpu_data, hTopData);
        }
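
        // The GPU calls above compute y = x * sigmoid(beta * x) piecewise: copy x into the sigmoid
        // bottom, scale it by beta, run the shared SigmoidLayer, then multiply the result element-wise
        // by x.  The helper below is a minimal CPU sketch of the same computation; the method name is
        // an illustrative assumption, not part of the MyCaffe API.
        private static double swish_forward(double dfX, double dfBeta)
        {
            double dfSigmoid = 1.0 / (1.0 + Math.Exp(-dfBeta * dfX));  // sigma(beta * x)
            return dfX * dfSigmoid;                                    // y = x * sigma(beta * x)
        }
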
        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">bottom input blob (length 2)
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the scores @f$ x \in [-\infty, +\infty] @f$,
        ///     which this layer maps to probability predictions @f$
        ///     \hat{p}_n = \sigma(x_n) \in [0,1]
        ///     @f$
        ///     using the sigmoid function @f$ \sigma(.) @f$ (see SigmoidLayer).
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the targets @f$ y \in [0,1] @f$.
        /// </param>
        /// <param name="colTop">top output blob vector (length 1)
        ///  -# @f$ (1 \times 1 \times 1 \times 1) @f$
        ///     the computed cross-entropy loss: @f$
        ///       E = \frac{-1}{N} \sum\limits_{n=1}^{N} \left[
        ///               y_n \log \hat{p}_n + (1 - y_n) \log(1 - \hat{p}_n)
        ///           \right]
        ///     @f$
        /// </param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // The forward pass computes the sigmoid outputs.
            m_colSigmoidBottomVec[0] = colBottom[0];
            m_sigmoidLayer.Forward(m_colSigmoidBottomVec, m_colSigmoidTopVec);

            // Compute the loss (negative log likelihood)
            int nCount = colBottom[0].count();

            // Stable version of loss computation for input data.
            long hInputData = colBottom[0].gpu_data;
            long hTarget    = colBottom[1].gpu_data;

            // Since this memory is not used for anything else at this point, we use it here to avoid
            // having to allocate new GPU memory to accumulate intermediate results.
            long hLossData  = colBottom[0].mutable_gpu_diff;
            long hCountData = colBottom[1].mutable_gpu_diff;

            m_cuda.cross_entropy_fwd(nCount, hInputData, hTarget, hLossData, m_nIgnoreLabel.HasValue, m_nIgnoreLabel.GetValueOrDefault(-1), hCountData);

            double dfValidCount = nCount;

            // Only launch another CUDA kernel if we actually need the valid count.
            if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
            {
                dfValidCount = m_cuda.asum_double(nCount, hCountData);
            }

            double dfLoss = m_cuda.asum_double(nCount, hLossData);

            m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);

            colTop[0].SetData(dfLoss / m_dfNormalizer, 0);

            // Return the losses in colTop[1] if it exists.
            if (colTop.Count == 2)
            {
                m_cuda.copy(nCount, hLossData, m_blobLoss.mutable_gpu_data);
                colTop[1].ShareData(m_blobLoss);
            }

            // Clear scratch memory to prevent interfering with the backward pass (see #6202)
            colBottom[0].SetDiff(0);
            colBottom[1].SetDiff(0);
        }
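
        // The division by m_dfNormalizer above is what converts the raw summed loss into the requested
        // normalization.  The sketch below shows the mode-to-normalizer mapping that get_normalizer
        // presumably applies, following the Caffe convention; the signature and the clamp to 1 (to
        // avoid a divide-by-zero) are assumptions, not the library's actual implementation.
        private double get_normalizer_sketch(LossParameter.NormalizationMode mode, int nFullCount, int nBatchSize, int nValidCount)
        {
            switch (mode)
            {
                case LossParameter.NormalizationMode.FULL:
                    return Math.Max(1.0, nFullCount);     // average over every element.
                case LossParameter.NormalizationMode.VALID:
                    return Math.Max(1.0, nValidCount);    // average over elements not matching the ignore label.
                case LossParameter.NormalizationMode.BATCH_SIZE:
                    return Math.Max(1.0, nBatchSize);     // average over samples in the batch.
                default:                                  // NONE
                    return 1.0;                           // return the raw sum of per-element losses.
            }
        }
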
        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">bottom input blob (length 2)
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the scores @f$ x \in [-\infty, +\infty] @f$,
        ///     which this layer maps to probability predictions @f$
        ///     \hat{p}_n = \sigma(x_n) \in [0,1]
        ///     @f$
        ///     using the sigmoid function @f$ \sigma(.) @f$ (see SigmoidLayer).
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the targets @f$ y \in [0,1] @f$.
        /// </param>
        /// <param name="colTop">top output blob vector (length 1)
        ///  -# @f$ (1 \times 1 \times 1 \times 1) @f$
        ///     the computed cross-entropy loss: @f$
        ///       E = \frac{-1}{N} \sum\limits_{n=1}^{N} \left[
        ///               y_n \log \hat{p}_n + (1 - y_n) \log(1 - \hat{p}_n)
        ///           \right]
        ///     @f$
        /// </param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // Set the target data.
            if (m_blobTarget != null)
            {
                m_log.CHECK_EQ(colBottom[0].num, colBottom[1].count(), "SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count, or the target must have 'num' items of indexes.");
                m_blobTarget.SetData(0);

                float[] rgfTarget = convertF(colBottom[1].mutable_cpu_data);
                for (int i = 0; i < colBottom[1].num; i++)
                {
                    int nTargetIdx = (int)rgfTarget[i];
                    m_blobTarget.SetData(1.0, m_nInnerNum * i + nTargetIdx);
                }
            }

            // The forward pass computes the sigmoid outputs.
            m_colSigmoidBottomVec[0] = colBottom[0];
            m_sigmoidLayer.Forward(m_colSigmoidBottomVec, m_colSigmoidTopVec);

            // Compute the loss (negative log likelihood)
            int nCount = colBottom[0].count();

            // Stable version of loss computation for input data.
            long hInputData = colBottom[0].gpu_data;
            long hTarget    = (m_blobTarget != null) ? m_blobTarget.gpu_data : colBottom[1].gpu_data;

            // Since this memory is not used for anything else at this point, we use it here to avoid
            // having to allocate new GPU memory to accumulate intermediate results.
            long hLossData  = colBottom[0].mutable_gpu_diff;
            long hCountData = (m_blobTarget != null) ? m_blobTarget.mutable_gpu_diff : colBottom[1].mutable_gpu_diff;

            m_cuda.cross_entropy_fwd(nCount, hInputData, hTarget, hLossData, m_nIgnoreLabel.HasValue, m_nIgnoreLabel.GetValueOrDefault(-1), hCountData);

            double dfValidCount = nCount;

            // Only launch another CUDA kernel if we actually need the valid count.
            if (m_normalization == LossParameter.NormalizationMode.VALID && m_nIgnoreLabel.HasValue)
            {
                dfValidCount = m_cuda.asum_double(nCount, hCountData);
            }

            double dfLoss = m_cuda.asum_double(nCount, hLossData);

            m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);

            colTop[0].SetData(dfLoss / m_dfNormalizer, 0);

            // Return the losses in colTop[1] if it exists.
            if (colTop.Count == 2)
            {
                m_cuda.copy(nCount, hLossData, m_blobLoss.mutable_gpu_data);
                colTop[1].ShareData(m_blobLoss);
            }

            // Clear scratch memory to prevent interfering with the backward pass (see #6202)
            colBottom[0].SetDiff(0);
            colBottom[1].SetDiff(0);

            if (m_blobTarget != null)
            {
                m_blobTarget.SetDiff(0);
            }
        }
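
        // When m_blobTarget is in use, the loop at the top of this method expands sparse class-index
        // labels (one index per sample) into a dense one-hot target of shape num x m_nInnerNum before
        // the loss is evaluated.  The helper below is a minimal CPU illustration of that expansion;
        // the method name is an illustrative assumption, not part of the MyCaffe API.
        private static float[] expand_to_one_hot(float[] rgfTarget, int nNum, int nInnerNum)
        {
            float[] rgfOneHot = new float[nNum * nInnerNum];   // initialized to all zeros.
            for (int i = 0; i < nNum; i++)
            {
                int nTargetIdx = (int)rgfTarget[i];            // class index for sample i.
                rgfOneHot[i * nInnerNum + nTargetIdx] = 1.0f;  // mark the target class.
            }
            return rgfOneHot;
        }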