        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">bottom input blob (length 2)
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the scores @f$ x \in [-\infty, +\infty] @f$,
        ///     which this layer maps to probability predictions @f$
        ///     \hat{p}_n = \sigma(x_n) \in [0,1]
        ///     @f$
        ///     using the softmax function @f$ \sigma(.) @f$ (see SoftmaxLayer).
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the targets @f$ y \in [0,1] @f$.
        /// </param>
        /// <param name="colTop">top output blob vector (length 1)
        ///  -# @f$ (1 \times 1 \times 1 \times 1) @f$
        ///     the computed cross-entropy loss: @f$
        ///       E = \frac{-1}{N} \sum\limits_{n=1}^N \left[
        ///               y_n \log \hat{p}_n + (1 - y_n) \log(1 - \hat{p}_n)
        ///           \right]
        ///     @f$
        /// </param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // Set the target data.
            if (m_blobTarget != null)
            {
                m_log.CHECK_EQ(colBottom[0].num, colBottom[1].count(), "SOFTMAX_CROSS_ENTROPY_LOSS layer inputs must have the same count, or the target must have 'num' items of indexes.");
                m_blobTarget.SetData(0);

                float[] rgfTarget = convertF(colBottom[1].mutable_cpu_data);
                for (int i = 0; i < colBottom[1].num; i++)
                {
                    int nTargetIdx = (int)rgfTarget[i];
                    m_blobTarget.SetData(1.0, m_nInnerNum * i + nTargetIdx);
                }
            }

            // The forward pass computes the softmax outputs.
            m_colSoftmaxBottomVec[0] = colBottom[0];
            m_softmaxLayer.Forward(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);

            // Compute the loss (negative log likelihood)
            int nCount = colBottom[0].count();

            // Stable version of loss computation for input data.
            long hInputData = colBottom[0].gpu_data;
            long hTarget    = (m_blobTarget != null) ? m_blobTarget.gpu_data : colBottom[1].gpu_data;

            // This diff memory is not used for anything else during the forward pass, so we
            // reuse it here to avoid allocating extra GPU memory for intermediate results.
            long hLossData  = colBottom[0].mutable_gpu_diff;
            long hCountData = (m_blobTarget != null) ? m_blobTarget.mutable_gpu_diff : colBottom[1].mutable_gpu_diff;

            m_cuda.cross_entropy_fwd(nCount, hInputData, hTarget, hLossData, false, -1, hCountData);

            double dfValidCount = nCount;
            double dfLoss       = m_cuda.asum_double(nCount, hLossData);

            m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);

            colTop[0].SetData(dfLoss / m_dfNormalizer, 0);

            // Return the losses in colTop[1] if it exists.
            if (colTop.Count == 2)
            {
                m_cuda.copy(nCount, hLossData, m_blobLoss.mutable_gpu_data);
                colTop[1].ShareData(m_blobLoss);
            }

            // Clear scratch memory to prevent interfering with the backward pass (see #6202)
            colBottom[0].SetDiff(0);
            colBottom[1].SetDiff(0);

            if (m_blobTarget != null)
            {
                m_blobTarget.SetDiff(0);
            }
        }
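For reference, below is a minimal CPU-side check of the documented loss E = (-1/N) * sum_n [ y_n * log(p_hat_n) + (1 - y_n) * log(1 - p_hat_n) ]. The helper is hypothetical and not part of SoftmaxCrossEntropyLossLayer: rgProb is assumed to hold the probabilities produced by the internal softmax layer, rgTarget the targets from colBottom[1], and a using System; directive is assumed for Math.

        // Hypothetical CPU check of the documented cross-entropy loss (not part of the layer).
        static double CrossEntropyCheck(float[] rgProb, float[] rgTarget, double dfNormalizer)
        {
            double dfLoss = 0;

            for (int i = 0; i < rgProb.Length; i++)
            {
                // Clamp each probability away from 0 and 1 so the logs stay finite.
                double dfP = Math.Min(Math.Max((double)rgProb[i], 1e-12), 1.0 - 1e-12);
                dfLoss -= rgTarget[i] * Math.Log(dfP) + (1.0 - rgTarget[i]) * Math.Log(1.0 - dfP);
            }

            // Dividing by the normalizer makes the result comparable to colTop[0].
            return dfLoss / dfNormalizer;
        }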
Example #2
        /// <summary>
        /// Forward computation.
        /// </summary>
        /// <param name="colBottom">bottom input blob (length 2)
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the scores @f$ x \in [-\infty, +\infty] @f$,
        ///     which this layer maps to probability predictions @f$
        ///     \hat{p}_n = \sigma(x_n) \in [0,1]
        ///     @f$
        ///     using the softmax function @f$ \sigma(.) @f$ (see SoftmaxLayer).
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the targets @f$ y \in [0,1] @f$.
        /// </param>
        /// <param name="colTop">top output blob vector (length 1)
        ///  -# @f$ (1 \times 1 \times 1 \times 1) @f$
        ///     the computed cross-entropy loss: @f$
        ///       E = \frac{-1}{N} \sum\limits_{n=1}^N \left[
        ///               y_n \log \hat{p}_n + (1 - y_n) \log(1 - \hat{p}_n)
        ///           \right]
        ///     @f$
        /// </param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // The forward pass computes the softmax outputs.
            m_colSoftmaxBottomVec[0] = colBottom[0];
            m_softmaxLayer.Forward(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);

            // Compute the loss (negative log likelihood)
            int nCount = colBottom[0].count();

            // Stable version of loss computation for input data.
            long hInputData = colBottom[0].gpu_data;
            long hTarget    = colBottom[1].gpu_data;

            // This diff memory is not used for anything else during the forward pass, so we
            // reuse it here to avoid allocating extra GPU memory for intermediate results.
            long hLossData  = colBottom[0].mutable_gpu_diff;
            long hCountData = colBottom[1].mutable_gpu_diff;

            m_cuda.cross_entropy_fwd(nCount, hInputData, hTarget, hLossData, false, -1, hCountData);

            double dfValidCount = nCount;
            double dfLoss       = m_cuda.asum_double(nCount, hLossData);

            m_dfNormalizer = get_normalizer(m_normalization, (int)dfValidCount);

            colTop[0].SetData(dfLoss / m_dfNormalizer, 0);

            // Return the losses in colTop[1] if it exists.
            if (colTop.Count == 2)
            {
                m_cuda.copy(nCount, hLossData, m_blobLoss.mutable_gpu_data);
                colTop[1].ShareData(m_blobLoss);
            }

            // Clear scratch memory to prevent interfering with the backward pass (see #6202)
            colBottom[0].SetDiff(0);
            colBottom[1].SetDiff(0);
        }
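The division by m_dfNormalizer above depends on the configured loss normalization. Below is a hedged sketch of the usual Caffe-style mapping; the enum and method are illustrative stand-ins, not the layer's actual get_normalizer, whose details may differ.

        // Illustrative normalization modes (the real layer reads its LossParameter setting).
        enum NormMode { Full, Valid, BatchSize, None }

        // Hedged sketch of a Caffe-style normalizer; Math.Max keeps the divisor from hitting zero.
        static double GetNormalizerSketch(NormMode mode, int nOuterNum, int nInnerNum, int nValidCount)
        {
            double dfNormalizer;

            switch (mode)
            {
                case NormMode.Full:      dfNormalizer = nOuterNum * nInnerNum; break;  // every element counts
                case NormMode.Valid:     dfNormalizer = (nValidCount < 0) ? nOuterNum * nInnerNum : nValidCount; break;
                case NormMode.BatchSize: dfNormalizer = nOuterNum; break;              // average per sample
                default:                 dfNormalizer = 1.0; break;                    // None: return the raw sum
            }

            return Math.Max(1.0, dfNormalizer);
        }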
Example #3
        /// <summary>
        /// The forward computation.
        /// </summary>
        /// <param name="colBottom">bottom input blob vector (length 2-3)
        ///  -# @f$ (N \times C \times H \times W) @f$
        ///     the predictions @f$ \hat{p} @f$, a Blob with values in
        ///     [0,1] indicating the predicted probability of each of the
        ///     K = CHW classes.  Each prediction vector @f$ \hat{p}_n @f$
        ///     should sum to 1 as in a probability distribution:
        ///       @f$ \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$
        ///  -# @f$ (N \times 1 \times 1 \times 1) @f$
        ///     the labels @f$ l @f$, an integer-valued Blob with values
        ///     @f$ l_n \in \{0, 1, 2, ..., K-1\} @f$
        ///     indicating the correct class label among the @f$ K @f$ classes
        ///  -# @f$ (1 \times 1 \times K \times K) @f$
        ///     (\b optional) the infogain matrix @f$ H @f$.  This must be provided as
        ///     the third bottom blob input if it is not provided as the infogain_mat in the
        ///     InfogainLossParameter.  If @f$ H = I @f$, this layer is equivalent to the
        ///     MultinomialLogisticLossLayer.
        /// </param>
        /// <param name="colTop">top output blob vector (length 1)
        ///  -# @f$ (1 \times 1 \times 1 \times 1) @f$
        ///     the computed infogain multinomial logistic loss: @f$ E =
        ///       \frac{-1}{N} \sum\limits_{n=1}^N H_{l_n} \log(\hat{p}_n) =
        ///       \frac{-1}{N} \sum\limits_{n=1}^N \sum\limits_{k=1}^{K} H_{l_n,k}
        ///       \log(\hat{p}_{n,k})
        ///     @f$
        ///     where @f$ H_{l_n} @f$ denotes row @f$ l_n @f$ of @f$ H @f$.
        /// </param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            // The forward pass computes the softmax prob values.
            m_softmaxLayer.Forward(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);

            Blob<T>  blobInfoGain = (colBottom.Count < 3) ? m_blobInfoGain : colBottom[2];
            int      nCount       = 0;
            int      nLabel       = -1;
            double   dfLoss       = 0;
            double   dfProb       = 0;
            double   dfProbLog    = 0;
            double   dfVal;

            if (typeof(T) == typeof(double))
            {
                double[] rgProbData    = (double[])Convert.ChangeType(m_blobProb.update_cpu_data(), typeof(double[]));
                double[] rgBottomLabel = (double[])Convert.ChangeType(colBottom[1].update_cpu_data(), typeof(double[]));
                double[] rgInfoGainMat = (double[])Convert.ChangeType(blobInfoGain.update_cpu_data(), typeof(double[]));

                for (int i = 0; i < m_nOuterNum; i++)
                {
                    for (int j = 0; j < m_nInnerNum; j++)
                    {
                        nLabel = (int)rgBottomLabel[i * m_nInnerNum + j];
                        if (m_nIgnoreLabel.HasValue && m_nIgnoreLabel.Value == nLabel)
                        {
                            continue;
                        }

                        m_log.CHECK_GE(nLabel, 0, "The label should be greater than or equal to 0.");
                        m_log.CHECK_LT(nLabel, m_nNumLabels, "The label should be less than the number of labels '" + m_nNumLabels.ToString() + "'");

                        for (int l = 0; l < m_nNumLabels; l++)
                        {
                            dfProb    = Math.Max(rgProbData[i * m_nInnerNum * m_nNumLabels + l * m_nInnerNum + j], kLOG_THRESHOLD);
                            dfProbLog = Math.Log(dfProb);
                            dfVal     = rgInfoGainMat[nLabel * m_nNumLabels + l] * dfProbLog;
                            dfLoss   -= dfVal;
                        }

                        nCount++;
                    }
                }
            }
            else
            {
                float[] rgProbData    = (float[])Convert.ChangeType(m_blobProb.update_cpu_data(), typeof(float[]));
                float[] rgBottomLabel = (float[])Convert.ChangeType(colBottom[1].update_cpu_data(), typeof(float[]));
                float[] rgInfoGainMat = (float[])Convert.ChangeType(blobInfoGain.update_cpu_data(), typeof(float[]));

                for (int i = 0; i < m_nOuterNum; i++)
                {
                    for (int j = 0; j < m_nInnerNum; j++)
                    {
                        nLabel = (int)rgBottomLabel[i * m_nInnerNum + j];
                        if (m_nIgnoreLabel.HasValue && m_nIgnoreLabel.Value == nLabel)
                        {
                            continue;
                        }

                        m_log.CHECK_GE(nLabel, 0, "The label should be greater than or equal to 0.");
                        m_log.CHECK_LT(nLabel, m_nNumLabels, "The label should be less than the number of labels '" + m_nNumLabels.ToString() + "'");

                        for (int l = 0; l < m_nNumLabels; l++)
                        {
                            dfProb    = Math.Max(rgProbData[i * m_nInnerNum * m_nNumLabels + l * m_nInnerNum + j], kLOG_THRESHOLD);
                            dfProbLog = Math.Log(dfProb);
                            dfVal     = rgInfoGainMat[nLabel * m_nNumLabels + l] * dfProbLog;
                            dfLoss   -= dfVal;
                        }

                        nCount++;
                    }
                }
            }

            double dfNormalizer     = get_normalizer(m_normalization, nCount);
            double dfNormalizedLoss = dfLoss / dfNormalizer;

            colTop[0].SetData(dfNormalizedLoss, 0);

            if (colTop.Count == 2)
            {
                colTop[1].ShareData(m_blobProb);
            }
        }
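To make the indexing in the loops above concrete, here is a hypothetical per-sample helper (its name and arguments are illustrative, not part of InfogainLossLayer). With H stored row-major as rgH[row * K + col], a sample with label nLabel contributes -sum_k H(nLabel, k) * log(p_hat_k); when H = I this reduces to -log(p_hat_nLabel), i.e. the multinomial logistic loss.

        // Hypothetical helper mirroring the inner loop of the forward pass for a single sample.
        static double InfogainSampleLoss(double[] rgH, double[] rgProb, int nLabel, int nNumLabels)
        {
            double dfLoss = 0;

            for (int k = 0; k < nNumLabels; k++)
            {
                // Plays the same role as kLOG_THRESHOLD in the layer code: keep the log finite.
                double dfP = Math.Max(rgProb[k], 1e-20);
                dfLoss -= rgH[nLabel * nNumLabels + k] * Math.Log(dfP);
            }

            return dfLoss;
        }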