Example #1
        /// <summary>
        /// Calculate the loss when training.
        /// </summary>
        /// <param name="sender">Specifies the sender</param>
        /// <param name="e">specifies the arguments.</param>
        private void LossLayer_OnGetLossTraining(object sender, MemoryLossLayerGetLossArgs <float> e)
        {
            Phase phase = (e.Tag == null) ? Phase.TRAIN : (Phase)e.Tag;

            Blob <float>    btm        = e.Bottom[0];
            Blob <float>    blobTarget = e.Bottom[1];
            CudaDnn <float> cuda       = m_mycaffe.Cuda;
            Net <float>     net        = m_mycaffe.GetInternalNet(Phase.TRAIN);

            int nIxTarget = (int)blobTarget.GetData(0);

            m_blobProbs.ReshapeLike(btm);
            m_blobScale.ReshapeLike(btm);
            softmax_fwd(btm, null, m_blobScale, m_blobProbs, 2);

            int nCount = btm.count(2);

            // Copy the probabilities into the bottom diff as the starting gradient.
            cuda.copy(nCount, m_blobProbs.gpu_data, btm.mutable_gpu_diff);

            long lPos;

            // Find the index of the most likely class (used for the accuracy count below).
            cuda.max(nCount, btm.gpu_data, out lPos);

            // The loss is the negative log of the probability assigned to the target class.
            float fData = btm.GetDiff(nIxTarget);

            e.Loss += (-(float)Math.Log(fData));

            if (phase == Phase.TRAIN)
            {
                // Softmax cross-entropy gradient at the target index is (probability - 1).
                fData -= 1;
                btm.SetDiff(fData, nIxTarget);

                if ((int)lPos == nIxTarget)
                {
                    m_nCorrectCount++;
                }
            }

            e.EnableLossUpdate = false;
        }
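
Note: the handler above only runs once it is attached to the MemoryLossLayer's OnGetLoss event. A minimal wiring sketch is shown below; the FindLayer call and the MEMORY_LOSS layer type are assumptions about the MyCaffe version in use and may need adjusting.

        // Hypothetical wiring sketch - locate the memory loss layer in the
        // training net and subscribe the loss handler to its OnGetLoss event.
        Net<float> net = m_mycaffe.GetInternalNet(Phase.TRAIN);
        MemoryLossLayer<float> lossLayer = net.FindLayer(LayerParameter.LayerType.MEMORY_LOSS, null) as MemoryLossLayer<float>;

        if (lossLayer != null)
            lossLayer.OnGetLoss += LossLayer_OnGetLossTraining;
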
Example #2
        private void memLoss_OnGetLoss(object sender, MemoryLossLayerGetLossArgs <T> e)
        {
            if (m_bSkipLoss)
            {
                return;
            }

            int  nCount       = m_blobPolicyGradient.count();
            long hPolicyGrad  = m_blobPolicyGradient.mutable_gpu_data;
            long hBottomDiff  = e.Bottom[0].mutable_gpu_diff;
            long hDiscountedR = m_blobDiscountedR.gpu_data;

            // Calculate the actual loss (reported here as the sum of squares of the policy gradient).
            double dfSumSq = Utility.ConvertVal <T>(m_blobPolicyGradient.sumsq_data());
            double dfMean  = dfSumSq;

            e.Loss             = dfMean;
            e.EnableLossUpdate = false; // apply gradients to bottom directly.

            // Modulate the gradient with the advantage (PG magic happens right here.)
            m_mycaffe.Cuda.mul(nCount, hPolicyGrad, hDiscountedR, hPolicyGrad);
            // Copy the modulated gradient to the bottom diff and flip its sign.
            m_mycaffe.Cuda.copy(nCount, hPolicyGrad, hBottomDiff);
            m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hBottomDiff);
        }
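
Reading the CUDA calls back, the handler above reports loss = sum_i g_i^2 (the sum of squares of the stored policy gradient g) and writes bottom_diff_i = -(g_i * R_i) to the bottom blob, where R_i is the discounted return held in m_blobDiscountedR. This is the policy-gradient modulation mentioned in the comment: each per-action gradient is scaled by how good the resulting return turned out to be.
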
Example #3
        /// <summary>
        /// Calculate the gradients between the target m_loss and actual p_loss.
        /// </summary>
        /// <param name="sender">Specifies the sender.</param>
        /// <param name="e">Specifies the arguments.</param>
        private void m_memLoss_ComputeTdLoss(object sender, MemoryLossLayerGetLossArgs <T> e)
        {
            MemoryCollection rgMem = m_rgSamples;

            Blob <T> q_values      = m_netOutput.blob_by_name("logits");
            Blob <T> next_q_values = m_netTarget.blob_by_name("logits");

            float[] rgActions = rgMem.GetActionsAsOneHotVector(m_nActionCount);
            m_blobActions.ReshapeLike(q_values);
            m_blobActions.mutable_cpu_data = Utility.ConvertVec <T>(rgActions);
            m_blobQValue.ReshapeLike(q_values);

            // q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)
            m_mycaffe.Cuda.mul(m_blobActions.count(), m_blobActions.gpu_data, q_values.gpu_data, m_blobQValue.mutable_gpu_data);
            reduce_sum_axis1(m_blobQValue);

            // next_q_value = next_q_values.max(1)[0]
            m_blobNextQValue.CopyFrom(next_q_values, false, true);
            reduce_argmax_axis1(m_blobNextQValue);

            // expected_q_values
            float[] rgRewards = rgMem.GetRewards();
            m_blobExpectedQValue.ReshapeLike(m_blobQValue);
            m_blobExpectedQValue.mutable_cpu_data = Utility.ConvertVec <T>(rgRewards);

            float[] rgDone = rgMem.GetInvertedDoneAsOneHotVector();
            m_blobDone.ReshapeLike(m_blobQValue);
            m_blobDone.mutable_cpu_data = Utility.ConvertVec <T>(rgDone);

            m_mycaffe.Cuda.mul(m_blobNextQValue.count(), m_blobNextQValue.gpu_data, m_blobDone.gpu_data, m_blobExpectedQValue.mutable_gpu_diff);           // next_q_val * (1- done)
            m_mycaffe.Cuda.mul_scalar(m_blobExpectedQValue.count(), m_fGamma, m_blobExpectedQValue.mutable_gpu_diff);                                      // gamma *  ^
            m_mycaffe.Cuda.add(m_blobExpectedQValue.count(), m_blobExpectedQValue.gpu_diff, m_blobExpectedQValue.gpu_data, m_blobExpectedQValue.gpu_data); // reward + ^

            // loss = (q_value - expected_q_value.detach()).pow(2)
            m_blobLoss.ReshapeLike(m_blobQValue);
            m_mycaffe.Cuda.sub(m_blobQValue.count(), m_blobQValue.gpu_data, m_blobExpectedQValue.gpu_data, m_blobQValue.mutable_gpu_diff); // q_value - expected_q_value
            m_mycaffe.Cuda.powx(m_blobLoss.count(), m_blobQValue.gpu_diff, 2.0, m_blobLoss.mutable_gpu_data);                              // (q_value - expected_q_value)^2

            // loss = (q_value - expected_q_value.detach()).pow(2) * weights
            m_blobWeights.ReshapeLike(m_blobQValue);
            m_blobWeights.mutable_cpu_data = Utility.ConvertVec <T>(m_rgSamples.Priorities);                                  // weights
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_data); //    ^ * weights

            // prios = loss + 1e-5
            m_mycaffe.Cuda.copy(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobLoss.mutable_gpu_diff);
            m_mycaffe.Cuda.add_scalar(m_blobLoss.count(), 1e-5, m_blobLoss.mutable_gpu_diff);
            double[] rgPrios = Utility.ConvertVec <T>(m_blobLoss.mutable_cpu_diff);

            // Store the updated priorities back into the sampled memory items.
            for (int i = 0; i < rgPrios.Length; i++)
            {
                m_rgSamples.Priorities[i] = rgPrios[i];
            }

            //-------------------------------------------------------
            //  Calculate the gradient - unroll the operations
            //  (autograd - psha! how about manualgrad :-D)
            //-------------------------------------------------------

            // initial gradient
            double dfGradient = 1.0;

            if (m_memLoss.layer_param.loss_weight.Count > 0)
            {
                dfGradient *= m_memLoss.layer_param.loss_weight[0];
            }

            // mean gradient - expand and divide by batch count
            dfGradient /= m_blobLoss.count();
            m_blobLoss.SetDiff(dfGradient);

            // multiplication gradient - multiply by the other side.
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_diff);

            // power gradient - multiply by the exponent.
            m_mycaffe.Cuda.mul_scalar(m_blobLoss.count(), 2.0, m_blobLoss.mutable_gpu_diff);

            // q_value - expected_q_value gradient
            m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobQValue.gpu_diff, m_blobLoss.mutable_gpu_diff);

            // squeeze/gather gradient
            mul(m_blobLoss, m_blobActions, e.Bottom[0]);

            e.Loss             = reduce_mean(m_blobLoss, false);
            e.EnableLossUpdate = false;
        }
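
The "manualgrad" section above unrolls the derivative of the weighted squared error by hand. With q the gathered Q-value, t the expected (target) Q-value, w the priority weight and N the element count, the loss is mean(w * (q - t)^2) (times any loss_weight), so dloss/dq = loss_weight * (1/N) * w * 2 * (q - t). That is exactly the sequence performed on m_blobLoss: SetDiff with loss_weight/N, multiply by the weights, multiply by 2, multiply by the stored (q - t) difference, and finally scatter the result back through the one-hot action mask into e.Bottom[0].
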
Example #4
        /// <summary>
        /// Calculate the loss when testing.
        /// </summary>
        /// <param name="sender">Specifies the sender.</param>
        /// <param name="e">Specifies the arguments.</param>
        private void LossLayer_OnGetLossTesting(object sender, MemoryLossLayerGetLossArgs <float> e)
        {
            e.Tag = Phase.TEST;
            LossLayer_OnGetLossTraining(sender, e);
            e.Tag = null;
        }
Example #5
        /// <summary>
        /// Calculate the loss and initial gradients.
        /// </summary>
        /// <param name="sender">Specifies the MemoryLoss layer firing the event.</param>
        /// <param name="e">Specifies the arguments with the Bottom(s) flowing into the MemoryLoss layer and the loss value to be filled out.</param>
        /// <remarks>
        /// The initial gradient is calculated such that it encourages the network to take the action that was actually taken.
        ///
        /// When using a Sigmoid, the gradient = (action=0) ? 1 - Aprob : 0 - Aprob.
        /// When using a Softmax, the gradient = the SoftmaxCrossEntropyLoss backward.
        ///
        /// @see [CS231n Convolutional Neural Networks for Visual Recognition](http://cs231n.github.io/neural-networks-2/#losses) by Karpathy, Stanford University
        ///
        /// Regardless of the gradient used, the gradient is then modulated by multiplying it with the discounted rewards.
        /// </remarks>
        private void memLoss_OnGetLoss(object sender, MemoryLossLayerGetLossArgs <T> e)
        {
            if (m_bSkipLoss)
            {
                return;
            }

            int      nCount        = m_blobPolicyGradient.count();
            long     hActionOneHot = m_blobActionOneHot.gpu_data;
            long     hPolicyGrad   = m_blobPolicyGradient.mutable_gpu_data;
            long     hDiscountedR  = m_blobDiscountedR.gpu_data;
            double   dfLoss;
            Blob <T> blobOriginalBottom = e.Bottom[0];
            int      nDataSize          = e.Bottom[0].count(1);
            bool     bUsingEndData      = false;

            // When using a recurrent model and receiving data with more than one sequence,
            // copy and only use the last sequence data.
            if (m_nRecurrentSequenceLength > 1)
            {
                if (e.Bottom[0].num > 1)
                {
                    m_blobAprobLogit.CopyFrom(e.Bottom[0], false, true);
                    m_blobAprobLogit.CopyFrom(e.Bottom[0], true);

                    List <int> rgShape = e.Bottom[0].shape();
                    rgShape[0] = 1;
                    e.Bottom[0].Reshape(rgShape);
                    e.Bottom[0].CopyFrom(m_blobAprobLogit, (m_blobAprobLogit.num - 1) * nDataSize, 0, nDataSize, true, true);
                    bUsingEndData = true;
                }
            }

            long hBottomDiff = e.Bottom[0].mutable_gpu_diff;

            // Calculate the initial gradients (policy grad initially just contains the action probabilities)
            if (m_softmax != null)
            {
                BlobCollection <T> colBottom = new BlobCollection <T>();
                BlobCollection <T> colTop    = new BlobCollection <T>();

                colBottom.Add(e.Bottom[0]);             // aprob logit
                colBottom.Add(m_blobActionOneHot);      // action one-hot vectors
                colTop.Add(m_blobLoss);
                colTop.Add(m_blobPolicyGradient);

                if (!m_bSoftmaxCeSetup)
                {
                    m_softmaxCe.Setup(colBottom, colTop);
                    m_bSoftmaxCeSetup = true;
                }

                dfLoss = m_softmaxCe.Forward(colBottom, colTop);
                m_softmaxCe.Backward(colTop, new List <bool>() { true, false }, colBottom);
                hPolicyGrad = colBottom[0].gpu_diff;
            }
            else
            {
                // Calculate (a=0) ? 1-aprob : 0-aprob
                m_mycaffe.Cuda.add_scalar(nCount, -1.0, hActionOneHot);              // invert one hot
                m_mycaffe.Cuda.abs(nCount, hActionOneHot, hActionOneHot);
                m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad);                // negate Aprob
                m_mycaffe.Cuda.add(nCount, hActionOneHot, hPolicyGrad, hPolicyGrad); // gradient = ((a=0)?1:0) - Aprob
                dfLoss = Utility.ConvertVal <T>(m_blobPolicyGradient.sumsq_data());

                m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad); // invert since ApplyUpdate subtracts the gradients
            }

            // Modulate the gradient with the advantage (PG magic happens right here.)
            m_mycaffe.Cuda.mul(nCount, hPolicyGrad, hDiscountedR, hPolicyGrad);

            e.Loss             = dfLoss;
            e.EnableLossUpdate = false; // apply gradients to bottom directly.

            if (hPolicyGrad != hBottomDiff)
            {
                m_mycaffe.Cuda.copy(nCount, hPolicyGrad, hBottomDiff);
            }

            // When using recurrent model with more than one sequence of data, only
            // copy the diff to the last in the sequence and zero out the rest in the sequence.
            if (m_nRecurrentSequenceLength > 1 && bUsingEndData)
            {
                m_blobAprobLogit.SetDiff(0);
                m_blobAprobLogit.CopyFrom(e.Bottom[0], 0, (m_blobAprobLogit.num - 1) * nDataSize, nDataSize, false, true);
                e.Bottom[0].CopyFrom(m_blobAprobLogit, false, true);
                e.Bottom[0].CopyFrom(m_blobAprobLogit, true);
            }
        }
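
A quick numeric check of the Sigmoid branch in the handler above: with Aprob = 0.7, the add_scalar(-1.0) and abs calls turn the action one-hot value into ((a=0) ? 1 : 0), so the gradient becomes 1 - 0.7 = 0.3 when the action was 0 and 0 - 0.7 = -0.7 when the action was 1, matching the formula in the remarks. The final mul_scalar(-1.0) flips the sign because ApplyUpdate subtracts gradients, and the multiply with hDiscountedR then scales each gradient by its discounted return before it is copied into the bottom diff.
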
Example #6
        /// <summary>
        /// Calculate the loss and initial gradients.
        /// </summary>
        /// <param name="sender">Specifies the MemoryLoss layer firing the event.</param>
        /// <param name="e">Specifies the arguments with the Bottom(s) flowing into the MemoryLoss layer and the loss value to be filled out.</param>
        /// <remarks>
        /// The initial gradient is calculated such that it encourages the network to take the action that was actually taken.
        ///
        /// When using a Sigmoid, the gradient = (action=0) ? 1 - Aprob : 0 - Aprob.
        /// When using a Softmax, the gradient = the SoftmaxCrossEntropyLoss backward.
        ///
        /// @see [CS231n Convolutional Neural Networks for Visual Recognition](http://cs231n.github.io/neural-networks-2/#losses) by Karpathy, Stanford University
        ///
        /// Regardless of the gradient used, the gradient is then modulated by multiplying it with the discounted rewards.
        /// </remarks>
        private void memLoss_OnGetLoss(object sender, MemoryLossLayerGetLossArgs <T> e)
        {
            if (m_bSkipLoss)
            {
                return;
            }

            int    nCount        = m_blobPolicyGradient.count();
            long   hActionOneHot = m_blobActionOneHot.gpu_data;
            long   hPolicyGrad   = m_blobPolicyGradient.mutable_gpu_data;
            long   hBottomDiff   = e.Bottom[0].mutable_gpu_diff;
            long   hDiscountedR  = m_blobDiscountedR.gpu_data;
            double dfLoss;

            // Calculate the initial gradients (policy grad initially just contains the action probabilities)
            if (m_softmax != null)
            {
                BlobCollection <T> colBottom = new BlobCollection <T>();
                BlobCollection <T> colTop    = new BlobCollection <T>();

                colBottom.Add(e.Bottom[0]);             // aprob logit
                colBottom.Add(m_blobActionOneHot);      // action one-hot vectors
                colTop.Add(m_blobLoss);
                colTop.Add(m_blobPolicyGradient);

                if (!m_bSoftmaxCeSetup)
                {
                    m_softmaxCe.Setup(colBottom, colTop);
                    m_bSoftmaxCeSetup = true;
                }

                dfLoss = m_softmaxCe.Forward(colBottom, colTop);
                m_softmaxCe.Backward(colTop, new List <bool>() { true, false }, colBottom);
                hPolicyGrad = colBottom[0].gpu_diff;
            }
            else
            {
                // Calculate (a=0) ? 1-aprob : 0-aprob
                m_mycaffe.Cuda.add_scalar(nCount, -1.0, hActionOneHot);              // invert one hot
                m_mycaffe.Cuda.abs(nCount, hActionOneHot, hActionOneHot);
                m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad);                // negate Aprob
                m_mycaffe.Cuda.add(nCount, hActionOneHot, hPolicyGrad, hPolicyGrad); // gradient = ((a=0)?1:0) - Aprob
                dfLoss = Utility.ConvertVal <T>(m_blobPolicyGradient.sumsq_data());

                m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad); // invert since ApplyUpdate subtracts the gradients
            }

            // Modulate the gradient with the advantage (PG magic happens right here.)
            m_mycaffe.Cuda.mul(nCount, hPolicyGrad, hDiscountedR, hPolicyGrad);

            e.Loss             = dfLoss;
            e.EnableLossUpdate = false; // apply gradients to bottom directly.

            if (hPolicyGrad != hBottomDiff)
            {
                m_mycaffe.Cuda.copy(nCount, hPolicyGrad, hBottomDiff);
            }
        }
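
For the Softmax branch in the two handlers above, the remarks defer to the SoftmaxCrossEntropyLoss backward pass. The standard result for softmax followed by cross-entropy is that the gradient with respect to each logit is (predicted probability - one-hot target), so after m_softmaxCe.Backward the diff in colBottom[0] already raises the taken action's probability once the solver subtracts it; the subsequent multiply with hDiscountedR modulates that gradient by the discounted return, just as in the Sigmoid branch.
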