/// <summary>
/// Calculate the loss when training.
/// </summary>
/// <param name="sender">Specifies the sender.</param>
/// <param name="e">Specifies the arguments.</param>
private void LossLayer_OnGetLossTraining(object sender, MemoryLossLayerGetLossArgs<float> e)
{
    Phase phase = (e.Tag == null) ? Phase.TRAIN : (Phase)e.Tag;
    Blob<float> btm = e.Bottom[0];
    Blob<float> blobTarget = e.Bottom[1];
    CudaDnn<float> cuda = m_mycaffe.Cuda;
    Net<float> net = m_mycaffe.GetInternalNet(Phase.TRAIN);
    int nIxTarget = (int)blobTarget.GetData(0);

    m_blobProbs.ReshapeLike(btm);
    m_blobScale.ReshapeLike(btm);

    // Compute the softmax probabilities over axis 2.
    softmax_fwd(btm, null, m_blobScale, m_blobProbs, 2);

    int nCount = btm.count(2);
    cuda.copy(nCount, m_blobProbs.gpu_data, btm.mutable_gpu_diff);

    long lPos;
    cuda.max(nCount, btm.gpu_data, out lPos);

    // Cross-entropy loss on the target class probability.
    float fData = btm.GetDiff(nIxTarget);
    e.Loss += (-(float)Math.Log(fData));

    if (phase == Phase.TRAIN)
    {
        // Softmax cross-entropy gradient: prob - 1 at the target index.
        fData -= 1;
        btm.SetDiff(fData, nIxTarget);

        if ((int)lPos == nIxTarget)
            m_nCorrectCount++;
    }

    e.EnableLossUpdate = false;
}
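For reference, the gradient applied above follows the standard softmax cross-entropy derivation: with p = softmax(z) and target index t, loss = -log(p[t]) and dL/dz[i] = p[i] - 1[i == t]. A minimal CPU sketch of that identity (illustrative only; the helper and its parameters are hypothetical, and the real code runs on the GPU):

    // Hypothetical CPU restatement of the softmax cross-entropy loss/gradient above.
    // On return, rgGrad holds dL/dz; the return value is the loss.
    static float SoftmaxCrossEntropy(float[] rgLogits, int nTarget, float[] rgGrad)
    {
        // Max-subtraction for numerical stability (analogous to the m_blobScale intermediate above).
        float fMax = rgLogits[0];
        for (int i = 1; i < rgLogits.Length; i++)
        {
            if (rgLogits[i] > fMax)
                fMax = rgLogits[i];
        }

        float fSum = 0;
        for (int i = 0; i < rgLogits.Length; i++)
        {
            rgGrad[i] = (float)Math.Exp(rgLogits[i] - fMax);
            fSum += rgGrad[i];
        }

        for (int i = 0; i < rgLogits.Length; i++)
            rgGrad[i] /= fSum; // rgGrad now holds the softmax probabilities.

        float fLoss = -(float)Math.Log(rgGrad[nTarget]); // loss = -log(p[target])
        rgGrad[nTarget] -= 1; // gradient = p - onehot(target), matching 'fData -= 1' above.

        return fLoss;
    }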
private void memLoss_OnGetLoss(object sender, MemoryLossLayerGetLossArgs<T> e)
{
    if (m_bSkipLoss)
        return;

    int nCount = m_blobPolicyGradient.count();
    long hPolicyGrad = m_blobPolicyGradient.mutable_gpu_data;
    long hBottomDiff = e.Bottom[0].mutable_gpu_diff;
    long hDiscountedR = m_blobDiscountedR.gpu_data;

    // Calculate the actual loss (the sum of squares of the policy gradient is reported directly).
    double dfSumSq = Utility.ConvertVal<T>(m_blobPolicyGradient.sumsq_data());
    double dfMean = dfSumSq;

    e.Loss = dfMean;
    e.EnableLossUpdate = false; // apply gradients to bottom directly.

    // Modulate the gradient with the advantage (PG magic happens right here.)
    m_mycaffe.Cuda.mul(nCount, hPolicyGrad, hDiscountedR, hPolicyGrad);
    m_mycaffe.Cuda.copy(nCount, hPolicyGrad, hBottomDiff);
    // Negate, for the solver's ApplyUpdate subtracts the gradients.
    m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hBottomDiff);
}
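The hDiscountedR blob above is assumed to already hold the discounted returns used to modulate the gradient. A minimal sketch of how such returns are commonly computed for policy gradients (a hypothetical helper following the usual PG convention, not this trainer's actual implementation):

    // Hypothetical helper: compute discounted returns G[t] = r[t] + gamma * G[t+1],
    // resetting the running return at episode boundaries.
    static float[] ComputeDiscountedRewards(float[] rgRewards, bool[] rgDone, float fGamma)
    {
        float[] rgR = new float[rgRewards.Length];
        float fRunning = 0;

        for (int t = rgRewards.Length - 1; t >= 0; t--)
        {
            if (rgDone[t])
                fRunning = 0; // episode boundary - reset the running return.

            fRunning = rgRewards[t] + fGamma * fRunning;
            rgR[t] = fRunning;
        }

        return rgR;
    }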
/// <summary>
/// Calculate the TD loss and its gradients between the target and actual Q-values.
/// </summary>
/// <param name="sender">Specifies the sender.</param>
/// <param name="e">Specifies the arguments.</param>
private void m_memLoss_ComputeTdLoss(object sender, MemoryLossLayerGetLossArgs<T> e)
{
    MemoryCollection rgMem = m_rgSamples;
    Blob<T> q_values = m_netOutput.blob_by_name("logits");
    Blob<T> next_q_values = m_netTarget.blob_by_name("logits");

    float[] rgActions = rgMem.GetActionsAsOneHotVector(m_nActionCount);
    m_blobActions.ReshapeLike(q_values);
    m_blobActions.mutable_cpu_data = Utility.ConvertVec<T>(rgActions);
    m_blobQValue.ReshapeLike(q_values);

    // q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)
    m_mycaffe.Cuda.mul(m_blobActions.count(), m_blobActions.gpu_data, q_values.gpu_data, m_blobQValue.mutable_gpu_data);
    reduce_sum_axis1(m_blobQValue);

    // next_q_value = next_q_values.max(1)[0]
    m_blobNextQValue.CopyFrom(next_q_values, false, true);
    reduce_argmax_axis1(m_blobNextQValue);

    // expected_q_values
    float[] rgRewards = rgMem.GetRewards();
    m_blobExpectedQValue.ReshapeLike(m_blobQValue);
    m_blobExpectedQValue.mutable_cpu_data = Utility.ConvertVec<T>(rgRewards);

    float[] rgDone = rgMem.GetInvertedDoneAsOneHotVector();
    m_blobDone.ReshapeLike(m_blobQValue);
    m_blobDone.mutable_cpu_data = Utility.ConvertVec<T>(rgDone);

    m_mycaffe.Cuda.mul(m_blobNextQValue.count(), m_blobNextQValue.gpu_data, m_blobDone.gpu_data, m_blobExpectedQValue.mutable_gpu_diff); // next_q_val * (1 - done)
    m_mycaffe.Cuda.mul_scalar(m_blobExpectedQValue.count(), m_fGamma, m_blobExpectedQValue.mutable_gpu_diff);                            // gamma * ^
    m_mycaffe.Cuda.add(m_blobExpectedQValue.count(), m_blobExpectedQValue.gpu_diff, m_blobExpectedQValue.gpu_data, m_blobExpectedQValue.gpu_data); // reward + ^

    // loss = (q_value - expected_q_value.detach()).pow(2)
    m_blobLoss.ReshapeLike(m_blobQValue);
    m_mycaffe.Cuda.sub(m_blobQValue.count(), m_blobQValue.gpu_data, m_blobExpectedQValue.gpu_data, m_blobQValue.mutable_gpu_diff); // q_value - expected_q_value
    m_mycaffe.Cuda.powx(m_blobLoss.count(), m_blobQValue.gpu_diff, 2.0, m_blobLoss.mutable_gpu_data);                              // (q_value - expected_q_value)^2

    // loss = (q_value - expected_q_value.detach()).pow(2) * weights
    m_blobWeights.ReshapeLike(m_blobQValue);
    m_blobWeights.mutable_cpu_data = Utility.ConvertVec<T>(m_rgSamples.Priorities); // weights
    m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_data); // ^ * weights

    // prios = loss + 1e-5
    m_mycaffe.Cuda.copy(m_blobLoss.count(), m_blobLoss.gpu_data, m_blobLoss.mutable_gpu_diff);
    m_mycaffe.Cuda.add_scalar(m_blobLoss.count(), 1e-5, m_blobLoss.mutable_gpu_diff);

    double[] rgPrios = Utility.ConvertVec<T>(m_blobLoss.mutable_cpu_diff);
    for (int i = 0; i < rgPrios.Length; i++)
    {
        m_rgSamples.Priorities[i] = rgPrios[i];
    }

    //-------------------------------------------------------
    // Calculate the gradient - unroll the operations
    // (autograd - psha! how about manualgrad :-D)
    //-------------------------------------------------------

    // initial gradient
    double dfGradient = 1.0;
    if (m_memLoss.layer_param.loss_weight.Count > 0)
        dfGradient *= m_memLoss.layer_param.loss_weight[0];

    // mean gradient - expand and divide by batch count
    dfGradient /= m_blobLoss.count();
    m_blobLoss.SetDiff(dfGradient);

    // multiplication gradient - multiply by the other side.
    m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobWeights.gpu_data, m_blobLoss.mutable_gpu_diff);

    // power gradient - multiply by the exponent.
    m_mycaffe.Cuda.mul_scalar(m_blobLoss.count(), 2.0, m_blobLoss.mutable_gpu_diff);

    // q_value - expected_q_value gradient
    m_mycaffe.Cuda.mul(m_blobLoss.count(), m_blobLoss.gpu_diff, m_blobQValue.gpu_diff, m_blobLoss.mutable_gpu_diff);

    // squeeze/gather gradient
    mul(m_blobLoss, m_blobActions, e.Bottom[0]);

    e.Loss = reduce_mean(m_blobLoss, false);
    e.EnableLossUpdate = false;
}
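The unrolled backward pass above is just the chain rule applied to loss = mean(weights * (q_value - expected_q_value)^2), which gives dLoss/dq = (1/N) * weights * 2 * (q_value - expected_q_value). A CPU sketch of the same chain, with hypothetical arrays standing in for the blobs:

    // Illustrative CPU version of the unrolled gradient above.
    // All names are hypothetical stand-ins for the blobs in the listing.
    static float[] TdLossGradient(float[] rgQ, float[] rgQExpected, float[] rgW, float fLossWeight)
    {
        int nN = rgQ.Length;
        float[] rgGrad = new float[nN];

        for (int i = 0; i < nN; i++)
        {
            float fDiff = rgQ[i] - rgQExpected[i]; // q_value - expected_q_value
            float fG = fLossWeight / nN;           // initial + mean gradient
            fG *= rgW[i];                          // multiplication gradient
            fG *= 2.0f;                            // power gradient
            fG *= fDiff;                           // subtraction gradient
            rgGrad[i] = fG;                        // scattered back through the one-hot gather
        }

        return rgGrad;
    }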
/// <summary>
/// Calculate the loss when testing.
/// </summary>
/// <param name="sender">Specifies the sender.</param>
/// <param name="e">Specifies the arguments.</param>
private void LossLayer_OnGetLossTesting(object sender, MemoryLossLayerGetLossArgs<float> e)
{
    // Run the training handler under the TEST phase by passing the phase via the Tag.
    e.Tag = Phase.TEST;
    LossLayer_OnGetLossTraining(sender, e);
    e.Tag = null;
}
/// <summary>
/// Calculate the loss and initial gradients.
/// </summary>
/// <param name="sender">Specifies the MemoryLoss layer firing the event.</param>
/// <param name="e">Specifies the arguments with the Bottom(s) flowing into the MemoryLoss layer and the loss value to be filled out.</param>
/// <remarks>
/// The initial gradient is calculated such that it encourages the action that was taken to be taken.
///
/// When using a Sigmoid, the gradient = (action=0) ? 1 - Aprob : 0 - Aprob.
/// When using a Softmax, the gradient = the SoftmaxCrossEntropyLoss backward.
///
/// @see [CS231n Convolutional Neural Networks for Visual Recognition](http://cs231n.github.io/neural-networks-2/#losses) by Karpathy, Stanford University
///
/// Regardless of the gradient used, the gradient is then modulated by multiplying it with the discounted rewards.
/// </remarks>
private void memLoss_OnGetLoss(object sender, MemoryLossLayerGetLossArgs<T> e)
{
    if (m_bSkipLoss)
        return;

    int nCount = m_blobPolicyGradient.count();
    long hActionOneHot = m_blobActionOneHot.gpu_data;
    long hPolicyGrad = m_blobPolicyGradient.mutable_gpu_data;
    long hDiscountedR = m_blobDiscountedR.gpu_data;
    double dfLoss;
    Blob<T> blobOriginalBottom = e.Bottom[0];
    int nDataSize = e.Bottom[0].count(1);
    bool bUsingEndData = false;

    // When using a recurrent model and receiving data with more than one sequence,
    // copy and only use the last sequence data.
    if (m_nRecurrentSequenceLength > 1)
    {
        if (e.Bottom[0].num > 1)
        {
            m_blobAprobLogit.CopyFrom(e.Bottom[0], false, true); // copy the data (with reshape)
            m_blobAprobLogit.CopyFrom(e.Bottom[0], true);        // copy the diff

            List<int> rgShape = e.Bottom[0].shape();
            rgShape[0] = 1;
            e.Bottom[0].Reshape(rgShape);
            e.Bottom[0].CopyFrom(m_blobAprobLogit, (m_blobAprobLogit.num - 1) * nDataSize, 0, nDataSize, true, true);
            bUsingEndData = true;
        }
    }

    long hBottomDiff = e.Bottom[0].mutable_gpu_diff;

    // Calculate the initial gradients (policy grad initially just contains the action probabilities).
    if (m_softmax != null)
    {
        BlobCollection<T> colBottom = new BlobCollection<T>();
        BlobCollection<T> colTop = new BlobCollection<T>();

        colBottom.Add(e.Bottom[0]);        // aprob logit
        colBottom.Add(m_blobActionOneHot); // action one-hot vectors
        colTop.Add(m_blobLoss);
        colTop.Add(m_blobPolicyGradient);

        if (!m_bSoftmaxCeSetup)
        {
            m_softmaxCe.Setup(colBottom, colTop);
            m_bSoftmaxCeSetup = true;
        }

        dfLoss = m_softmaxCe.Forward(colBottom, colTop);
        m_softmaxCe.Backward(colTop, new List<bool>() { true, false }, colBottom);
        hPolicyGrad = colBottom[0].gpu_diff;
    }
    else
    {
        // Calculate (a=0) ? 1-aprob : 0-aprob
        m_mycaffe.Cuda.add_scalar(nCount, -1.0, hActionOneHot); // invert one hot
        m_mycaffe.Cuda.abs(nCount, hActionOneHot, hActionOneHot);
        m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad);   // negate Aprob
        m_mycaffe.Cuda.add(nCount, hActionOneHot, hPolicyGrad, hPolicyGrad); // gradient = ((a=0)?1:0) - Aprob

        dfLoss = Utility.ConvertVal<T>(m_blobPolicyGradient.sumsq_data());

        m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad); // negate, for ApplyUpdate subtracts the gradients
    }

    // Modulate the gradient with the advantage (PG magic happens right here.)
    m_mycaffe.Cuda.mul(nCount, hPolicyGrad, hDiscountedR, hPolicyGrad);

    e.Loss = dfLoss;
    e.EnableLossUpdate = false; // apply gradients to bottom directly.

    if (hPolicyGrad != hBottomDiff)
        m_mycaffe.Cuda.copy(nCount, hPolicyGrad, hBottomDiff);

    // When using a recurrent model with more than one sequence of data, only
    // copy the diff to the last in the sequence and zero out the rest in the sequence.
    if (m_nRecurrentSequenceLength > 1 && bUsingEndData)
    {
        m_blobAprobLogit.SetDiff(0);
        m_blobAprobLogit.CopyFrom(e.Bottom[0], 0, (m_blobAprobLogit.num - 1) * nDataSize, nDataSize, false, true);
        e.Bottom[0].CopyFrom(m_blobAprobLogit, false, true); // restore the data
        e.Bottom[0].CopyFrom(m_blobAprobLogit, true);        // restore the diff
    }
}
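In the sigmoid branch above, the add_scalar/abs/mul_scalar/add sequence computes the gradient ((a == 0) ? 1 : 0) - Aprob entirely on the GPU, then negates it because the solver's update subtracts gradients. The same identity restated on the CPU for clarity (hypothetical arrays, illustration only):

    // Hypothetical CPU restatement of the GPU sequence above:
    // grad = -(((a == 0) ? 1 : 0) - Aprob).
    static float[] SigmoidPolicyGradient(float[] rgActionOneHot, float[] rgAprob)
    {
        float[] rgGrad = new float[rgAprob.Length];

        for (int i = 0; i < rgAprob.Length; i++)
        {
            // |a - 1| inverts the one-hot: (a==0) -> 1, (a==1) -> 0.
            float fA = Math.Abs(rgActionOneHot[i] - 1.0f);
            rgGrad[i] = -(fA - rgAprob[i]); // negated, since the solver subtracts gradients
        }

        return rgGrad;
    }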
/// <summary>
/// Calculate the loss and initial gradients.
/// </summary>
/// <param name="sender">Specifies the MemoryLoss layer firing the event.</param>
/// <param name="e">Specifies the arguments with the Bottom(s) flowing into the MemoryLoss layer and the loss value to be filled out.</param>
/// <remarks>
/// The initial gradient is calculated such that it encourages the action that was taken to be taken.
///
/// When using a Sigmoid, the gradient = (action=0) ? 1 - Aprob : 0 - Aprob.
/// When using a Softmax, the gradient = the SoftmaxCrossEntropyLoss backward.
///
/// @see [CS231n Convolutional Neural Networks for Visual Recognition](http://cs231n.github.io/neural-networks-2/#losses) by Karpathy, Stanford University
///
/// Regardless of the gradient used, the gradient is then modulated by multiplying it with the discounted rewards.
/// </remarks>
private void memLoss_OnGetLoss(object sender, MemoryLossLayerGetLossArgs<T> e)
{
    if (m_bSkipLoss)
        return;

    int nCount = m_blobPolicyGradient.count();
    long hActionOneHot = m_blobActionOneHot.gpu_data;
    long hPolicyGrad = m_blobPolicyGradient.mutable_gpu_data;
    long hBottomDiff = e.Bottom[0].mutable_gpu_diff;
    long hDiscountedR = m_blobDiscountedR.gpu_data;
    double dfLoss;

    // Calculate the initial gradients (policy grad initially just contains the action probabilities).
    if (m_softmax != null)
    {
        BlobCollection<T> colBottom = new BlobCollection<T>();
        BlobCollection<T> colTop = new BlobCollection<T>();

        colBottom.Add(e.Bottom[0]);        // aprob logit
        colBottom.Add(m_blobActionOneHot); // action one-hot vectors
        colTop.Add(m_blobLoss);
        colTop.Add(m_blobPolicyGradient);

        if (!m_bSoftmaxCeSetup)
        {
            m_softmaxCe.Setup(colBottom, colTop);
            m_bSoftmaxCeSetup = true;
        }

        dfLoss = m_softmaxCe.Forward(colBottom, colTop);
        m_softmaxCe.Backward(colTop, new List<bool>() { true, false }, colBottom);
        hPolicyGrad = colBottom[0].gpu_diff;
    }
    else
    {
        // Calculate (a=0) ? 1-aprob : 0-aprob
        m_mycaffe.Cuda.add_scalar(nCount, -1.0, hActionOneHot); // invert one hot
        m_mycaffe.Cuda.abs(nCount, hActionOneHot, hActionOneHot);
        m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad);   // negate Aprob
        m_mycaffe.Cuda.add(nCount, hActionOneHot, hPolicyGrad, hPolicyGrad); // gradient = ((a=0)?1:0) - Aprob

        dfLoss = Utility.ConvertVal<T>(m_blobPolicyGradient.sumsq_data());

        m_mycaffe.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad); // negate, for ApplyUpdate subtracts the gradients
    }

    // Modulate the gradient with the advantage (PG magic happens right here.)
    m_mycaffe.Cuda.mul(nCount, hPolicyGrad, hDiscountedR, hPolicyGrad);

    e.Loss = dfLoss;
    e.EnableLossUpdate = false; // apply gradients to bottom directly.

    if (hPolicyGrad != hBottomDiff)
        m_mycaffe.Cuda.copy(nCount, hPolicyGrad, hBottomDiff);
}
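For context, all of these handlers are fired by MyCaffe's MemoryLossLayer through its OnGetLoss event during the forward pass. A minimal wiring sketch (assumed setup code, not part of the listing; the layer-lookup loop is illustrative):

    // Assumed wiring sketch: locate the MemoryLossLayer in the internal training
    // net (GetInternalNet appears in the listing above) and subscribe the handler.
    Net<T> net = m_mycaffe.GetInternalNet(Phase.TRAIN);

    foreach (Layer<T> layer in net.layers)
    {
        MemoryLossLayer<T> memLoss = layer as MemoryLossLayer<T>;
        if (memLoss != null)
            memLoss.OnGetLoss += memLoss_OnGetLoss;
    }

With EnableLossUpdate set to false in each handler, the layer reports the loss value but skips its own loss-weight scaling, leaving the diffs written to the bottom blob as the gradients the solver consumes.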