private void softmax_fwd(Blob<float> blobBottom, Blob<float> blobClip, Blob<float> blobScale, Blob<float> blobTop, int nAxis)
{
    int nCount = blobBottom.count();
    int nOuterNum = blobBottom.count(0, nAxis);
    int nInnerNum = blobBottom.count(nAxis + 1);
    int nChannels = blobTop.shape(nAxis);
    long hBottomData = blobBottom.gpu_data;
    long hTopData = blobTop.mutable_gpu_data;
    long hScaleData = blobScale.mutable_gpu_data;
    CudaDnn<float> cuda = m_mycaffe.Cuda;

    cuda.copy(nCount, hBottomData, hTopData);

    // Apply clip.
    if (blobClip != null)
    {
        cuda.channel_scale(nCount, blobTop.num, blobTop.channels, blobTop.count(2), blobTop.gpu_data, blobClip.gpu_data, blobTop.mutable_gpu_data);
    }

    // We need to subtract the max to avoid numerical issues, compute the exp
    // and then normalize.
    // compute max.
    cuda.channel_max(nOuterNum * nInnerNum, nOuterNum, nChannels, nInnerNum, hTopData, hScaleData);
    // subtract
    cuda.channel_sub(nCount, nOuterNum, nChannels, nInnerNum, hScaleData, hTopData);
    // exponentiate
    cuda.exp(nCount, hTopData, hTopData);

    // Apply clip to remove 1's.
    if (blobClip != null)
    {
        cuda.channel_scale(nCount, blobTop.num, blobTop.channels, blobTop.count(2), blobTop.gpu_data, blobClip.gpu_data, blobTop.mutable_gpu_data);
    }

    // Sum after exp
    cuda.channel_sum(nOuterNum * nInnerNum, nOuterNum, nChannels, nInnerNum, hTopData, hScaleData);
    // divide
    cuda.channel_div(nCount, nOuterNum, nChannels, nInnerNum, hScaleData, hTopData);
    // Denan for divide by zero.
    cuda.denan(nCount, blobTop.mutable_gpu_data, 0);
}
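// For clarity, the following is a minimal CPU sketch (not part of the original code) of what the
// channel primitives above compute: a numerically stable softmax taken across the channel axis,
// applied independently at each (outer, inner) position.  The class and method names are
// illustrative only, and 'using System;' is assumed.
public static class SoftmaxForwardReference
{
    // rgData is laid out as [nOuterNum, nChannels, nInnerNum]; the softmax runs across the
    // nChannels dimension for every (outer, inner) pair.
    public static float[] ChannelSoftmax(float[] rgData, int nOuterNum, int nChannels, int nInnerNum)
    {
        float[] rgOut = new float[rgData.Length];

        for (int o = 0; o < nOuterNum; o++)
        {
            for (int i = 0; i < nInnerNum; i++)
            {
                // channel_max: find the per-position max to stabilize the exp.
                float fMax = float.MinValue;
                for (int c = 0; c < nChannels; c++)
                    fMax = Math.Max(fMax, rgData[(o * nChannels + c) * nInnerNum + i]);

                // channel_sub, exp and channel_sum.
                float fSum = 0;
                for (int c = 0; c < nChannels; c++)
                {
                    int nIdx = (o * nChannels + c) * nInnerNum + i;
                    rgOut[nIdx] = (float)Math.Exp(rgData[nIdx] - fMax);
                    fSum += rgOut[nIdx];
                }

                // channel_div, with a zero guard standing in for the denan step.
                for (int c = 0; c < nChannels; c++)
                {
                    int nIdx = (o * nChannels + c) * nInnerNum + i;
                    rgOut[nIdx] = (fSum == 0) ? 0 : rgOut[nIdx] / fSum;
                }
            }
        }

        return rgOut;
    }
}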
/// <summary>
/// The 'softmax' operation runs the softmax on each item in the btm blob and places the results in the top blob.
/// </summary>
/// <param name="btm">Specifies the input data.</param>
/// <param name="top">Specifies the output data.</param>
/// <returns>The top blob is returned.</returns>
public Blob<T> softmax(Blob<T> btm, Blob<T> top)
{
    string strMarker = marker;
    top.ReshapeLike(btm);

    int nOuterNum = btm.count(0, m_nAxis);
    int nInnerNum = btm.count(m_nAxis + 1);
    int nChannels = top.shape(m_nAxis);
    int nCount = btm.count();

    work.ReshapeLike(top);
    m_cuda.copy(nCount, btm.gpu_data, top.mutable_gpu_data);

    // We need to subtract the max to avoid numerical issues, compute the exp
    // and then normalize.
    // compute max.
    m_cuda.channel_max(nOuterNum * nInnerNum, nOuterNum, nChannels, nInnerNum, top.gpu_data, work.mutable_gpu_data);
    // subtract
    m_cuda.channel_sub(nCount, nOuterNum, nChannels, nInnerNum, work.gpu_data, top.mutable_gpu_data);
    // exponentiate
    m_cuda.exp(nCount, top.gpu_data, top.mutable_gpu_data);
    // Sum after exp
    m_cuda.channel_sum(nOuterNum * nInnerNum, nOuterNum, nChannels, nInnerNum, top.gpu_data, work.mutable_gpu_data);
    // divide
    m_cuda.channel_div(nCount, nOuterNum, nChannels, nInnerNum, work.gpu_data, top.mutable_gpu_data);

    if (m_bNeedsBackprop)
    {
        Action backward = () =>
        {
            work.ReshapeLike(top);
            m_cuda.copy(nCount, top.gpu_diff, work.mutable_gpu_diff);

            // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff.
            m_cuda.channel_dot(nOuterNum * nInnerNum, nOuterNum, nChannels, nInnerNum, top.gpu_diff, top.gpu_data, work.mutable_gpu_data);
            m_cuda.channel_sub(nCount, nOuterNum, nChannels, nInnerNum, work.gpu_data, work.mutable_gpu_diff);

            // elementwise multiplication
            m_cuda.mul(nCount, work.gpu_diff, top.gpu_data, work.mutable_gpu_diff);

            apply(work, btm);

            if (m_bClipGradients)
            {
                clip_gradient(btm);
            }

            if (m_bCheckForNans)
            {
                check_nan(btm);
            }

            if (m_bAddDebug)
            {
                add_debug(strMarker + " - softmax", btm, top);
            }
        };

        m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
    }

    return top;
}
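// A minimal CPU sketch (not part of the original code) of the backward step registered above.
// It applies the standard softmax Jacobian-vector product, grad_x = (grad_y - dot(grad_y, y)) * y,
// with the dot product taken across the channel axis (the channel_dot, channel_sub and mul calls
// in the GPU version).  The class and method names are illustrative only.
public static class SoftmaxBackwardReference
{
    // rgTopData is the softmax output y and rgTopDiff is grad_y; both are laid out as
    // [nOuterNum, nChannels, nInnerNum].  Returns the diff to accumulate into the bottom blob.
    public static float[] ChannelSoftmaxGrad(float[] rgTopData, float[] rgTopDiff, int nOuterNum, int nChannels, int nInnerNum)
    {
        float[] rgBtmDiff = new float[rgTopData.Length];

        for (int o = 0; o < nOuterNum; o++)
        {
            for (int i = 0; i < nInnerNum; i++)
            {
                // channel_dot: inner1d(top_diff, top_data) across the channels.
                float fDot = 0;
                for (int c = 0; c < nChannels; c++)
                {
                    int nIdx = (o * nChannels + c) * nInnerNum + i;
                    fDot += rgTopDiff[nIdx] * rgTopData[nIdx];
                }

                // channel_sub then mul: (top_diff - dot) * top_data.
                for (int c = 0; c < nChannels; c++)
                {
                    int nIdx = (o * nChannels + c) * nInnerNum + i;
                    rgBtmDiff[nIdx] = (rgTopDiff[nIdx] - fDot) * rgTopData[nIdx];
                }
            }
        }

        return rgBtmDiff;
    }
}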