Example #1
0
        /// <summary>
        /// For each of the <c>sampleNumber</c> samples, computes the cross entropy of the
        /// matching slices of <paramref name="y"/> and <paramref name="yHat"/> and the
        /// derivative of that loss, writing them into the raw value arrays of
        /// <c>ForwardOutput</c> and <c>BackwardOutput</c> respectively.
        /// </summary>
        /// <param name="y">
        /// Tensor whose i-th first-dimension slice is passed as the first argument to
        /// <c>Functions.CrossEntropy</c> and <c>Derivatives.CrossEntropy</c>.
        /// </param>
        /// <param name="yHat">
        /// Tensor whose i-th first-dimension slice is passed as the second argument to
        /// the same two functions.
        /// </param>
        private void ComputeCrossEntropy(TensorOld y, TensorOld yHat)
        {
            // NOTE(review): presumably GetRawValues exposes the backing arrays, so the
            // writes below land in ForwardOutput / BackwardOutput themselves - confirm.
            var lossValues     = ForwardOutput.GetRawValues();
            var gradientValues = BackwardOutput.GetRawValues();

            var sample = 0;
            while (sample < sampleNumber)
            {
                // Pull one sample and its corresponding label into the reusable buffers.
                y.GetByDim1(sample, yBuff);
                yHat.GetByDim1(sample, yHatBuff);

                // One cross-entropy value per sample.
                lossValues[sample] = Functions.CrossEntropy(yBuff, yHatBuff);

                // Derivative of the loss with respect to the input, computed into derBuff
                // and then stored contiguously: sample k occupies
                // [k * derBuff.Length, (k + 1) * derBuff.Length) in the output array.
                Derivatives.CrossEntropy(yBuff, yHatBuff, derBuff);
                var width             = derBuff.Length;
                var destinationOffset = sample * width;
                Array.Copy(derBuff, 0, gradientValues, destinationOffset, width);

                sample++;
            }
        }
        //This method creates no extra temporary objects; the drawback is that the Derivative is no longer stored.
        //private void ErrorBP(Tensor output, Tensor error, Tensor result, int sampleIndex)
        //{
        //    for (int i = 0; i < categoryNumber; i++)
        //    {
        //        var der = 0d;
        //        for (int j = 0; j < categoryNumber; j++)
        //        {
        //            if (i == j)
        //                der += output[sampleIndex, i] * (1 - output[sampleIndex, j]) * error[sampleIndex, j];
        //            else
        //                der += -output[sampleIndex, i] * output[sampleIndex, j] * error[sampleIndex, j];
        //        }
        //        result[sampleIndex, i] = der;
        //    }
        //}

        /// <summary>
        /// Back-propagates <paramref name="error"/> through the stored <c>Derivative</c>:
        /// for every sample and every category i, writes the inner product of row i of
        /// that sample's derivative block with the sample's error row into the raw values
        /// of <c>BackwardOutput</c>.
        /// </summary>
        /// <param name="error">
        /// Incoming error tensor; its raw values are read at
        /// <c>GetRawOffset(sampleIndex, 0) + j</c> for j in [0, categoryNumber).
        /// </param>
        private void ErrorBP(TensorOld error)
        {
            var derData   = Derivative.GetRawValues();
            var errorData = error.GetRawValues();
            var outData   = BackwardOutput.GetRawValues();

            // Only the outer (per-sample) loop is parallelized. Each inner iteration is
            // just a categoryNumber-long dot product, so a nested Parallel.For would
            // spend more on delegate invocation and partitioning than on the arithmetic.
            // Every output element is still summed in the same j-order, so the results
            // are identical to the nested-parallel version.
            Parallel.For(0, sampleNumber, sampleIndex =>
            {
                // NOTE(review): the same offset is used to index both errorData and
                // outData, which assumes BackwardOutput shares error's layout, and that
                // the two do not alias the same storage - confirm against the caller.
                var errorStart = error.GetRawOffset(sampleIndex, 0);

                // The two nested loops are not a strict matrix product. The derivative
                // should be error * jacobian, but because the Jacobian is symmetric the
                // inner product of each Jacobian row with the error gives the same
                // result and is easier to write as loops.
                for (int i = 0; i < categoryNumber; i++)
                {
                    var derStart = Derivative.GetRawOffset(sampleIndex, i, 0);
                    var sum      = 0d;
                    for (int j = 0; j < categoryNumber; j++)
                    {
                        sum += derData[derStart + j] * errorData[errorStart + j];
                    }
                    outData[errorStart + i] = sum;
                }
            });
        }