/// <summary>
        /// Fills the raw buffer of <c>Derivative</c> from the raw buffer of <c>ForwardOutput</c>.
        /// For every sample index below <c>sampleNumber</c> and every pair (i, j) of indices
        /// below <c>categoryNumber</c>, the entry for (sample, i, j) is set to
        /// <c>out[i] * (1 - out[j])</c> when i equals j and to <c>-out[i] * out[j]</c> otherwise,
        /// where <c>out</c> is that sample's slice of <c>ForwardOutput</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this has the form of a softmax Jacobian, so <c>ForwardOutput</c>
        /// presumably holds softmax outputs - confirm against the forward pass.
        /// </remarks>
        private void ComputeDerivative()
        {
            var jacobianValues = Derivative.GetRawValues();
            var forwardValues = ForwardOutput.GetRawValues();

            Parallel.For(0, sampleNumber, sample =>
            {
                // Offset of this sample's first category inside the forward-output buffer.
                var sampleBase = ForwardOutput.GetRawOffset(sample, 0);

                Parallel.For(0, categoryNumber, row =>
                {
                    // Offset of entry (sample, row, 0); entry (sample, row, col) is addressed
                    // as rowBase + col, i.e. the row is treated as contiguous.
                    var rowBase = Derivative.GetRawOffset(sample, row, 0);
                    for (int col = 0; col < categoryNumber; col++)
                    {
                        var rowValue = forwardValues[(int)(sampleBase + row)];
                        var colValue = forwardValues[(int)(sampleBase + col)];

                        // Diagonal entries use out[i] * (1 - out[j]); off-diagonal entries use -out[i] * out[j].
                        jacobianValues[rowBase + col] = row == col
                            ? rowValue * (1 - colValue)
                            : -rowValue * colValue;
                    }
                });
            });
        }
        //This variant does not create extra temporary objects; the drawback is that Derivative is no longer stored
        //private void ErrorBP(Tensor output, Tensor error, Tensor result, int sampleIndex)
        //{
        //    for (int i = 0; i < categoryNumber; i++)
        //    {
        //        var der = 0d;
        //        for (int j = 0; j < categoryNumber; j++)
        //        {
        //            if (i == j)
        //                der += output[sampleIndex, i] * (1 - output[sampleIndex, j]) * error[sampleIndex, j];
        //            else
        //                der += -output[sampleIndex, i] * output[sampleIndex, j] * error[sampleIndex, j];
        //        }
        //        result[sampleIndex, i] = der;
        //    }
        //}

        /// <summary>
        /// Propagates <paramref name="error"/> backwards through the values stored in
        /// <c>Derivative</c> and writes the result into the raw buffer of <c>BackwardOutput</c>.
        /// For every sample index below <c>sampleNumber</c> and every index i below
        /// <c>categoryNumber</c>, the output entry is the sum over j of
        /// <c>Derivative[sample, i, j] * error[sample, j]</c>.
        /// </summary>
        /// <param name="error">
        /// Tensor whose raw values are read for each sample, starting at
        /// <c>error.GetRawOffset(sample, 0)</c>.
        /// </param>
        /// <remarks>
        /// NOTE(review): the write position in <c>BackwardOutput</c> is computed from the offset
        /// of <paramref name="error"/>, so both tensors presumably share the same layout - confirm.
        /// </remarks>
        private void ErrorBP(TensorOld error)
        {
            var jacobianValues = Derivative.GetRawValues();
            var errorValues    = error.GetRawValues();
            var backwardValues = BackwardOutput.GetRawValues();

            Parallel.For(0, sampleNumber, sample =>
            {
                var sampleBase = error.GetRawOffset(sample, 0);

                // The two nested loops are not a strict matrix operation: the derivative should be
                // error * jacobian. Because the Jacobian matrix is symmetric, the inner product of
                // each Jacobian row with the error is used instead, which is easier to write as a loop.
                Parallel.For(0, categoryNumber, row =>
                {
                    var rowBase = Derivative.GetRawOffset(sample, row, 0);
                    var dot     = 0d;

                    var col = 0;
                    while (col < categoryNumber)
                    {
                        dot += jacobianValues[rowBase + col] * errorValues[sampleBase + col];
                        col++;
                    }

                    backwardValues[sampleBase + row] = dot;
                });
            });
        }