/// <summary>
/// Walks over every sample, storing its cross-entropy in the raw values of
/// <c>ForwardOutput</c> and the derivative produced by
/// <c>Derivatives.CrossEntropy</c> in the raw values of <c>BackwardOutput</c>.
/// </summary>
/// <param name="y">Tensor whose slice <c>i</c> along dim 1 is read into <c>yBuff</c>.</param>
/// <param name="yHat">Tensor whose slice <c>i</c> along dim 1 is read into <c>yHatBuff</c>.</param>
/// <remarks>
/// The scratch buffers <c>yBuff</c>, <c>yHatBuff</c> and <c>derBuff</c> are reused on every
/// iteration, so the loop itself allocates nothing.
/// </remarks>
private void ComputeCrossEntropy(TensorOld y, TensorOld yHat)
{
    var lossValues = ForwardOutput.GetRawValues();
    var gradientValues = BackwardOutput.GetRawValues();

    var sample = 0;
    while (sample < sampleNumber)
    {
        // Pull one sample and its corresponding label into the scratch buffers.
        y.GetByDim1(sample, yBuff);
        yHat.GetByDim1(sample, yHatBuff);

        // Cross-entropy of this sample.
        lossValues[sample] = Functions.CrossEntropy(yBuff, yHatBuff);

        // Derivative of the loss with respect to the input, written into derBuff ...
        Derivatives.CrossEntropy(yBuff, yHatBuff, derBuff);

        // ... then appended to the flat gradient storage, one derBuff-sized slot per sample.
        Array.Copy(derBuff, 0, gradientValues, sample * derBuff.Length, derBuff.Length);

        sample++;
    }
}
// Alternative kept for reference: this version creates no extra temporary objects;
// the catch is that Derivative is no longer stored.
//private void ErrorBP(Tensor output, Tensor error, Tensor result, int sampleIndex)
//{
//    for (int i = 0; i < categoryNumber; i++)
//    {
//        var der = 0d;
//        for (int j = 0; j < categoryNumber; j++)
//        {
//            if (i == j)
//                der += output[sampleIndex, i] * (1 - output[sampleIndex, j]) * error[sampleIndex, j];
//            else
//                der += -output[sampleIndex, i] * output[sampleIndex, j] * error[sampleIndex, j];
//        }
//        result[sampleIndex, i] = der;
//    }
//}

/// <summary>
/// For every sample and every category, writes into <c>BackwardOutput</c> the inner product of
/// one row of the stored <c>Derivative</c> with that sample's slice of <paramref name="error"/>.
/// </summary>
/// <param name="error">
/// Incoming error tensor. Its raw offset for <c>(sample, 0)</c> is also used as the write
/// offset into <c>BackwardOutput</c>.
/// NOTE(review): this presumes both tensors share the same raw layout; confirm against callers.
/// </param>
private void ErrorBP(TensorOld error)
{
    var jacobianValues = Derivative.GetRawValues();
    var upstreamValues = error.GetRawValues();
    var resultValues = BackwardOutput.GetRawValues();

    Parallel.For(0, sampleNumber, sample =>
    {
        var sampleBase = error.GetRawOffset(sample, 0);

        // The two nested loops are not strictly a matrix operation: the derivative should be
        // error * jacobian. Because the Jacobian is symmetric, the inner product of each
        // Jacobian row with error is used instead, which is simpler to write as a loop.
        Parallel.For(0, categoryNumber, category =>
        {
            var rowBase = Derivative.GetRawOffset(sample, category, 0);

            // Accumulate in ascending column order.
            var dot = 0d;
            var column = 0;
            while (column < categoryNumber)
            {
                dot += jacobianValues[rowBase + column] * upstreamValues[sampleBase + column];
                column++;
            }

            // Each (sample, category) pair owns a distinct output slot.
            resultValues[sampleBase + category] = dot;
        });
    });
}