/// <summary>
/// Fills the raw buffer of <c>Derivative</c> from the values in <c>ForwardOutput</c>.
/// For every sample and every category pair (i, j) the value written is
/// <c>out[i] * (1 - out[j])</c> when i == j and <c>-out[i] * out[j]</c> otherwise.
/// </summary>
/// <remarks>
/// This has the form of a per-sample softmax Jacobian, assuming ForwardOutput holds
/// softmax outputs — TODO confirm against the forward pass.
/// </remarks>
private void ComputeDerivative()
{
    var jacobianBuffer = Derivative.GetRawValues();
    var activations = ForwardOutput.GetRawValues();

    Parallel.For(0, sampleNumber, sampleIndex =>
    {
        // Offset of this sample's first output value inside the activation buffer.
        var sampleBase = ForwardOutput.GetRawOffset(sampleIndex, 0);

        Parallel.For(0, categoryNumber, i =>
        {
            // Offset of row i of this sample's block inside the derivative buffer.
            var rowBase = Derivative.GetRawOffset(sampleIndex, i, 0);

            // out[i] does not change while walking the row, so read it once.
            var rowValue = activations[(int)(sampleBase + i)];

            for (int j = 0; j < categoryNumber; j++)
            {
                var columnValue = activations[(int)(sampleBase + j)];

                jacobianBuffer[rowBase + j] = i == j
                    ? rowValue * (1 - columnValue)   // diagonal entry
                    : -rowValue * columnValue;       // off-diagonal entry
            }
        });
    });
}
// This approach does not create any extra temporary objects; the drawback is that
// Derivative is no longer stored.
//private void ErrorBP(Tensor output, Tensor error, Tensor result, int sampleIndex)
//{
//    for (int i = 0; i < categoryNumber; i++)
//    {
//        var der = 0d;
//        for (int j = 0; j < categoryNumber; j++)
//        {
//            if (i == j)
//                der += output[sampleIndex, i] * (1 - output[sampleIndex, j]) * error[sampleIndex, j];
//            else
//                der += -output[sampleIndex, i] * output[sampleIndex, j] * error[sampleIndex, j];
//        }
//        result[sampleIndex, i] = der;
//    }
//}

/// <summary>
/// Combines the incoming <paramref name="error"/> with the values stored in
/// <c>Derivative</c> and writes the result into the raw buffer of <c>BackwardOutput</c>.
/// For every sample and every category i, the written value is the sum over j of
/// <c>Derivative[sample, i, j] * error[sample, j]</c>.
/// </summary>
/// <param name="error">Tensor whose raw values are read as the per-sample error terms.</param>
private void ErrorBP(TensorOld error)
{
    var jacobianBuffer = Derivative.GetRawValues();
    var upstream = error.GetRawValues();
    var downstream = BackwardOutput.GetRawValues();

    Parallel.For(0, sampleNumber, sampleIndex =>
    {
        // Offset of this sample's first value inside the error buffer.
        var sampleBase = error.GetRawOffset(sampleIndex, 0);

        // The two nested loops below are not a strict matrix operation: the derivative
        // should be error * jacob. Because the Jacobian matrix is symmetric, the inner
        // product of each Jacobian row with error is used instead, which is convenient
        // to write as a loop.
        Parallel.For(0, categoryNumber, i =>
        {
            var rowBase = Derivative.GetRawOffset(sampleIndex, i, 0);

            var dot = 0d;
            for (int j = 0; j < categoryNumber; j++)
            {
                dot += jacobianBuffer[rowBase + j] * upstream[sampleBase + j];
            }

            // NOTE(review): the destination index reuses the offset computed from `error`;
            // this presumably relies on BackwardOutput having the same layout as error — confirm.
            downstream[sampleBase + i] = dot;
        });
    });
}