/// <summary>
/// Worker entry point for one gradient thread. Computes the backpropagation
/// gradients for the sample range [start_pos, end_pos) described by
/// <paramref name="obj"/> (a <c>ParamterForGradientV2</c>) and publishes the
/// per-thread partial sums back onto that parameter object, where the caller
/// aggregates them after all workers finish.
/// </summary>
/// <param name="obj">Boxed <c>ParamterForGradientV2</c> carrying the sample
/// range, the batch inputs/targets, and the precomputed layer activations
/// (<c>outHIDDEN</c>, <c>outOUTPUT</c>, column-per-sample).</param>
private void CalculateGradientWork(Object obj)
{
    ParamterForGradientV2 args = (ParamterForGradientV2)obj;
    int start = args.start_pos;
    int end = args.end_pos;
    double[,] outputs = args.sampleOutputs;
    double[,] inputs = args.sampleInputs;
    double[,] outOUTPUT = args.outOUTPUT;
    double[,] outHIDDEN = args.outHIDDEN;

    // Newly allocated arrays are already zero-initialized in C#, so the
    // explicit Array.Clear calls the original version made were redundant.
    double[,] gradientWOH = new double[m_OUTPUT, m_HIDDEN];
    double[,] gradientWHI = new double[m_HIDDEN, m_INPUT];
    double[] gradientThreshHIDDEN = new double[m_HIDDEN];
    double[] gradientThreshOUTPUT = new double[m_OUTPUT];

    // Publish this thread's accumulators so the caller can merge them.
    args.gradientThreshHIDDEN = gradientThreshHIDDEN;
    args.gradientThreshOUTPUT = gradientThreshOUTPUT;
    args.gradientWHI = gradientWHI;
    args.gradientWOH = gradientWOH;

    for (int k = start; k < end; ++k)
    {
        double[] thetaOUTPUT = new double[m_OUTPUT];
        // Output-layer deltas: theta = (target - output) * sigmoid'(net),
        // where sigmoid'(net) = output * (1 - output).
        for (int i = 0; i < m_OUTPUT; ++i)
        {
            double temp = (outputs[i, k] - outOUTPUT[i, k]);
            thetaOUTPUT[i] = temp * outOUTPUT[i, k] * (1 - outOUTPUT[i, k]);
            // Accumulate each sample's contribution; the sum over all
            // samples is the mini-batch gradient for joint training.
            gradientThreshOUTPUT[i] -= thetaOUTPUT[i];
            for (int j = 0; j < m_HIDDEN; ++j)
            {
                // Gradient of the output-to-hidden weights.
                gradientWOH[i, j] -= thetaOUTPUT[i] * outHIDDEN[j, k];
            }
        }
        // Hidden-layer deltas: back-propagate the output deltas through m_WOH.
        for (int i = 0; i < m_HIDDEN; ++i)
        {
            double temp = 0.0;
            for (int j = 0; j < m_OUTPUT; ++j)
            {
                temp += thetaOUTPUT[j] * m_WOH[j, i];
            }
            double theta = temp * outHIDDEN[i, k] * (1 - outHIDDEN[i, k]);
            gradientThreshHIDDEN[i] -= theta;
            for (int j = 0; j < m_INPUT; ++j)
            {
                gradientWHI[i, j] -= theta * inputs[j, k];
            }
        }
    }
    // NOTE(review): lock (this) is an anti-pattern (external code can take the
    // same lock). Kept unchanged here because other members of this class may
    // synchronize on the same object; migrate to a private readonly gate
    // object together with IncThreadFinishedNum/WaitForAllThreadFinished.
    lock (this)
    {
        IncThreadFinishedNum();
    }
}
/// <summary>
/// Runs one forward pass over the mini-batch, accumulates the backprop
/// gradients (multi-threaded, one worker per sample slice) into
/// <c>m_GradientWHI</c>/<c>m_GradientWOH</c>/<c>m_GradientThreshHIDDEN</c>/
/// <c>m_GradientThreshOUTPUT</c>, averages them via
/// <c>ComputeGradientExpectation</c>, and returns the accumulated
/// squared-error cost.
/// </summary>
/// <param name="inputs">Column-oriented samples: one column per sample, one
/// row per input dimension (row count is expected to equal m_INPUT).</param>
/// <param name="outputs">Column-oriented targets matching
/// <paramref name="inputs"/> column-for-column.</param>
/// <returns>Sum of squared errors over all samples and output dimensions
/// (0.0 for an empty batch).</returns>
public double CalculateMiniBatchGradient(double[,] inputs, double[,] outputs)
{
    int sampleDim = inputs.GetLength(0); // sampleDim == m_INPUT
    int sampleNum = inputs.GetLength(1);

    Array.Clear(m_GradientThreshHIDDEN, 0, m_GradientThreshHIDDEN.Length);
    Array.Clear(m_GradientWOH, 0, m_GradientWOH.Length);
    Array.Clear(m_GradientThreshOUTPUT, 0, m_GradientThreshOUTPUT.Length);
    Array.Clear(m_GradientWHI, 0, m_GradientWHI.Length);

    // Guard: an empty batch would otherwise divide by zero when splitting
    // samples across threads (sampleNum / totalThread with totalThread == 0).
    if (sampleNum == 0)
    {
        return 0.0;
    }

    // Forward pass, hidden layer: outHIDDEN = Sigmoid(m_WHI * inputs + bias).
    double[,] outHIDDEN = new double[m_HIDDEN, sampleNum];
    MatrixOperation op = new MatrixOperation(m_WHI, inputs);
    op.SetResultMatrix(ref outHIDDEN);
    op.MultiplyMatrixMultiThread();
    for (int k = 0; k < sampleNum; ++k)
    {
        for (int i = 0; i < m_HIDDEN; ++i)
        {
            outHIDDEN[i, k] += m_WThreshHIDDEN[i];
            outHIDDEN[i, k] = Sigmoid(outHIDDEN[i, k]);
        }
    }

    // Forward pass, output layer, accumulating the squared-error cost.
    double[,] outOUTPUT = new double[m_OUTPUT, sampleNum];
    op = new MatrixOperation(m_WOH, outHIDDEN);
    op.SetResultMatrix(ref outOUTPUT);
    op.MultiplyMatrixMultiThread();
    double totalCost = 0.0;
    for (int k = 0; k < sampleNum; ++k)
    {
        for (int i = 0; i < m_OUTPUT; ++i)
        {
            outOUTPUT[i, k] += m_WThreshOUTPUT[i];
            outOUTPUT[i, k] = Sigmoid(outOUTPUT[i, k]);
            double diff = outOUTPUT[i, k] - outputs[i, k];
            totalCost += diff * diff;
        }
    }

    // Partition the samples across worker threads as evenly as possible;
    // never spawn more threads than there are samples.
    int totalThread = m_ThreadNum;
    if (m_ThreadNum > sampleNum)
    {
        totalThread = sampleNum;
    }
    int samplePerThread = sampleNum / totalThread;
    int samplesNotAssigned = sampleNum % totalThread;
    int sampleStart = 0;
    int sampleEnd = 0;
    ResetThreadFinishedNum();
    ParamterForGradientV2[] ps = new ParamterForGradientV2[totalThread];
    for (int i = 0; i < totalThread; ++i)
    {
        sampleEnd = sampleStart + samplePerThread;
        if (samplesNotAssigned > 0)
        {
            // Spread the remainder: one extra sample for each leading thread.
            ++sampleEnd;
            --samplesNotAssigned;
        }
        ParamterForGradientV2 p = new ParamterForGradientV2(sampleStart, sampleEnd);
        sampleStart = sampleEnd;
        p.outOUTPUT = outOUTPUT;
        p.outHIDDEN = outHIDDEN;
        p.sampleInputs = inputs;
        p.sampleOutputs = outputs;
        ps[i] = p;
        Thread thread = new Thread(new ParameterizedThreadStart(CalculateGradientWork));
        thread.Start(p);
    }
    WaitForAllThreadFinished(totalThread);

    // Merge the per-thread partial gradients into the shared accumulators.
    for (int k = 0; k < totalThread; ++k)
    {
        for (int i = 0; i < m_OUTPUT; ++i)
        {
            m_GradientThreshOUTPUT[i] += ps[k].gradientThreshOUTPUT[i];
            for (int j = 0; j < m_HIDDEN; ++j)
            {
                m_GradientWOH[i, j] += ps[k].gradientWOH[i, j];
            }
        }
        for (int i = 0; i < m_HIDDEN; ++i)
        {
            m_GradientThreshHIDDEN[i] += ps[k].gradientThreshHIDDEN[i];
            for (int j = 0; j < m_INPUT; ++j)
            {
                m_GradientWHI[i, j] += ps[k].gradientWHI[i, j];
            }
        }
    }

    // Average the accumulated gradients over the batch.
    ComputeGradientExpectation(sampleNum);
    // NOTE(review): the retired single-threaded implementation divided
    // totalCost by (2 * sampleNum) before returning, while this path returns
    // the raw sum — confirm which scale callers expect.
    return (totalCost);
}