Example #1
        /*
         * private double DerivativeTanh(double v)
         * {
         *  double p = Tanh(v);
         *  return 1 - p * p;
         * }
         */
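        /*
         * Note: this unused helper relies on the identity d/dv tanh(v) = 1 - tanh(v)^2.
         * The live code below uses the logistic sigmoid instead; its derivative,
         * s(v) * (1 - s(v)), appears as out * (1 - out) in the theta terms.
         */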

        private void CalculateGradientWork(Object obj)
        {
            ParamterForGradientV2 args = (ParamterForGradientV2)obj;
            int start = args.start_pos;
            int end   = args.end_pos;

            double[,] outputs     = args.sampleOutputs;
            double[,] inputs      = args.sampleInputs;
            double[,] outOUTPUT   = args.outOUTPUT;
            double[,] outHIDDEN   = args.outHIDDEN;
            double[,] gradientWOH = new double[m_OUTPUT, m_HIDDEN];
            double[,] gradientWHI = new double[m_HIDDEN, m_INPUT];
            double[] gradientThreshHIDDEN = new double[m_HIDDEN];
            double[] gradientThreshOUTPUT = new double[m_OUTPUT];

            Array.Clear(gradientWOH, 0, gradientWOH.Length);
            Array.Clear(gradientWHI, 0, gradientWHI.Length);
            Array.Clear(gradientThreshHIDDEN, 0, gradientThreshHIDDEN.Length);
            Array.Clear(gradientThreshOUTPUT, 0, gradientThreshOUTPUT.Length);

            args.gradientThreshHIDDEN = gradientThreshHIDDEN;
            args.gradientThreshOUTPUT = gradientThreshOUTPUT;
            args.gradientWHI          = gradientWHI;
            args.gradientWOH          = gradientWOH;
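
            // Each worker accumulates into these thread-local buffers; the caller
            // merges them once all workers finish, so no locking is needed on the
            // shared m_Gradient* arrays. (C# zero-initializes new arrays, so the
            // Array.Clear calls above are redundant but harmless.)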

            for (int k = start; k < end; ++k)
            {
                double[] thetaOUTPUT = new double[m_OUTPUT];
                // compute the output-layer gradients
                for (int i = 0; i < m_OUTPUT; ++i)
                {
                    double temp = (outputs[i, k] - outOUTPUT[i, k]);
                    thetaOUTPUT[i]           = temp * outOUTPUT[i, k] * (1 - outOUTPUT[i, k]); // also compute the output-layer theta (delta) here
                    gradientThreshOUTPUT[i] -= thetaOUTPUT[i];                                 // accumulate each sample's contribution to the error-function gradient; the sum is the gradient for training on all samples together
                    for (int j = 0; j < m_HIDDEN; ++j)
                    {
                        gradientWOH[i, j] -= thetaOUTPUT[i] * outHIDDEN[j, k]; // output-to-hidden weight gradients
                    }
                }
                // compute the hidden-layer gradients
                for (int i = 0; i < m_HIDDEN; ++i)
                {
                    double temp = 0.0;
                    for (int j = 0; j < m_OUTPUT; ++j)
                    {
                        temp += thetaOUTPUT[j] * m_WOH[j, i];
                    }
                    double theta = temp * outHIDDEN[i, k] * (1 - outHIDDEN[i, k]);
                    gradientThreshHIDDEN[i] -= theta;
                    for (int j = 0; j < m_INPUT; ++j)
                    {
                        gradientWHI[i, j] -= theta * inputs[j, k];
                    }
                }
            }
            lock (this) // serializes the finished-count update; locking a dedicated private object would be more idiomatic
            {
                IncThreadFinishedNum();
            }
        }
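
The ParamterForGradientV2 parameter class is not defined in this excerpt. The following is a minimal sketch of what it presumably contains, inferred from the fields that CalculateGradientWork and CalculateMiniBatchGradient read and write; the layout and visibility are assumptions, and the name keeps the original spelling.

        class ParamterForGradientV2
        {
            public int start_pos; // first sample index handled by the worker (inclusive)
            public int end_pos;   // one past the last sample index (exclusive)

            // shared, read-only data for the worker
            public double[,] sampleInputs;  // m_INPUT  x sampleNum, one sample per column
            public double[,] sampleOutputs; // m_OUTPUT x sampleNum
            public double[,] outHIDDEN;     // m_HIDDEN x sampleNum, hidden-layer activations
            public double[,] outOUTPUT;     // m_OUTPUT x sampleNum, output-layer activations

            // per-thread partial gradients, merged by the caller
            public double[,] gradientWOH;
            public double[,] gradientWHI;
            public double[]  gradientThreshHIDDEN;
            public double[]  gradientThreshOUTPUT;

            public ParamterForGradientV2(int start, int end)
            {
                start_pos = start;
                end_pos   = end;
            }
        }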
Example #2
        // inputs and outputs are column-oriented (one sample per column); the first row of every input is always 1
        public double CalculateMiniBatchGradient(double[,] inputs, double[,] outputs)
        {
            int sampleDim = inputs.GetLength(0); // sampleDim == m_INPUT
            int sampleNum = inputs.GetLength(1);

            Array.Clear(m_GradientThreshHIDDEN, 0, m_GradientThreshHIDDEN.Length);
            Array.Clear(m_GradientWOH, 0, m_GradientWOH.Length);
            Array.Clear(m_GradientThreshOUTPUT, 0, m_GradientThreshOUTPUT.Length);
            Array.Clear(m_GradientWHI, 0, m_GradientWHI.Length);


            // multithreaded path
            double[,] outHIDDEN = new double[m_HIDDEN, sampleNum];
            MatrixOperation op = new MatrixOperation(m_WHI, inputs);

            op.SetResultMatrix(ref outHIDDEN);
            op.MultiplyMatrixMultiThread();
            for (int k = 0; k < sampleNum; ++k)
            {
                for (int i = 0; i < m_HIDDEN; ++i)
                {
                    outHIDDEN[i, k] += m_WThreshHIDDEN[i];
                    outHIDDEN[i, k]  = Sigmoid(outHIDDEN[i, k]);
                }
            }
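
            // outHIDDEN now holds Sigmoid(m_WHI * inputs + threshold) for every sample;
            // the threshold (bias) is added per element after the multithreaded multiply.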

            double[,] outOUTPUT = new double[m_OUTPUT, sampleNum];
            op = new MatrixOperation(m_WOH, outHIDDEN);
            op.SetResultMatrix(ref outOUTPUT);
            op.MultiplyMatrixMultiThread();
            double totalCost = 0.0;

            for (int k = 0; k < sampleNum; ++k)
            {
                for (int i = 0; i < m_OUTPUT; ++i)
                {
                    outOUTPUT[i, k] += m_WThreshOUTPUT[i];
                    outOUTPUT[i, k]  = Sigmoid(outOUTPUT[i, k]);
                    totalCost       += (outOUTPUT[i, k] - outputs[i, k]) * (outOUTPUT[i, k] - outputs[i, k]);
                }
            }
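
            // totalCost now holds the raw sum of squared errors over the batch; it is
            // normalized to the mean squared error, / (2 * sampleNum), before returning,
            // matching the single-threaded reference kept below.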

            /*
             * for (int k = 0; k < sampleNum; ++k)
             * {
             *  double[] thetaOUTPUT = new double[m_OUTPUT];
             *  // compute the output-layer gradients
             *  for (int i = 0; i < m_OUTPUT; ++i)
             *  {
             *      double temp = (outputs[i, k] - outOUTPUT[i, k]);
             *      thetaOUTPUT[i] = temp * outOUTPUT[i, k] * (1 - outOUTPUT[i, k]); // also compute the output-layer theta here
             *      m_GradientThreshOUTPUT[i] -= thetaOUTPUT[i]; // accumulate each sample's gradient contribution; the sum is the batch gradient
             *      for (int j = 0; j < m_HIDDEN; ++j)
             *      {
             *          m_GradientWOH[i, j] -= thetaOUTPUT[i] * outHIDDEN[j, k]; // output-to-hidden weight gradients
             *      }
             *  }
             *  // compute the hidden-layer gradients
             *  for (int i = 0; i < m_HIDDEN; ++i)
             *  {
             *      double temp = 0.0;
             *      for (int j = 0; j < m_OUTPUT; ++j)
             *      {
             *          temp += thetaOUTPUT[j] * m_WOH[j, i];
             *      }
             *      double theta = temp * outHIDDEN[i, k] * (1 - outHIDDEN[i, k]);
             *      m_GradientThreshHIDDEN[i] -= theta;
             *      for (int j = 0; j < m_INPUT; ++j)
             *      {
             *          m_GradientWHI[i, j] -= theta * inputs[j, k];
             *      }
             *  }
             * }
             */

            int totalThread = m_ThreadNum;

            if (m_ThreadNum > sampleNum)
            {
                totalThread = sampleNum;
            }
            int samplePerThread    = sampleNum / totalThread;
            int samplesNotAssigned = sampleNum % totalThread;
            int sampleStart        = 0;
            int sampleEnd          = 0;
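
            // Distribute the remainder: the first (sampleNum % totalThread) workers each
            // take one extra sample below, so every sample is assigned exactly once.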

            ResetThreadFinishedNum();
            ParamterForGradientV2[] ps = new ParamterForGradientV2[totalThread];
            for (int i = 0; i < totalThread; ++i)
            {
                sampleEnd = sampleStart + samplePerThread;
                if (samplesNotAssigned > 0)
                {
                    ++sampleEnd;
                    --samplesNotAssigned;
                }
                ParamterForGradientV2 p = new ParamterForGradientV2(sampleStart, sampleEnd);
                sampleStart     = sampleEnd;
                p.outOUTPUT     = outOUTPUT;
                p.outHIDDEN     = outHIDDEN;
                p.sampleInputs  = inputs;
                p.sampleOutputs = outputs;
                ps[i]           = p;
                Thread thread = new Thread(new ParameterizedThreadStart(CalculateGradientWork));
                thread.Start(p);
            }
            WaitForAllThreadFinished(totalThread);
            for (int k = 0; k < totalThread; ++k)
            {
                for (int i = 0; i < m_OUTPUT; ++i)
                {
                    m_GradientThreshOUTPUT[i] += ps[k].gradientThreshOUTPUT[i];
                    for (int j = 0; j < m_HIDDEN; ++j)
                    {
                        m_GradientWOH[i, j] += ps[k].gradientWOH[i, j];
                    }
                }
                for (int i = 0; i < m_HIDDEN; ++i)
                {
                    m_GradientThreshHIDDEN[i] += ps[k].gradientThreshHIDDEN[i];
                    for (int j = 0; j < m_INPUT; ++j)
                    {
                        m_GradientWHI[i, j] += ps[k].gradientWHI[i, j];
                    }
                }
            }


            // single-threaded version (kept for reference)

            /*
             * double[] outOUTPUT = new double[m_OUTPUT];
             * double[] outHIDDEN = new double[m_HIDDEN];
             * double[] thetaOUTPUT = new double[m_OUTPUT];
             * double totalCost = 0.0;
             * for (int k = 0; k < sampleNum; ++k)
             * {
             *  // compute the hidden-layer outputs
             *  double sig = 0.0;
             *  for (int i = 0; i < m_HIDDEN; ++i)
             *  {
             *      sig = m_WThreshHIDDEN[i];
             *      for (int j = 0; j < m_INPUT; ++j)
             *      {
             *          sig += m_WHI[i, j] * inputs[j,k];
             *      }
             *      outHIDDEN[i] = 1.0 / (1.0 + Math.Exp(-sig));
             *  }
             *  // compute the output-layer outputs, and the gradients along the way
             *  for (int i = 0; i < m_OUTPUT; ++i)
             *  {
             *      sig = m_WThreshOUTPUT[i];
             *      for (int j = 0; j < m_HIDDEN; ++j)
             *      {
             *          sig += m_WOH[i, j] * outHIDDEN[j];
             *      }
             *      outOUTPUT[i] = 1.0 / (1.0 + Math.Exp(-sig));
             *      double temp = (outputs[i,k] - outOUTPUT[i]);
             *      thetaOUTPUT[i] = temp * outOUTPUT[i] * (1 - outOUTPUT[i]); // also compute the output-layer theta here
             *      totalCost += temp * temp; // accumulate the cost over every sample and every output dimension
             *      m_GradientThreshOUTPUT[i] -= thetaOUTPUT[i]; // accumulate each sample's gradient contribution; the sum is the batch gradient
             *      for (int j = 0; j < m_HIDDEN; ++j)
             *      {
             *          m_GradientWOH[i, j] -= thetaOUTPUT[i] * outHIDDEN[j]; // output-to-hidden weight gradients
             *      }
             *  }
             *
             *  for (int i = 0; i < m_HIDDEN; ++i)
             *  {
             *      double temp = 0.0;
             *      for (int j = 0; j < m_OUTPUT; ++j)
             *      {
             *          temp += thetaOUTPUT[j] * m_WOH[j, i];
             *      }
             *      double theta = temp * outHIDDEN[i] * (1 - outHIDDEN[i]);
             *      m_GradientThreshHIDDEN[i] -= theta;
             *      for (int j = 0; j < m_INPUT; ++j)
             *      {
             *          m_GradientWHI[i, j] -= theta * inputs[j,k];
             *      }
             *  }
             * }
             * totalCost /= (2 * sampleNum);
             */
            totalCost /= (2 * sampleNum); // normalize to the mean squared error, matching the single-threaded reference above
            // average the gradients over the batch (gradient expectation)
            ComputeGradientExpectation(sampleNum);
            return totalCost;
        }
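
A minimal usage sketch under stated assumptions: net is an instance of the enclosing network class, and ApplyGradients is a hypothetical weight-update step that subtracts learningRate times the m_Gradient* buffers from the corresponding weights and thresholds; neither ApplyGradients nor the batch arrays appear in this excerpt.

        // Hypothetical mini-batch training loop; net, ApplyGradients, learningRate,
        // maxEpochs, batchInputs, and batchOutputs are assumptions for illustration.
        // Samples are stored one per column, with row 0 of batchInputs fixed at 1.
        for (int epoch = 0; epoch < maxEpochs; ++epoch)
        {
            double cost = net.CalculateMiniBatchGradient(batchInputs, batchOutputs);
            net.ApplyGradients(learningRate); // e.g. w -= learningRate * gradient
            Console.WriteLine("epoch " + epoch + ": cost = " + cost);
        }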