Example #1
        public override void learnNet(State state, int timeat)
        {
            //combined regularization factor (learning rate times weight decay) used in the updates below
            double beta2 = beta * alpha;
            if (m_bCRFTraining == true)
            {
                //For RNN-CRF, use the joint probability of output-layer nodes and transitions between contiguous nodes
                for (int c = 0; c < L2; c++)
                {
                    neuOutput[c].er = -m_Diff[timeat][c];
                }
                neuOutput[state.GetLabel()].er = 1 - m_Diff[timeat][state.GetLabel()];
            }
            else
            {
                //For standard RNN
                for (int c = 0; c < L2; c++)
                {
                    neuOutput[c].er = -neuOutput[c].ac;
                }
                neuOutput[state.GetLabel()].er = 1 - neuOutput[state.GetLabel()].ac;
            }

            //Get the sparse features and apply them to the hidden layer
            var sparse = state.GetSparseData();
            int sparseFeatureSize = sparse.GetNumberOfEntries();

            //accumulate the partial derivatives stored in the weight and cell classes
            Parallel.For(0, L1, parallelOption, i =>
            {
                LSTMWeight[] w_i = mat_input2hidden[i];
                LSTMCell c = neuHidden[i];
                for (int k = 0; k < sparseFeatureSize; k++)
                {
                    var entry = sparse.GetEntry(k);
                    LSTMWeight w = w_i[entry.Key];
                    w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + gPrime(c.netCellState) * c.yIn * entry.Value;
                    w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * entry.Value;
                    w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * fPrime(c.netForget) * entry.Value;

                }

                if (fea_size > 0)
                {
                    w_i = mat_feature2hidden[i];
                    for (int j = 0; j < fea_size; j++)
                    {
                        LSTMWeight w = w_i[j];
                        w_i[j].dSInputCell = w.dSInputCell * c.yForget + gPrime(c.netCellState) * c.yIn * neuFeatures[j].ac;
                        w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * neuFeatures[j].ac;
                        w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * fPrime(c.netForget) * neuFeatures[j].ac;

                    }
                }

                //partial derivatives for internal connections
                c.dSWCellIn = c.dSWCellIn * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * c.cellState;

                //partial derivative for the internal forget-gate connection; initially zero since dS and the previous cell state start at zero
                c.dSWCellForget = c.dSWCellForget * c.yForget + c.previousCellState * fPrime(c.netForget) * c.previousCellState;

                neuHidden[i] = c;
            });

            //for all output neurons
            for (int k = 0; k < L2; k++)
            {
                //for each connection to the hidden layer
                double er = neuOutput[k].er;
                for (int j = 0; j <= L1; j++)
                {
                    deltaHiddenOutput[j][k] = alpha * neuHidden[j].cellOutput * er;
                }
            }

            //for each hidden neuron
            Parallel.For(0, L1, parallelOption, i =>
              {
              LSTMCell c = neuHidden[i];

              //find the error as the weighted sum of the output errors and their connection weights
              double weightedSum = 0;
              for (int k = 0; k < L2; k++)
              {
                  weightedSum += neuOutput[k].er * mat_hidden2output[i][k];
              }

              //using the error find the gradient of the output gate
              c.gradientOutputGate = fPrime(c.netOut) * activationFunctionH(c.cellState) * weightedSum;

              //internal cell state error
              c.cellStateError = c.yOut * weightedSum * hPrime(c.cellState);

              //weight updates

              //already done the deltas for the hidden-output connections

              //update the weights on the connections from the sparse input layer
              //(cell input, input gate, forget gate, and output gate)
              LSTMWeight[] w_i = mat_input2hidden[i];
              for (int k = 0; k < sparseFeatureSize; k++)
              {
                  var entry = sparse.GetEntry(k);
                  //updates weights for input to hidden layer
                  if ((counter % 10) == 0)	//regularization is done every 10th step
                  {
                      w_i[entry.Key].wInputCell += alpha * c.cellStateError * w_i[entry.Key].dSInputCell - w_i[entry.Key].wInputCell * beta2;
                      w_i[entry.Key].wInputInputGate += alpha * c.cellStateError * w_i[entry.Key].dSInputInputGate - w_i[entry.Key].wInputInputGate * beta2;
                      w_i[entry.Key].wInputForgetGate += alpha * c.cellStateError * w_i[entry.Key].dSInputForgetGate - w_i[entry.Key].wInputForgetGate * beta2;
                      w_i[entry.Key].wInputOutputGate += alpha * c.gradientOutputGate * entry.Value - w_i[entry.Key].wInputOutputGate * beta2;
                  }
                  else
                  {
                      w_i[entry.Key].wInputCell += alpha * c.cellStateError * w_i[entry.Key].dSInputCell;
                      w_i[entry.Key].wInputInputGate += alpha * c.cellStateError * w_i[entry.Key].dSInputInputGate;
                      w_i[entry.Key].wInputForgetGate += alpha * c.cellStateError * w_i[entry.Key].dSInputForgetGate;
                      w_i[entry.Key].wInputOutputGate += alpha * c.gradientOutputGate * entry.Value;
                  }
              }

              if (fea_size > 0)
              {
                  w_i = mat_feature2hidden[i];
                  for (int j = 0; j < fea_size; j++)
                  {
                      //make the delta equal to the learning rate multiplied by the gradient multiplied by the input for the connection
                      //update connection weights
                      if ((counter % 10) == 0)	//regularization is done every 10th step
                      {
                          w_i[j].wInputCell += alpha * c.cellStateError * w_i[j].dSInputCell - w_i[j].wInputCell * beta2;
                          w_i[j].wInputInputGate += alpha * c.cellStateError * w_i[j].dSInputInputGate - w_i[j].wInputInputGate * beta2;
                          w_i[j].wInputForgetGate += alpha * c.cellStateError * w_i[j].dSInputForgetGate - w_i[j].wInputForgetGate * beta2;
                          w_i[j].wInputOutputGate += alpha * c.gradientOutputGate * neuFeatures[j].ac - w_i[j].wInputOutputGate * beta2;
                      }
                      else
                      {
                          w_i[j].wInputCell += alpha * c.cellStateError * w_i[j].dSInputCell;
                          w_i[j].wInputInputGate += alpha * c.cellStateError * w_i[j].dSInputInputGate;
                          w_i[j].wInputForgetGate += alpha * c.cellStateError * w_i[j].dSInputForgetGate;
                          w_i[j].wInputOutputGate += alpha * c.gradientOutputGate * neuFeatures[j].ac;
                      }

                  }
              }

              //for the internal connection
              double deltaOutputGateCell = alpha * c.gradientOutputGate * c.cellState;

              //using internal partial derivative
              double deltaInputGateCell = alpha * c.cellStateError * c.dSWCellIn;

              double deltaForgetGateCell = alpha * c.cellStateError * c.dSWCellForget;

              //update internal weights
              if ((counter % 10) == 0)	//regularization is done every 10th step
              {
                  c.wCellIn += deltaInputGateCell - c.wCellIn * beta2;
                  c.wCellForget += deltaForgetGateCell - c.wCellForget * beta2;
                  c.wCellOut += deltaOutputGateCell - c.wCellOut * beta2;
              }
              else
              {
                  c.wCellIn += deltaInputGateCell;
                  c.wCellForget += deltaForgetGateCell;
                  c.wCellOut += deltaOutputGateCell;
              }

              neuHidden[i] = c;
              //update weights for hidden to output layer
              for (int k = 0; k < L2; k++)
              {
                  if ((counter % 10) == 0)	//regularization is done every 10th step
                  {
                      mat_hidden2output[i][k] += deltaHiddenOutput[i][k] - mat_hidden2output[i][k] * beta2;
                  }
                  else
                  {
                      mat_hidden2output[i][k] += deltaHiddenOutput[i][k];
                  }
              }
              });
        }
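
The activation helpers used in Example #1 (activationFunctionG, activationFunctionH, fPrime, gPrime, hPrime) are not shown above. Below is a minimal sketch of plausible stand-ins, assuming the conventional LSTM choices of a logistic sigmoid for the gate activations and tanh for the cell input/output squashing; the actual project may use different functions, so treat these as illustrative only.

        //Hypothetical stand-ins for the activation helpers referenced above (an assumption,
        //not taken from the original source): logistic sigmoid f for the gates,
        //tanh for the cell input squashing g and cell output squashing h.
        static class LstmActivationSketch
        {
            static double Sigmoid(double x)
            {
                return 1.0 / (1.0 + Math.Exp(-x));
            }

            //cell input and output squashing functions
            public static double activationFunctionG(double x) { return Math.Tanh(x); }
            public static double activationFunctionH(double x) { return Math.Tanh(x); }

            //derivatives taken with respect to the pre-activation (net) values,
            //matching how fPrime/gPrime/hPrime are called in learnNet above
            public static double fPrime(double x) { double s = Sigmoid(x); return s * (1.0 - s); }
            public static double gPrime(double x) { double t = Math.Tanh(x); return 1.0 - t * t; }
            public static double hPrime(double x) { double t = Math.Tanh(x); return 1.0 - t * t; }
        }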
Example #2
        public override void learnNet(State state, int timeat)
        {
            if (m_bCRFTraining == true)
            {
                //For RNN-CRF, use the joint probability of output-layer nodes and transitions between contiguous nodes
                for (int c = 0; c < L2; c++)
                {
                    neuOutput[c].er = -m_Diff[timeat][c];
                }
                neuOutput[state.GetLabel()].er = 1 - m_Diff[timeat][state.GetLabel()];
            }
            else
            {
                //For standard RNN
                for (int c = 0; c < L2; c++)
                {
                    neuOutput[c].er = -neuOutput[c].ac;
                }
                neuOutput[state.GetLabel()].er = 1 - neuOutput[state.GetLabel()].ac;
            }

            for (int a = 0; a < L1; a++)
            {
                neuHidden[a].er = 0;
            }
            matrixXvectorADD(neuHidden, neuOutput, mat_hidden2output, 0, L2, 0, L1, 1);	//error output->hidden for words from specific class

            Parallel.For(0, L2, parallelOption, c =>
            {
                for (int a = 0; a < L1; a++)
                {
                    double dg = neuOutput[c].er * neuHidden[a].ac;

                    if ((counter % 10) == 0)	//regularization is done every 10th step
                    {
                        mat_hidden2output[c][a] += alpha * (dg - mat_hidden2output[c][a] * beta);
                    }
                    else
                    {
                        mat_hidden2output[c][a] += alpha * dg;
                    }
                }
            });
        }
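
For reference, the matrixXvectorADD call in Example #2 (with its last argument set to 1) propagates the output-layer errors back to the hidden layer before the hidden-to-output weights are updated. The sketch below shows roughly what that step computes, assuming mat_hidden2output[c][a] connects hidden neuron a to output neuron c, matching the weight-update loop above; it is an illustration only, not the library's actual matrixXvectorADD, and the helper name is hypothetical.

        //Hypothetical illustration of the error back-propagation performed by
        //matrixXvectorADD(neuHidden, neuOutput, mat_hidden2output, 0, L2, 0, L1, 1):
        //each hidden neuron accumulates the output errors weighted by its
        //outgoing hidden-to-output connections.
        private void PropagateOutputErrorToHiddenSketch()
        {
            for (int a = 0; a < L1; a++)
            {
                double sum = 0;
                for (int c = 0; c < L2; c++)
                {
                    sum += neuOutput[c].er * mat_hidden2output[c][a];
                }
                neuHidden[a].er = sum;
            }
        }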