Example #1
 public Sequence(int numStates)
 {
     States = new State[numStates];
     for (int i = 0; i < numStates; i++)
     {
         States[i] = new State();
     }
 }
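A minimal usage sketch for the constructor above (hypothetical calling code; only Sequence and its States array come from the example):

 var sequence = new Sequence(10);    // allocates States[0..9], each slot already holding a fresh State
 State first = sequence.States[0];   // safe to use immediately; no null entries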
Example #2
 public void SetSize(int numStates)
 {
     if (m_NumStates != numStates)
     {
         m_NumStates = numStates;
         m_States = null;
         if (m_NumStates > 0)
         {
             m_States = new State[m_NumStates];
             for (int i = 0; i < m_NumStates; i++)
             {
                 m_States[i] = new State();
             }
         }
     }
 }
Example #3
        public override void LearnBackTime(State state, int numStates, int curState)
        {
            if (bptt > 0)
            {
                //shift memory needed for bptt to next time step
                for (int a = bptt + bptt_block - 1; a > 0; a--)
                    bptt_inputs[a] = bptt_inputs[a - 1];
                bptt_inputs[0] = state.GetSparseData();

                for (int a = bptt + bptt_block - 1; a > 0; a--)
                {
                    for (int b = 0; b < L1; b++)
                    {
                        bptt_hidden[a * L1 + b] = bptt_hidden[(a - 1) * L1 + b];
                    }
                }

                for (int a = bptt + bptt_block - 1; a > 0; a--)
                {
                    for (int b = 0; b < fea_size; b++)
                    {
                        bptt_fea[a * fea_size + b].ac = bptt_fea[(a - 1) * fea_size + b].ac;
                    }
                }
            }

            //Save hidden and feature layer node values for BPTT
            for (int b = 0; b < L1; b++)
            {
                bptt_hidden[b] = neuHidden[b];
            }
            for (int b = 0; b < fea_size; b++)
            {
                bptt_fea[b].ac = neuFeatures[b].ac;
            }

            // time to learn bptt
            if (((counter % bptt_block) == 0) || (curState == numStates - 1))
            {
                learnBptt(state);
            }
        }
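The three shift loops above move every BPTT history buffer back by one time step before the current values are written into slot 0. As Example #9 below does with flat arrays, the same shift can be written more compactly with Array.Copy; a sketch under the assumption that bptt_hidden and bptt_fea are flat arrays laid out as [step * width + index] (note this copies whole elements, whereas the loop above copies only the ac field of bptt_fea):

            for (int a = bptt + bptt_block - 1; a > 0; a--)
            {
                bptt_inputs[a] = bptt_inputs[a - 1];
                Array.Copy(bptt_hidden, (a - 1) * L1, bptt_hidden, a * L1, L1);
                Array.Copy(bptt_fea, (a - 1) * fea_size, bptt_fea, a * fea_size, fea_size);
            }
            bptt_inputs[0] = state.GetSparseData();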
Example #4
 public override void computeNet(State state, double[] doutput)
 {
 }
Example #5
 public override void learnNet(State state, int timeat)
 {
 }
Example #6
        public override void computeNet(State state, double[] doutput, bool isTrain = true)
        {

        }
Example #7
        public override void learnNet(State state, int timeat)
        {
            //create delta list
            double beta2 = beta * alpha;
            if (m_bCRFTraining == true)
            {
                //For RNN-CRF, use the joint probability of output layer nodes and transitions between contiguous nodes
                for (int c = 0; c < L2; c++)
                {
                    neuOutput[c].er = -m_Diff[timeat][c];
                }
                neuOutput[state.GetLabel()].er = 1 - m_Diff[timeat][state.GetLabel()];
            }
            else
            {
                //For standard RNN
                for (int c = 0; c < L2; c++)
                {
                    neuOutput[c].er = -neuOutput[c].ac;
                }
                neuOutput[state.GetLabel()].er = 1 - neuOutput[state.GetLabel()].ac;
            }

            //Get the sparse features and apply them to the hidden layer
            var sparse = state.GetSparseData();
            int sparseFeatureSize = sparse.GetNumberOfEntries();

            //put variables for derivatives in the weight class and cell class
            Parallel.For(0, L1, parallelOption, i =>
            {
                LSTMWeight[] w_i = mat_input2hidden[i];
                LSTMCell c = neuHidden[i];
                for (int k = 0; k < sparseFeatureSize; k++)
                {
                    var entry = sparse.GetEntry(k);
                    LSTMWeight w = w_i[entry.Key];
                    w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + gPrime(c.netCellState) * c.yIn * entry.Value;
                    w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * entry.Value;
                    w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * fPrime(c.netForget) * entry.Value;

                }

                if (fea_size > 0)
                {
                    w_i = mat_feature2hidden[i];
                    for (int j = 0; j < fea_size; j++)
                    {
                        LSTMWeight w = w_i[j];
                        w_i[j].dSInputCell = w.dSInputCell * c.yForget + gPrime(c.netCellState) * c.yIn * neuFeatures[j].ac;
                        w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * neuFeatures[j].ac;
                        w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * fPrime(c.netForget) * neuFeatures[j].ac;

                    }
                }

                //partial derivatives for internal connections
                c.dSWCellIn = c.dSWCellIn * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * c.cellState;

                //partial derivatives for internal connections, initially zero as dS is zero and previous cell state is zero
                c.dSWCellForget = c.dSWCellForget * c.yForget + c.previousCellState * fPrime(c.netForget) * c.previousCellState;

                neuHidden[i] = c;
            });

            //for all output neurons
            for (int k = 0; k < L2; k++)
            {
                //for each connection to the hidden layer
                double er = neuOutput[k].er;
                for (int j = 0; j <= L1; j++)
                {
                    deltaHiddenOutput[j][k] = alpha * neuHidden[j].cellOutput * er;
                }
            }

            //for each hidden neuron
            Parallel.For(0, L1, parallelOption, i =>
              {
              LSTMCell c = neuHidden[i];

              //find the error by taking the product of the output errors and their weight connections.
              double weightedSum = 0;
              for (int k = 0; k < L2; k++)
              {
                  weightedSum += neuOutput[k].er * mat_hidden2output[i][k];
              }

              //using the error find the gradient of the output gate
              c.gradientOutputGate = fPrime(c.netOut) * activationFunctionH(c.cellState) * weightedSum;

              //internal cell state error
              c.cellStateError = c.yOut * weightedSum * hPrime(c.cellState);

              //weight updates

              //the deltas for the hidden-output connections were already computed above

              //update weights on the connections from the input layer to this hidden cell
              LSTMWeight[] w_i = mat_input2hidden[i];
              for (int k = 0; k < sparseFeatureSize; k++)
              {
                  var entry = sparse.GetEntry(k);
                  //updates weights for input to hidden layer
                  if ((counter % 10) == 0)	//regularization is applied every 10th step
                  {
                      w_i[entry.Key].wInputCell += alpha * c.cellStateError * w_i[entry.Key].dSInputCell - w_i[entry.Key].wInputCell * beta2;
                      w_i[entry.Key].wInputInputGate += alpha * c.cellStateError * w_i[entry.Key].dSInputInputGate - w_i[entry.Key].wInputInputGate * beta2;
                      w_i[entry.Key].wInputForgetGate += alpha * c.cellStateError * w_i[entry.Key].dSInputForgetGate - w_i[entry.Key].wInputForgetGate * beta2;
                      w_i[entry.Key].wInputOutputGate += alpha * c.gradientOutputGate * entry.Value - w_i[entry.Key].wInputOutputGate * beta2;
                  }
                  else
                  {
                      w_i[entry.Key].wInputCell += alpha * c.cellStateError * w_i[entry.Key].dSInputCell;
                      w_i[entry.Key].wInputInputGate += alpha * c.cellStateError * w_i[entry.Key].dSInputInputGate;
                      w_i[entry.Key].wInputForgetGate += alpha * c.cellStateError * w_i[entry.Key].dSInputForgetGate;
                      w_i[entry.Key].wInputOutputGate += alpha * c.gradientOutputGate * entry.Value;
                  }
              }

              if (fea_size > 0)
              {
                  w_i = mat_feature2hidden[i];
                  for (int j = 0; j < fea_size; j++)
                  {
                      //make the delta equal to the learning rate multiplied by the gradient multiplied by the input for the connection
                      //update connection weights
                      if ((counter % 10) == 0)	//regularization is applied every 10th step
                      {
                          w_i[j].wInputCell += alpha * c.cellStateError * w_i[j].dSInputCell - w_i[j].wInputCell * beta2;
                          w_i[j].wInputInputGate += alpha * c.cellStateError * w_i[j].dSInputInputGate - w_i[j].wInputInputGate * beta2;
                          w_i[j].wInputForgetGate += alpha * c.cellStateError * w_i[j].dSInputForgetGate - w_i[j].wInputForgetGate * beta2;
                          w_i[j].wInputOutputGate += alpha * c.gradientOutputGate * neuFeatures[j].ac - w_i[j].wInputOutputGate * beta2;
                      }
                      else
                      {
                          w_i[j].wInputCell += alpha * c.cellStateError * w_i[j].dSInputCell;
                          w_i[j].wInputInputGate += alpha * c.cellStateError * w_i[j].dSInputInputGate;
                          w_i[j].wInputForgetGate += alpha * c.cellStateError * w_i[j].dSInputForgetGate;
                          w_i[j].wInputOutputGate += alpha * c.gradientOutputGate * neuFeatures[j].ac;
                      }

                  }
              }

              //for the internal connection
              double deltaOutputGateCell = alpha * c.gradientOutputGate * c.cellState;

              //using internal partial derivative
              double deltaInputGateCell = alpha * c.cellStateError * c.dSWCellIn;

              double deltaForgetGateCell = alpha * c.cellStateError * c.dSWCellForget;

              //update internal weights
              if ((counter % 10) == 0)	//regularization is applied every 10th step
              {
                  c.wCellIn += deltaInputGateCell - c.wCellIn * beta2;
                  c.wCellForget += deltaForgetGateCell - c.wCellForget * beta2;
                  c.wCellOut += deltaOutputGateCell - c.wCellOut * beta2;
              }
              else
              {
                  c.wCellIn += deltaInputGateCell;
                  c.wCellForget += deltaForgetGateCell;
                  c.wCellOut += deltaOutputGateCell;
              }

              neuHidden[i] = c;
              //update weights for hidden to output layer
              for (int k = 0; k < L2; k++)
              {
                  if ((counter % 10) == 0)	//regularization is applied every 10th step
                  {
                      mat_hidden2output[i][k] += deltaHiddenOutput[i][k] - mat_hidden2output[i][k] * beta2;
                  }
                  else
                  {
                      mat_hidden2output[i][k] += deltaHiddenOutput[i][k];
                  }
              }
              });
        }
Example #8
        // forward pass; the output layer holds the tag values
        public override void computeNet(State state, double[] doutput)
        {
            //erase activations
            for (int a = 0; a < L1; a++)
                neuHidden[a].ac = 0;

            //hidden(t-1) -> hidden(t)
            matrixXvectorADD(neuHidden, neuInput, mat_hiddenBpttWeight, 0, L1, L0 - L1, L0, 0);

            //inputs(t) -> hidden(t)
            //Get the sparse features and apply them to the hidden layer
            var sparse = state.GetSparseData();
            int n = sparse.GetNumberOfEntries();

            for (int i = 0; i < n; i++)
            {
                var entry = sparse.GetEntry(i);
                for (int b = 0; b < L1; b++)
                {
                    neuHidden[b].ac += entry.Value * mat_input2hidden[b][entry.Key];
                }
            }

            //fea(t) -> hidden(t)
            if (fea_size > 0)
            {
                matrixXvectorADD(neuHidden, neuFeatures, mat_feature2hidden, 0, L1, 0, fea_size, 0);
            }

            //activate 1      --sigmoid
            computeHiddenActivity();

            //initialize output nodes
            for (int c = 0; c < L2; c++)
            {
                neuOutput[c].ac = 0;
            }

            matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1, 0);
            if (doutput != null)
            {
                for (int i = 0; i < L2; i++)
                {
                    doutput[i] = neuOutput[i].ac;
                }
            }

            //activation 2   --softmax over the output tags
            double sum = 0;   //sum is used for normalization; higher precision helps since many values are summed here
            for (int c = 0; c < L2; c++)
            {
                if (neuOutput[c].ac > 50) neuOutput[c].ac = 50;  //for numerical stability
                if (neuOutput[c].ac < -50) neuOutput[c].ac = -50;  //for numerical stability
                double val = Math.Exp(neuOutput[c].ac);
                sum += val;
                neuOutput[c].ac = val;
            }

            for (int c = 0; c < L2; c++)
            {
                neuOutput[c].ac /= sum;
            }
        }
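The last two loops implement the clipped softmax used throughout these examples: activations are clamped to [-50, 50] for numerical stability, exponentiated, and divided by their sum. A standalone helper following the same recipe could look like this (a sketch; the neuron type with an ac field is assumed from the surrounding code):

        void SoftmaxInPlace(neuron[] layer)
        {
            double sum = 0;
            for (int c = 0; c < layer.Length; c++)
            {
                //clamp the activation for numerical stability, then exponentiate
                double ac = Math.Max(-50, Math.Min(50, layer[c].ac));
                double val = Math.Exp(ac);
                sum += val;
                layer[c].ac = val;
            }
            for (int c = 0; c < layer.Length; c++)
            {
                //normalize so the outputs form a probability distribution
                layer[c].ac /= sum;
            }
        }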
Example #9
        public override void LearnBackTime(State state, int numStates, int curState)
        {
            int maxBptt = 0;
            for (maxBptt = 0; maxBptt < bptt + bptt_block - 1; maxBptt++)
            {
                if (bptt_inputs[maxBptt] == null)
                {
                    break;
                }
            }

            //shift memory needed for bptt to next time step
            for (int a = maxBptt; a > 0; a--)
            {
                bptt_inputs[a] = bptt_inputs[a - 1];
                Array.Copy(bptt_hidden, (a - 1) * L1, bptt_hidden, a * L1, L1);
                Array.Copy(bptt_fea, (a - 1) * DenseFeatureSize, bptt_fea, a * DenseFeatureSize, DenseFeatureSize);
            }
            bptt_inputs[0] = state.SparseData;

            //Save hidden and feature layer node values for BPTT
            Array.Copy(neuHidden, 0, bptt_hidden, 0, L1);

            for (int i = 0; i < DenseFeatureSize; i++)
            {
                bptt_fea[i] = neuFeatures[i];
            }

            // time to learn bptt
            if (((curState % bptt_block) == 0) || (curState == numStates - 1))
            {
                learnBptt(state);
            }
        }
Example #10
        void learnBptt(State state)
        {
            for (int step = 0; step < bptt + bptt_block - 2; step++)
            {
                if (null == bptt_inputs[step])
                    break;

                var sparse = bptt_inputs[step];
                Parallel.For(0, L1, parallelOption, a =>
                {
                    // compute hidden layer gradient
                    neuHidden[a].er *= neuHidden[a].cellOutput * (1 - neuHidden[a].cellOutput);

                    //accumulate dense feature weight gradients: fea(t) -> hidden(t)
                    if (DenseFeatureSize > 0)
                    {
                        for (int i = 0; i < DenseFeatureSize; i++)
                        {
                            mat_bptt_synf[a][i] += neuHidden[a].er * bptt_fea[i + step * DenseFeatureSize];
                        }
                    }

                    //sparse weight update hidden->input
                    for (int i = 0; i < sparse.GetNumberOfEntries(); i++)
                    {
                        mat_bptt_syn0_w[a][sparse.GetEntry(i).Key] += neuHidden[a].er * sparse.GetEntry(i).Value;
                    }

                    //bptt weight update
                    for (int i = 0; i < L1; i++)
                    {
                        mat_bptt_syn0_ph[a][i] += neuHidden[a].er * neuLastHidden[i].cellOutput;
                    }

                });

                //propagate errors back through the recurrent weights to the previous hidden layer
                matrixXvectorADD(neuLastHidden, neuHidden, mat_hiddenBpttWeight, 0, L1, 0, L1, 1);

                for (int a = 0; a < L1; a++)
                {
                    //propagate error from time T-n to T-n-1
                    neuHidden[a].er = neuLastHidden[a].er + bptt_hidden[(step + 1) * L1 + a].er;
                }

                if (step < bptt + bptt_block - 3)
                {
                    for (int a = 0; a < L1; a++)
                    {
                        neuHidden[a].cellOutput = bptt_hidden[(step + 1) * L1 + a].cellOutput;
                        neuLastHidden[a].cellOutput = bptt_hidden[(step + 2) * L1 + a].cellOutput;
                    }
                }
            }

            for (int b = 0; b < L1; b++)
            {
                neuHidden[b].cellOutput = bptt_hidden[b].cellOutput;		//restore hidden layer after bptt
            }


            Parallel.For(0, L1, parallelOption, b =>
            {
                //Update bptt feature weights
                for (int i = 0; i < L1; i++)
                {
                    mat_hiddenBpttWeight[b][i] += LearningRate * mat_bptt_syn0_ph[b][i];
                    //Clean bptt weight error
                    mat_bptt_syn0_ph[b][i] = 0;
                }

                //Update dense feature weights
                if (DenseFeatureSize > 0)
                {
                    for (int i = 0; i < DenseFeatureSize; i++)
                    {
                        mat_feature2hidden[b][i] += LearningRate * mat_bptt_synf[b][i];
                        //Clean dense feature weights error
                        mat_bptt_synf[b][i] = 0;
                    }
                }

                //Update sparse feature weights
                for (int step = 0; step < bptt + bptt_block - 2; step++)
                {
                    if (null == bptt_inputs[step])
                        break;

                    var sparse = bptt_inputs[step];
                    for (int i = 0; i < sparse.GetNumberOfEntries(); i++)
                    {
                        int pos = sparse.GetEntry(i).Key;
                        mat_input2hidden[b][pos] += LearningRate * mat_bptt_syn0_w[b][pos];

                        //Clean sparse feature weight error
                        mat_bptt_syn0_w[b][pos] = 0;
                    }
                }
            });
        }
Example #11
        public override void learnNet(State state, int timeat, bool biRNN = false)
        {
            if (biRNN == false)
            {
                CalculateOutputLayerError(state, timeat);
            }

            //Propagate errors from the output layer back to the hidden layer
            matrixXvectorADD(neuHidden, OutputLayer, Hidden2OutputWeight, 0, L1, 0, L2, 1);

            //Apply drop out on error in hidden layer
            for (int i = 0; i < L1; i++)
            {
                if (neuHidden[i].mask == true)
                {
                    neuHidden[i].er = 0;
                }
            }

            //Update hidden-output weights
            Parallel.For(0, L1, parallelOption, a =>
            {
                for (int c = 0; c < L2; c++)
                {
                    Hidden2OutputWeight[c][a] += LearningRate * OutputLayer[c].er * neuHidden[a].cellOutput;
                }
            });
        }
Example #12
        // forward pass; the output layer holds the tag values
        public override void computeNet(State state, double[] doutput, bool isTrain = true)
        {
            //keep last hidden layer and erase activations
            neuLastHidden = neuHidden;

            //hidden(t-1) -> hidden(t)
            neuHidden = new neuron[L1];
            matrixXvectorADD(neuHidden, neuLastHidden, mat_hiddenBpttWeight, 0, L1, 0, L1, 0);

            //Apply feature values on hidden layer
            var sparse = state.SparseData;
            int n = sparse.GetNumberOfEntries();
            Parallel.For(0, L1, parallelOption, b =>
            {
                //Sparse features:
                //inputs(t) -> hidden(t)
                //Get the sparse features and apply them to the hidden layer
                for (int i = 0; i < n; i++)
                {
                    var entry = sparse.GetEntry(i);
                    neuHidden[b].cellOutput += entry.Value * mat_input2hidden[b][entry.Key];
                }

                //Dense features:
                //fea(t) -> hidden(t) 
                if (DenseFeatureSize > 0)
                {
                    for (int j = 0; j < DenseFeatureSize; j++)
                    {
                        neuHidden[b].cellOutput += neuFeatures[j] * mat_feature2hidden[b][j];
                    }
                }
            });

            //activate 1      --sigmoid
            computeHiddenActivity(isTrain);

            //Calculate output layer
            matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, 0, L2, 0, L1, 0);
            if (doutput != null)
            {
                for (int i = 0; i < L2; i++)
                {
                    doutput[i] = OutputLayer[i].cellOutput;
                }
            }

            //activation 2   --softmax over the output tags
            SoftmaxLayer(OutputLayer);
        }
Example #13
        // forward pass; the output layer holds the tag values
        public override void computeNet(State state, double[] doutput, bool isTrain = true)
        {
            //inputs(t) -> hidden(t)
            //Get the sparse features and apply them to the hidden layer
            var sparse = state.SparseData;
            int sparseFeatureSize = sparse.GetNumberOfEntries();

            Parallel.For(0, L1, parallelOption, j =>
            {
                LSTMCell cell_j = neuHidden[j];

                //hidden(t-1) -> hidden(t)
                cell_j.previousCellState = cell_j.cellState;

                //reset the value of the net input to zero
                cell_j.netIn = 0;

                cell_j.netForget = 0;
                //reset each netCell state to zero
                cell_j.netCellState = 0;
                //reset each netOut to zero
                cell_j.netOut = 0;
                for (int i = 0; i < sparseFeatureSize; i++)
                {
                    var entry = sparse.GetEntry(i);
                    LSTMWeight w = input2hidden[j][entry.Key];
                    //accumulate this sparse feature's contribution to each gate of the cell
                    cell_j.netIn += entry.Value * w.wInputInputGate;
                    cell_j.netForget += entry.Value * w.wInputForgetGate;
                    cell_j.netCellState += entry.Value * w.wInputCell;
                    cell_j.netOut += entry.Value * w.wInputOutputGate;
                }


                //fea(t) -> hidden(t) 
                if (DenseFeatureSize > 0)
                {
                    for (int i = 0; i < DenseFeatureSize; i++)
                    {
                        LSTMWeight w = feature2hidden[j][i];
                        cell_j.netIn += neuFeatures[i] * w.wInputInputGate;
                        cell_j.netForget += neuFeatures[i] * w.wInputForgetGate;
                        cell_j.netCellState += neuFeatures[i] * w.wInputCell;
                        cell_j.netOut += neuFeatures[i] * w.wInputOutputGate;
                    }
                }

                //include internal connection multiplied by the previous cell state
                cell_j.netIn += cell_j.previousCellState * cell_j.wCellIn;
                //squash input
                cell_j.yIn = (float)Sigmoid(cell_j.netIn);

                //include internal connection multiplied by the previous cell state
                cell_j.netForget += cell_j.previousCellState * cell_j.wCellForget;
                cell_j.yForget = (float)Sigmoid(cell_j.netForget);

                if (cell_j.mask == true)
                {
                    cell_j.cellState = 0;
                }
                else
                {
                    //cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
                    cell_j.cellState = (float)(cell_j.yForget * cell_j.previousCellState + cell_j.yIn * Sigmoid2(cell_j.netCellState));
                }

                //include the internal connection multiplied by the CURRENT cell state
                cell_j.netOut += cell_j.cellState * cell_j.wCellOut;

                //squash output gate 
                cell_j.yOut = (float)(Sigmoid(cell_j.netOut));

                cell_j.cellOutput = cell_j.cellState * cell_j.yOut;


                neuHidden[j] = cell_j;
            });

            matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, 0, L2, 0, L1);
            if (doutput != null)
            {
                for (int i = 0; i < L2; i++)
                {
                    doutput[i] = OutputLayer[i].cellOutput;
                }
            }

            //activation 2   --softmax over the output tags
            SoftmaxLayer(OutputLayer);
        }
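In equation form, the per-cell loop above follows the standard LSTM recipe. A sketch of the correspondence, writing \sigma for Sigmoid, g for Sigmoid2, s_{t-1} for previousCellState, and net_* for the summed sparse plus dense inputs to each gate:

    yIn        = \sigma(netIn + s_{t-1} \cdot wCellIn)
    yForget    = \sigma(netForget + s_{t-1} \cdot wCellForget)
    s_t        = yForget \cdot s_{t-1} + yIn \cdot g(netCellState)
    yOut       = \sigma(netOut + s_t \cdot wCellOut)
    cellOutput = s_t \cdot yOut

(When the dropout mask is set, s_t is forced to zero instead.) Example #20 differs only in squashing the cell state once more before the output gate, i.e. cellOutput = h(s_t) * yOut.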
Example #14
        public override void learnNet(State state, int timeat, bool biRNN = false)
        {
            //create delta list
            if (biRNN == false)
            {
                CalculateOutputLayerError(state, timeat);
            }

            //Get the sparse features and apply them to the hidden layer
            var sparse = state.SparseData;
            int sparseFeatureSize = sparse.GetNumberOfEntries();

            //put variables for derivatives in the weight class and cell class
            Parallel.For(0, L1, parallelOption, i =>
            {
                LSTMWeightDerivative[] w_i = input2hiddenDeri[i];
                LSTMCell c = neuHidden[i];
                float Sigmoid2Derivative_ci_netCellState_mul_ci_yIn = (float)(Sigmoid2Derivative(c.netCellState) * c.yIn);
                float Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn = (float)(Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn));
                float ci_previousCellState_mul_SigmoidDerivative_ci_netForget = (float)(c.previousCellState * SigmoidDerivative(c.netForget));

                for (int k = 0; k < sparseFeatureSize; k++)
                {
                    var entry = sparse.GetEntry(k);
                    LSTMWeightDerivative w = w_i[entry.Key];
                    w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * entry.Value;
                    w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * entry.Value;
                    w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * entry.Value;
                }

                if (DenseFeatureSize > 0)
                {
                    w_i = feature2hiddenDeri[i];
                    for (int j = 0; j < DenseFeatureSize; j++)
                    {
                        LSTMWeightDerivative w = w_i[j];
                        w_i[j].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * neuFeatures[j];
                        w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * neuFeatures[j];
                        w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * neuFeatures[j];
                    }
                }

                //partial derivatives for internal connections
                c.dSWCellIn = c.dSWCellIn * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * c.cellState;

                //partial derivatives for internal connections, initially zero as dS is zero and previous cell state is zero
                c.dSWCellForget = c.dSWCellForget * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * c.previousCellState;

                neuHidden[i] = c;
            });

            //for each hidden neuron
            Parallel.For(0, L1, parallelOption, i =>
          {
              LSTMCell c = neuHidden[i];

              //find the error by taking the product of the output errors and their weight connections.
              var weightedSum = 0.0;
              for (int k = 0; k < L2; k++)
              {
                  weightedSum += OutputLayer[k].er * Hidden2OutputWeight[k][i];
              }
              weightedSum = NormalizeErr(weightedSum);

              //using the error find the gradient of the output gate
              var gradientOutputGate = (float)(LearningRate * SigmoidDerivative(c.netOut) * c.cellState * weightedSum);

              //internal cell state error
              var cellStateError = (float)(LearningRate * c.yOut * weightedSum);

              //weight updates
              LSTMWeight[] w_i = input2hidden[i];
              LSTMWeightDerivative[] wd_i = input2hiddenDeri[i];
              for (int k = 0; k < sparseFeatureSize; k++)
              {
                  var entry = sparse.GetEntry(k);
                  //updates weights for input to hidden layer
                  w_i[entry.Key].wInputCell += cellStateError * wd_i[entry.Key].dSInputCell;
                  w_i[entry.Key].wInputInputGate += cellStateError * wd_i[entry.Key].dSInputInputGate;
                  w_i[entry.Key].wInputForgetGate += cellStateError * wd_i[entry.Key].dSInputForgetGate;
                  w_i[entry.Key].wInputOutputGate += gradientOutputGate * entry.Value;
              }


              if (DenseFeatureSize > 0)
              {
                  w_i = feature2hidden[i];
                  wd_i = feature2hiddenDeri[i];
                  for (int j = 0; j < DenseFeatureSize; j++)
                  {
                      //make the delta equal to the learning rate multiplied by the gradient multiplied by the input for the connection
                      //update connection weights
                      w_i[j].wInputCell += cellStateError * wd_i[j].dSInputCell;
                      w_i[j].wInputInputGate += cellStateError * wd_i[j].dSInputInputGate;
                      w_i[j].wInputForgetGate += cellStateError * wd_i[j].dSInputForgetGate;
                      w_i[j].wInputOutputGate += gradientOutputGate * neuFeatures[j];
                  }
              }

              //update internal weights
              c.wCellIn += cellStateError * c.dSWCellIn;
              c.wCellForget += cellStateError * c.dSWCellForget;
              c.wCellOut += gradientOutputGate * c.cellState;

              neuHidden[i] = c;
          });

            //update weights for hidden to output layer
            Parallel.For(0, L1, parallelOption, i =>
            {
                for (int k = 0; k < L2; k++)
                {
                    Hidden2OutputWeight[k][i] += (float)(LearningRate * neuHidden[i].cellOutput * OutputLayer[k].er);
                }
            });
        }
Example #15
        public override void learnNet(State state, int timeat)
        {
            if (m_bCRFTraining == true)
            {
                //For RNN-CRF, use the joint probability of output layer nodes and transitions between contiguous nodes
                for (int c = 0; c < L2; c++)
                {
                    neuOutput[c].er = -m_Diff[timeat][c];
                }
                neuOutput[state.GetLabel()].er = 1 - m_Diff[timeat][state.GetLabel()];
            }
            else
            {
                //For standard RNN
                for (int c = 0; c < L2; c++)
                {
                    neuOutput[c].er = -neuOutput[c].ac;
                }
                neuOutput[state.GetLabel()].er = 1 - neuOutput[state.GetLabel()].ac;
            }

            for (int a = 0; a < L1; a++)
            {
                neuHidden[a].er = 0;
            }
            matrixXvectorADD(neuHidden, neuOutput, mat_hidden2output, 0, L2, 0, L1, 1);	//propagate errors from the output layer back to the hidden layer

            Parallel.For(0, L2, parallelOption, c =>
            {
                for (int a = 0; a < L1; a++)
                {
                    double dg = neuOutput[c].er * neuHidden[a].ac;

                    if ((counter % 10) == 0)	//regularization is applied every 10th step
                    {
                        mat_hidden2output[c][a] += alpha * (dg - mat_hidden2output[c][a] * beta);
                    }
                    else
                    {
                        mat_hidden2output[c][a] += alpha * dg;
                    }
                }
            });
        }
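The branch on (counter % 10) above is the periodic L2 regularization used throughout these examples: on every 10th step the weight is additionally decayed toward zero, on all other steps a plain gradient step is taken. In formula form (a sketch, with \alpha the learning rate, \beta the regularization coefficient, and dg the computed gradient):

    w \leftarrow w + \alpha (dg - \beta w)    (every 10th step)
    w \leftarrow w + \alpha \, dg             (all other steps)

Example #7 writes the same decay as subtracting w * beta2 with beta2 = beta * alpha, which is this update with the learning rate folded into the regularization term.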
Example #16
        void learnBptt(State state)
        {
            for (int step = 0; step < bptt + bptt_block - 2; step++)
            {
                if (null == bptt_inputs[step])
                    break;

                // compute hidden layer gradient
                for (int a = 0; a < L1; a++)
                {
                    neuHidden[a].er *= neuHidden[a].ac * (1 - neuHidden[a].ac);
                }

                //accumulate dense feature weight gradients: fea(t) -> hidden(t)
                if (fea_size > 0)
                {
                    Parallel.For(0, L1, parallelOption, b =>
                    {
                        for (int a = 0; a < fea_size; a++)
                        {
                            mat_bptt_synf[b][a] += neuHidden[b].er * bptt_fea[a + step * fea_size].ac;
                        }
                    });
                }

                //weight update hidden->input
                var sparse = bptt_inputs[step];
                Parallel.For(0, L1, parallelOption, b =>
                {
                    for (int i = 0; i < sparse.GetNumberOfEntries(); i++)
                    {
                        mat_bptt_syn0_w[b][sparse.GetEntry(i).Key] += neuHidden[b].er * sparse.GetEntry(i).Value;

                    }
                });

                for (int a = L0 - L1; a < L0; a++)
                {
                    neuInput[a].er = 0;
                }

                matrixXvectorADD(neuInput, neuHidden, mat_hiddenBpttWeight, 0, L1, L0 - L1, L0, 1);		//propagates errors hidden->input to the recurrent part

                Parallel.For(0, L1, parallelOption, b =>
                {
                    for (int a = 0; a < L1; a++)
                    {
                        mat_bptt_syn0_ph[b][a] += neuHidden[b].er * neuInput[L0 - L1 + a].ac;
                    }
                });

                for (int a = 0; a < L1; a++)
                {
                    //propagate error from time T-n to T-n-1
                    neuHidden[a].er = neuInput[a + L0 - L1].er + bptt_hidden[(step + 1) * L1 + a].er;
                }

                if (step < bptt + bptt_block - 3)
                {
                    for (int a = 0; a < L1; a++)
                    {
                        neuHidden[a].ac = bptt_hidden[(step + 1) * L1 + a].ac;
                        neuInput[a + L0 - L1].ac = bptt_hidden[(step + 2) * L1 + a].ac;
                    }
                }
            }

            for (int a = 0; a < (bptt + bptt_block) * L1; a++)
            {
                bptt_hidden[a].er = 0;
            }

            for (int b = 0; b < L1; b++)
            {
                neuHidden[b].ac = bptt_hidden[b].ac;		//restore hidden layer after bptt
            }

            UpdateWeights(mat_hiddenBpttWeight, mat_bptt_syn0_ph);

            if (fea_size > 0)
            {
                UpdateWeights(mat_feature2hidden, mat_bptt_synf);
            }

            Parallel.For(0, L1, parallelOption, b =>
            {
                for (int step = 0; step < bptt + bptt_block - 2; step++)
                {
                    if (null == bptt_inputs[step])
                        break;

                    var sparse = bptt_inputs[step];
                    for (int i = 0; i < sparse.GetNumberOfEntries(); i++)
                    {
                        int pos = sparse.GetEntry(i).Key;
                        if ((counter % 10) == 0)
                        {
                            mat_input2hidden[b][pos] += alpha * (mat_bptt_syn0_w[b][pos] - mat_input2hidden[b][pos] * beta);
                        }
                        else
                        {
                            mat_input2hidden[b][pos] += alpha * mat_bptt_syn0_w[b][pos];
                        }

                        mat_bptt_syn0_w[b][pos] = 0;
                    }
                }
            });
        }
Example #17
        public void SetInputLayer(State state, int curState, int numStates, int[] predicted, bool forward = true)
        {
            if (predicted != null && state.RuntimeFeatures != null)
            {
                // set runtime feature
                for (int i = 0; i < state.RuntimeFeatures.Length; i++)
                {
                    for (int j = 0; j < OutputLayer.LayerSize; j++)
                    {
                        //Clean up run time feature value and then set a new one
                        state.SetRuntimeFeature(i, j, 0);
                    }

                    int pos = curState + ((forward == true) ? 1 : -1) * state.RuntimeFeatures[i].OffsetToCurrentState;
                    if (pos >= 0 && pos < numStates)
                    {
                        state.SetRuntimeFeature(i, predicted[pos], 1);
                    }
                }
            }
        }
Example #18
 public override void LearnBackTime(State state, int numStates, int curState)
 {
 }
Example #19
        void ExtractSparseFeature(int currentState, int numStates, List<string[]> features, State pState)
        {
            Dictionary<int, float> sparseFeature = new Dictionary<int, float>();
            int start = 0;
            var fc = m_FeatureConfiguration;

            //Extract TFeatures in given context window
            if (m_TFeaturizer != null)
            {
                if (fc.ContainsKey(TFEATURE_CONTEXT) == true)
                {
                    List<int> v = fc[TFEATURE_CONTEXT];
                    for (int j = 0; j < v.Count; j++)
                    {
                        int offset = TruncPosition(currentState + v[j], 0, numStates);

                        List<int> tfeatureList = m_TFeaturizer.GetFeatureIds(features, offset);
                        foreach (int featureId in tfeatureList)
                        {
                            if (m_TFeatureWeightType == TFEATURE_WEIGHT_TYPE_ENUM.BINARY)
                            {
                                sparseFeature[start + featureId] = 1;
                            }
                            else
                            {
                                if (sparseFeature.ContainsKey(start + featureId) == false)
                                {
                                    sparseFeature.Add(start + featureId, 1);
                                }
                                else
                                {
                                    sparseFeature[start + featureId]++;
                                }
                            }
                        }
                        start += m_TFeaturizer.GetFeatureSize();
                    }
                }
            }

            // Create a placeholder for the run-time feature
            // The real feature value is calculated at run time
            if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true)
            {
                List<int> v = fc[RT_FEATURE_CONTEXT];
                pState.RuntimeFeatures = new PriviousLabelFeature[v.Count];
                for (int j = 0; j < v.Count; j++)
                {
                    if (v[j] < 0)
                    {
                        pState.AddRuntimeFeaturePlacehold(j, v[j], sparseFeature.Count, start);
                        sparseFeature[start] = 0; //Reserve this position as a placeholder
                        start += TagSet.GetSize();
                    }
                    else
                    {
                        throw new Exception("The offset of run time feature should be negative.");
                    }
                }
            }

            SparseVector spSparseFeature = pState.SparseData;
            spSparseFeature.SetDimension(m_SparseDimension);
            spSparseFeature.SetData(sparseFeature);
        }
Example #20
        // forward pass; the output layer holds the tag values
        public override void computeNet(State state, double[] doutput)
        {
            //inputs(t) -> hidden(t)
            //Get the sparse features and apply them to the hidden layer
            var sparse = state.GetSparseData();
            int sparseFeatureSize = sparse.GetNumberOfEntries();

            //loop through all input gates in hidden layer
            //for each hidden neuron
            Parallel.For(0, L1, parallelOption, j =>
              {
              //reset the value of the net input to zero
              neuHidden[j].netIn = 0;

              //hidden(t-1) -> hidden(t)
              neuHidden[j].previousCellState = neuHidden[j].cellState;

              //for each input neuron
              for (int i = 0; i < sparseFeatureSize; i++)
              {
                  var entry = sparse.GetEntry(i);
                  neuHidden[j].netIn += entry.Value * mat_input2hidden[j][entry.Key].wInputInputGate;
              }

              });

            //fea(t) -> hidden(t)
            if (fea_size > 0)
            {
                matrixXvectorADD(neuHidden, neuFeatures, mat_feature2hidden, 0, L1, 0, fea_size);
            }

            Parallel.For(0, L1, parallelOption, j =>
            {
                LSTMCell cell_j = neuHidden[j];

                //include internal connection multiplied by the previous cell state
                cell_j.netIn += cell_j.previousCellState * cell_j.wCellIn;

                //squash input
                cell_j.yIn = activationFunctionF(cell_j.netIn);

                cell_j.netForget = 0;
                //reset each netCell state to zero
                cell_j.netCellState = 0;
                //reset each netOut to zero
                cell_j.netOut = 0;
                for (int i = 0; i < sparseFeatureSize; i++)
                {
                    var entry = sparse.GetEntry(i);
                    LSTMWeight w = mat_input2hidden[j][entry.Key];
                    //accumulate this sparse feature's contribution to the forget, cell, and output gates
                    cell_j.netForget += entry.Value * w.wInputForgetGate;
                    cell_j.netCellState += entry.Value * w.wInputCell;
                    cell_j.netOut += entry.Value * w.wInputOutputGate;
                }

                if (fea_size > 0)
                {
                    for (int i = 0; i < fea_size; i++)
                    {
                        LSTMWeight w = mat_feature2hidden[j][i];
                        cell_j.netForget += neuFeatures[i].ac * w.wInputForgetGate;
                        cell_j.netCellState += neuFeatures[i].ac * w.wInputCell;
                        cell_j.netOut += neuFeatures[i].ac * w.wInputOutputGate;
                    }
                }

                //include internal connection multiplied by the previous cell state
                cell_j.netForget += cell_j.previousCellState * cell_j.wCellForget;
                cell_j.yForget = activationFunctionF(cell_j.netForget);

                //cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
                cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * activationFunctionG(cell_j.netCellState);

                //include the internal connection multiplied by the CURRENT cell state
                cell_j.netOut += cell_j.cellState * cell_j.wCellOut;

                //squash output gate
                cell_j.yOut = activationFunctionF(cell_j.netOut);

                cell_j.cellOutput = activationFunctionH(cell_j.cellState) * cell_j.yOut;

                neuHidden[j] = cell_j;
            });

            //initialize output nodes
            for (int c = 0; c < L2; c++)
            {
                neuOutput[c].ac = 0;
            }

            matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1);
            if (doutput != null)
            {
                for (int i = 0; i < L2; i++)
                {
                    doutput[i] = neuOutput[i].ac;
                }
            }

            //activation 2   --softmax over the output tags
            double sum = 0;   //sum is used for normalization; higher precision helps since many values are summed here
            for (int c = 0; c < L2; c++)
            {
                if (neuOutput[c].ac > 50) neuOutput[c].ac = 50;  //for numerical stability
                if (neuOutput[c].ac < -50) neuOutput[c].ac = -50;  //for numerical stability
                double val = Math.Exp(neuOutput[c].ac);
                sum += val;
                neuOutput[c].ac = val;
            }

            for (int c = 0; c < L2; c++)
            {
                neuOutput[c].ac /= sum;
            }
        }
Example #21
        public override void learnNet(State state, int timeat, bool biRNN = false)
        {

        }