public Sequence(int numStates) {
    States = new State[numStates];
    for (int i = 0; i < numStates; i++) {
        States[i] = new State();
    }
}
public void SetSize(int numStates) {
    if (m_NumStates != numStates) {
        m_NumStates = numStates;
        m_States = null;
        if (m_NumStates > 0) {
            m_States = new State[m_NumStates];
            for (int i = 0; i < m_NumStates; i++) {
                m_States[i] = new State();
            }
        }
    }
}
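Both snippets allocate one fresh State object per position; SetSize additionally skips the work when the requested size has not changed. A minimal usage sketch (hypothetical caller code, not from the library; the length 12 is made up):

// Build a container for a 12-token sentence.
Sequence sequence = new Sequence(12);   // sequence.States[0..11] are fresh State objects
// The size-settable variant only reallocates when the requested size actually changes,
// so repeating the same call is a no-op:
// stateContainer.SetSize(12);
// stateContainer.SetSize(12);   // nothing happens here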
public override void LearnBackTime(State state, int numStates, int curState) {
    if (bptt > 0) {
        //shift memory needed for bptt to next time step
        for (int a = bptt + bptt_block - 1; a > 0; a--)
            bptt_inputs[a] = bptt_inputs[a - 1];
        bptt_inputs[0] = state.GetSparseData();

        for (int a = bptt + bptt_block - 1; a > 0; a--) {
            for (int b = 0; b < L1; b++) {
                bptt_hidden[a * L1 + b] = bptt_hidden[(a - 1) * L1 + b];
            }
        }

        for (int a = bptt + bptt_block - 1; a > 0; a--) {
            for (int b = 0; b < fea_size; b++) {
                bptt_fea[a * fea_size + b].ac = bptt_fea[(a - 1) * fea_size + b].ac;
            }
        }
    }

    //Save hidden and feature layer nodes values for bptt
    for (int b = 0; b < L1; b++) {
        bptt_hidden[b] = neuHidden[b];
    }
    for (int b = 0; b < fea_size; b++) {
        bptt_fea[b].ac = neuFeatures[b].ac;
    }

    // time to learn bptt
    if (((counter % bptt_block) == 0) || (curState == numStates - 1)) {
        learnBptt(state);
    }
}
public override void computeNet(State state, double[] doutput) { }
public override void learnNet(State state, int timeat) { }
public override void computeNet(State state, double[] doutput, bool isTrain = true) { }
public override void learnNet(State state, int timeat) {
    //create delta list
    double beta2 = beta * alpha;

    if (m_bCRFTraining == true) {
        //For RNN-CRF, use joint probability of output layer nodes and transitions between contiguous nodes
        for (int c = 0; c < L2; c++) {
            neuOutput[c].er = -m_Diff[timeat][c];
        }
        neuOutput[state.GetLabel()].er = 1 - m_Diff[timeat][state.GetLabel()];
    } else {
        //For standard RNN
        for (int c = 0; c < L2; c++) {
            neuOutput[c].er = -neuOutput[c].ac;
        }
        neuOutput[state.GetLabel()].er = 1 - neuOutput[state.GetLabel()].ac;
    }

    //Get sparse feature and apply it into hidden layer
    var sparse = state.GetSparseData();
    int sparseFeatureSize = sparse.GetNumberOfEntries();

    //put variables for derivatives in weight class and cell class
    Parallel.For(0, L1, parallelOption, i => {
        LSTMWeight[] w_i = mat_input2hidden[i];
        LSTMCell c = neuHidden[i];
        for (int k = 0; k < sparseFeatureSize; k++) {
            var entry = sparse.GetEntry(k);
            LSTMWeight w = w_i[entry.Key];
            w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + gPrime(c.netCellState) * c.yIn * entry.Value;
            w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * entry.Value;
            w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * fPrime(c.netForget) * entry.Value;
        }

        if (fea_size > 0) {
            w_i = mat_feature2hidden[i];
            for (int j = 0; j < fea_size; j++) {
                LSTMWeight w = w_i[j];
                w_i[j].dSInputCell = w.dSInputCell * c.yForget + gPrime(c.netCellState) * c.yIn * neuFeatures[j].ac;
                w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * neuFeatures[j].ac;
                w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + c.previousCellState * fPrime(c.netForget) * neuFeatures[j].ac;
            }
        }

        //partial derivatives for internal connections
        c.dSWCellIn = c.dSWCellIn * c.yForget + activationFunctionG(c.netCellState) * fPrime(c.netIn) * c.cellState;

        //partial derivatives for internal connections, initially zero as dS is zero and previous cell state is zero
        c.dSWCellForget = c.dSWCellForget * c.yForget + c.previousCellState * fPrime(c.netForget) * c.previousCellState;

        neuHidden[i] = c;
    });

    //for all output neurons
    for (int k = 0; k < L2; k++) {
        //for each connection to the hidden layer
        double er = neuOutput[k].er;
        for (int j = 0; j <= L1; j++) {
            deltaHiddenOutput[j][k] = alpha * neuHidden[j].cellOutput * er;
        }
    }

    //for each hidden neuron
    Parallel.For(0, L1, parallelOption, i => {
        LSTMCell c = neuHidden[i];

        //find the error by finding the product of the output errors and their weight connections
        double weightedSum = 0;
        for (int k = 0; k < L2; k++) {
            weightedSum += neuOutput[k].er * mat_hidden2output[i][k];
        }

        //using the error, find the gradient of the output gate
        c.gradientOutputGate = fPrime(c.netOut) * activationFunctionH(c.cellState) * weightedSum;

        //internal cell state error
        c.cellStateError = c.yOut * weightedSum * hPrime(c.cellState);

        //weight updates
        //the deltas for the hidden-output connections were computed above;
        //now update the output gates and the connections to the input layer
        LSTMWeight[] w_i = mat_input2hidden[i];
        for (int k = 0; k < sparseFeatureSize; k++) {
            var entry = sparse.GetEntry(k);
            //update weights for input to hidden layer
            if ((counter % 10) == 0) { //regularization is performed every 10 steps
                w_i[entry.Key].wInputCell += alpha * c.cellStateError * w_i[entry.Key].dSInputCell - w_i[entry.Key].wInputCell * beta2;
                w_i[entry.Key].wInputInputGate += alpha * c.cellStateError * w_i[entry.Key].dSInputInputGate - w_i[entry.Key].wInputInputGate * beta2;
                w_i[entry.Key].wInputForgetGate += alpha * c.cellStateError * w_i[entry.Key].dSInputForgetGate - w_i[entry.Key].wInputForgetGate * beta2;
                w_i[entry.Key].wInputOutputGate += alpha * c.gradientOutputGate * entry.Value - w_i[entry.Key].wInputOutputGate * beta2;
            } else {
                w_i[entry.Key].wInputCell += alpha * c.cellStateError * w_i[entry.Key].dSInputCell;
                w_i[entry.Key].wInputInputGate += alpha * c.cellStateError * w_i[entry.Key].dSInputInputGate;
                w_i[entry.Key].wInputForgetGate += alpha * c.cellStateError * w_i[entry.Key].dSInputForgetGate;
                w_i[entry.Key].wInputOutputGate += alpha * c.gradientOutputGate * entry.Value;
            }
        }

        if (fea_size > 0) {
            w_i = mat_feature2hidden[i];
            for (int j = 0; j < fea_size; j++) {
                //the delta is the learning rate multiplied by the gradient multiplied by the input for the connection
                //update connection weights
                if ((counter % 10) == 0) { //regularization is performed every 10 steps
                    w_i[j].wInputCell += alpha * c.cellStateError * w_i[j].dSInputCell - w_i[j].wInputCell * beta2;
                    w_i[j].wInputInputGate += alpha * c.cellStateError * w_i[j].dSInputInputGate - w_i[j].wInputInputGate * beta2;
                    w_i[j].wInputForgetGate += alpha * c.cellStateError * w_i[j].dSInputForgetGate - w_i[j].wInputForgetGate * beta2;
                    w_i[j].wInputOutputGate += alpha * c.gradientOutputGate * neuFeatures[j].ac - w_i[j].wInputOutputGate * beta2;
                } else {
                    w_i[j].wInputCell += alpha * c.cellStateError * w_i[j].dSInputCell;
                    w_i[j].wInputInputGate += alpha * c.cellStateError * w_i[j].dSInputInputGate;
                    w_i[j].wInputForgetGate += alpha * c.cellStateError * w_i[j].dSInputForgetGate;
                    w_i[j].wInputOutputGate += alpha * c.gradientOutputGate * neuFeatures[j].ac;
                }
            }
        }

        //for the internal connection
        double deltaOutputGateCell = alpha * c.gradientOutputGate * c.cellState;

        //using internal partial derivatives
        double deltaInputGateCell = alpha * c.cellStateError * c.dSWCellIn;
        double deltaForgetGateCell = alpha * c.cellStateError * c.dSWCellForget;

        //update internal weights
        if ((counter % 10) == 0) { //regularization is performed every 10 steps
            c.wCellIn += deltaInputGateCell - c.wCellIn * beta2;
            c.wCellForget += deltaForgetGateCell - c.wCellForget * beta2;
            c.wCellOut += deltaOutputGateCell - c.wCellOut * beta2;
        } else {
            c.wCellIn += deltaInputGateCell;
            c.wCellForget += deltaForgetGateCell;
            c.wCellOut += deltaOutputGateCell;
        }
        neuHidden[i] = c;

        //update weights for hidden to output layer
        for (int k = 0; k < L2; k++) {
            if ((counter % 10) == 0) { //regularization is performed every 10 steps
                mat_hidden2output[i][k] += deltaHiddenOutput[i][k] - mat_hidden2output[i][k] * beta2;
            } else {
                mat_hidden2output[i][k] += deltaHiddenOutput[i][k];
            }
        }
    });
}
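The nearly identical branches above differ only by the weight-decay term. With learning rate α and regularization coefficient β (so beta2 = α·β), the update applied to each input-side weight is, in equation form,

w ← w + α · δ · ∂s/∂w − α·β·w   (every 10th counter step)
w ← w + α · δ · ∂s/∂w            (otherwise)

where δ is cellStateError for the cell, input-gate and forget-gate weights (with the cached dSInput* value standing in for ∂s/∂w), and gradientOutputGate times the raw input for the output-gate weights.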
// forward process. output layer consists of tag value
public override void computeNet(State state, double[] doutput) {
    //erase activations
    for (int a = 0; a < L1; a++)
        neuHidden[a].ac = 0;

    //hidden(t-1) -> hidden(t)
    matrixXvectorADD(neuHidden, neuInput, mat_hiddenBpttWeight, 0, L1, L0 - L1, L0, 0);

    //inputs(t) -> hidden(t)
    //Get sparse feature and apply it into hidden layer
    var sparse = state.GetSparseData();
    int n = sparse.GetNumberOfEntries();
    for (int i = 0; i < n; i++) {
        var entry = sparse.GetEntry(i);
        for (int b = 0; b < L1; b++) {
            neuHidden[b].ac += entry.Value * mat_input2hidden[b][entry.Key];
        }
    }

    //fea(t) -> hidden(t)
    if (fea_size > 0) {
        matrixXvectorADD(neuHidden, neuFeatures, mat_feature2hidden, 0, L1, 0, fea_size, 0);
    }

    //activate 1 --sigmoid
    computeHiddenActivity();

    //initialize output nodes
    for (int c = 0; c < L2; c++) {
        neuOutput[c].ac = 0;
    }
    matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1, 0);

    if (doutput != null) {
        for (int i = 0; i < L2; i++) {
            doutput[i] = neuOutput[i].ac;
        }
    }

    //activation 2 --softmax on words
    double sum = 0; //sum is used for normalization: it's better to have larger precision as many numbers are summed together here
    for (int c = 0; c < L2; c++) {
        if (neuOutput[c].ac > 50) neuOutput[c].ac = 50;   //for numerical stability
        if (neuOutput[c].ac < -50) neuOutput[c].ac = -50; //for numerical stability
        double val = Math.Exp(neuOutput[c].ac);
        sum += val;
        neuOutput[c].ac = val;
    }
    for (int c = 0; c < L2; c++) {
        neuOutput[c].ac /= sum;
    }
}
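The last two loops are a numerically clamped softmax over the L2 output nodes: each activation is clipped to [−50, 50] before exponentiation so Math.Exp cannot overflow:

y_c = exp(clip(a_c, −50, 50)) / Σ_k exp(clip(a_k, −50, 50))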
public override void LearnBackTime(State state, int numStates, int curState) {
    int maxBptt = 0;
    for (maxBptt = 0; maxBptt < bptt + bptt_block - 1; maxBptt++) {
        if (bptt_inputs[maxBptt] == null) {
            break;
        }
    }

    //shift memory needed for bptt to next time step
    for (int a = maxBptt; a > 0; a--) {
        bptt_inputs[a] = bptt_inputs[a - 1];
        Array.Copy(bptt_hidden, (a - 1) * L1, bptt_hidden, a * L1, L1);
        Array.Copy(bptt_fea, (a - 1) * DenseFeatureSize, bptt_fea, a * DenseFeatureSize, DenseFeatureSize);
    }
    bptt_inputs[0] = state.SparseData;

    //Save hidden and feature layer nodes values for bptt
    Array.Copy(neuHidden, 0, bptt_hidden, 0, L1);
    for (int i = 0; i < DenseFeatureSize; i++) {
        bptt_fea[i] = neuFeatures[i];
    }

    // time to learn bptt
    if (((curState % bptt_block) == 0) || (curState == numStates - 1)) {
        learnBptt(state);
    }
}
void learnBptt(State state) {
    for (int step = 0; step < bptt + bptt_block - 2; step++) {
        if (null == bptt_inputs[step])
            break;

        var sparse = bptt_inputs[step];
        Parallel.For(0, L1, parallelOption, a => {
            // compute hidden layer gradient
            neuHidden[a].er *= neuHidden[a].cellOutput * (1 - neuHidden[a].cellOutput);

            //dense weight update fea->0
            if (DenseFeatureSize > 0) {
                for (int i = 0; i < DenseFeatureSize; i++) {
                    mat_bptt_synf[a][i] += neuHidden[a].er * bptt_fea[i + step * DenseFeatureSize];
                }
            }

            //sparse weight update hidden->input
            for (int i = 0; i < sparse.GetNumberOfEntries(); i++) {
                mat_bptt_syn0_w[a][sparse.GetEntry(i).Key] += neuHidden[a].er * sparse.GetEntry(i).Value;
            }

            //bptt weight update
            for (int i = 0; i < L1; i++) {
                mat_bptt_syn0_ph[a][i] += neuHidden[a].er * neuLastHidden[i].cellOutput;
            }
        });

        //propagates errors hidden->input to the recurrent part
        matrixXvectorADD(neuLastHidden, neuHidden, mat_hiddenBpttWeight, 0, L1, 0, L1, 1);

        for (int a = 0; a < L1; a++) {
            //propagate error from time T-n to T-n-1
            neuHidden[a].er = neuLastHidden[a].er + bptt_hidden[(step + 1) * L1 + a].er;
        }

        if (step < bptt + bptt_block - 3) {
            for (int a = 0; a < L1; a++) {
                neuHidden[a].cellOutput = bptt_hidden[(step + 1) * L1 + a].cellOutput;
                neuLastHidden[a].cellOutput = bptt_hidden[(step + 2) * L1 + a].cellOutput;
            }
        }
    }

    for (int b = 0; b < L1; b++) {
        neuHidden[b].cellOutput = bptt_hidden[b].cellOutput; //restore hidden layer after bptt
    }

    Parallel.For(0, L1, parallelOption, b => {
        //Update bptt feature weights
        for (int i = 0; i < L1; i++) {
            mat_hiddenBpttWeight[b][i] += LearningRate * mat_bptt_syn0_ph[b][i];
            //Clean bptt weight error
            mat_bptt_syn0_ph[b][i] = 0;
        }

        //Update dense feature weights
        if (DenseFeatureSize > 0) {
            for (int i = 0; i < DenseFeatureSize; i++) {
                mat_feature2hidden[b][i] += LearningRate * mat_bptt_synf[b][i];
                //Clean dense feature weights error
                mat_bptt_synf[b][i] = 0;
            }
        }

        //Update sparse feature weights
        for (int step = 0; step < bptt + bptt_block - 2; step++) {
            if (null == bptt_inputs[step])
                break;

            var sparse = bptt_inputs[step];
            for (int i = 0; i < sparse.GetNumberOfEntries(); i++) {
                int pos = sparse.GetEntry(i).Key;
                mat_input2hidden[b][pos] += LearningRate * mat_bptt_syn0_w[b][pos];
                //Clean sparse feature weight error
                mat_bptt_syn0_w[b][pos] = 0;
            }
        }
    });
}
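Each unrolling step first scales the accumulated hidden error by the sigmoid derivative, δ_h ← δ_h · y_h·(1 − y_h), uses that δ to accumulate the sparse, dense and recurrent gradient buffers, and then builds the error for the previous time step as the back-propagated term plus the error stored for that step:

δ_h(prev) = Σ_h' δ_h' · r_h'h + stored_er_h(prev)

where r is mat_hiddenBpttWeight and the stored term comes from bptt_hidden. The gradient buffers are applied to the weights only once, in the Parallel.For block after the unrolling loop finishes.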
public override void learnNet(State state, int timeat, bool biRNN = false) {
    if (biRNN == false) {
        CalculateOutputLayerError(state, timeat);
    }

    //error output->hidden for words from specific class
    matrixXvectorADD(neuHidden, OutputLayer, Hidden2OutputWeight, 0, L1, 0, L2, 1);

    //Apply drop out on error in hidden layer
    for (int i = 0; i < L1; i++) {
        if (neuHidden[i].mask == true) {
            neuHidden[i].er = 0;
        }
    }

    //Update hidden-output weights
    Parallel.For(0, L1, parallelOption, a => {
        for (int c = 0; c < L2; c++) {
            Hidden2OutputWeight[c][a] += LearningRate * OutputLayer[c].er * neuHidden[a].cellOutput;
        }
    });
}
// forward process. output layer consists of tag value
public override void computeNet(State state, double[] doutput, bool isTrain = true) {
    //keep last hidden layer and erase activations
    neuLastHidden = neuHidden;

    //hidden(t-1) -> hidden(t)
    neuHidden = new neuron[L1];
    matrixXvectorADD(neuHidden, neuLastHidden, mat_hiddenBpttWeight, 0, L1, 0, L1, 0);

    //Apply feature values on hidden layer
    var sparse = state.SparseData;
    int n = sparse.GetNumberOfEntries();
    Parallel.For(0, L1, parallelOption, b => {
        //Sparse features:
        //inputs(t) -> hidden(t)
        //Get sparse feature and apply it into hidden layer
        for (int i = 0; i < n; i++) {
            var entry = sparse.GetEntry(i);
            neuHidden[b].cellOutput += entry.Value * mat_input2hidden[b][entry.Key];
        }

        //Dense features:
        //fea(t) -> hidden(t)
        if (DenseFeatureSize > 0) {
            for (int j = 0; j < DenseFeatureSize; j++) {
                neuHidden[b].cellOutput += neuFeatures[j] * mat_feature2hidden[b][j];
            }
        }
    });

    //activate 1 --sigmoid
    computeHiddenActivity(isTrain);

    //Calculate output layer
    matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, 0, L2, 0, L1, 0);
    if (doutput != null) {
        for (int i = 0; i < L2; i++) {
            doutput[i] = OutputLayer[i].cellOutput;
        }
    }

    //activation 2 --softmax on words
    SoftmaxLayer(OutputLayer);
}
// forward process. output layer consists of tag value
public override void computeNet(State state, double[] doutput, bool isTrain = true) {
    //inputs(t) -> hidden(t)
    //Get sparse feature and apply it into hidden layer
    var sparse = state.SparseData;
    int sparseFeatureSize = sparse.GetNumberOfEntries();

    Parallel.For(0, L1, parallelOption, j => {
        LSTMCell cell_j = neuHidden[j];

        //hidden(t-1) -> hidden(t)
        cell_j.previousCellState = cell_j.cellState;

        //reset the value of the net input to zero
        cell_j.netIn = 0;
        cell_j.netForget = 0;
        //reset each netCell state to zero
        cell_j.netCellState = 0;
        //reset each netOut to zero
        cell_j.netOut = 0;

        for (int i = 0; i < sparseFeatureSize; i++) {
            var entry = sparse.GetEntry(i);
            LSTMWeight w = input2hidden[j][entry.Key];
            //accumulate the sparse input into all four gate inputs of the hidden layer
            cell_j.netIn += entry.Value * w.wInputInputGate;
            cell_j.netForget += entry.Value * w.wInputForgetGate;
            cell_j.netCellState += entry.Value * w.wInputCell;
            cell_j.netOut += entry.Value * w.wInputOutputGate;
        }

        //fea(t) -> hidden(t)
        if (DenseFeatureSize > 0) {
            for (int i = 0; i < DenseFeatureSize; i++) {
                LSTMWeight w = feature2hidden[j][i];
                cell_j.netIn += neuFeatures[i] * w.wInputInputGate;
                cell_j.netForget += neuFeatures[i] * w.wInputForgetGate;
                cell_j.netCellState += neuFeatures[i] * w.wInputCell;
                cell_j.netOut += neuFeatures[i] * w.wInputOutputGate;
            }
        }

        //include internal connection multiplied by the previous cell state
        cell_j.netIn += cell_j.previousCellState * cell_j.wCellIn;
        //squash input
        cell_j.yIn = (float)Sigmoid(cell_j.netIn);

        //include internal connection multiplied by the previous cell state
        cell_j.netForget += cell_j.previousCellState * cell_j.wCellForget;
        cell_j.yForget = (float)Sigmoid(cell_j.netForget);

        if (cell_j.mask == true) {
            cell_j.cellState = 0;
        } else {
            //cell state is equal to the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
            cell_j.cellState = (float)(cell_j.yForget * cell_j.previousCellState + cell_j.yIn * Sigmoid2(cell_j.netCellState));
        }

        //include the internal connection multiplied by the CURRENT cell state
        cell_j.netOut += cell_j.cellState * cell_j.wCellOut;

        //squash output gate
        cell_j.yOut = (float)(Sigmoid(cell_j.netOut));

        cell_j.cellOutput = cell_j.cellState * cell_j.yOut;

        neuHidden[j] = cell_j;
    });

    matrixXvectorADD(OutputLayer, neuHidden, Hidden2OutputWeight, 0, L2, 0, L1);
    if (doutput != null) {
        for (int i = 0; i < L2; i++) {
            doutput[i] = OutputLayer[i].cellOutput;
        }
    }

    //activation 2 --softmax on words
    SoftmaxLayer(OutputLayer);
}
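Written as equations, this cell's forward pass (with internal peephole-style weights wCellIn, wCellForget, wCellOut, g standing for Sigmoid2 and σ for Sigmoid) is:

i_t = σ(net_in + wCellIn · s_{t−1})
f_t = σ(net_forget + wCellForget · s_{t−1})
s_t = f_t · s_{t−1} + i_t · g(net_cell)        (forced to 0 when the dropout mask is set)
o_t = σ(net_out + wCellOut · s_t)
y_t = s_t · o_t

where the net terms are the sparse-plus-dense weighted sums accumulated above. Note that this variant multiplies the output gate by the raw cell state; the older computeNet further below applies activationFunctionH to the cell state first.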
public override void learnNet(State state, int timeat, bool biRNN = false) {
    //create delta list
    if (biRNN == false) {
        CalculateOutputLayerError(state, timeat);
    }

    //Get sparse feature and apply it into hidden layer
    var sparse = state.SparseData;
    int sparseFeatureSize = sparse.GetNumberOfEntries();

    //put variables for derivatives in weight class and cell class
    Parallel.For(0, L1, parallelOption, i => {
        LSTMWeightDerivative[] w_i = input2hiddenDeri[i];
        LSTMCell c = neuHidden[i];

        float Sigmoid2Derivative_ci_netCellState_mul_ci_yIn = (float)(Sigmoid2Derivative(c.netCellState) * c.yIn);
        float Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn = (float)(Sigmoid2(c.netCellState) * SigmoidDerivative(c.netIn));
        float ci_previousCellState_mul_SigmoidDerivative_ci_netForget = (float)(c.previousCellState * SigmoidDerivative(c.netForget));

        for (int k = 0; k < sparseFeatureSize; k++) {
            var entry = sparse.GetEntry(k);
            LSTMWeightDerivative w = w_i[entry.Key];
            w_i[entry.Key].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * entry.Value;
            w_i[entry.Key].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * entry.Value;
            w_i[entry.Key].dSInputForgetGate = w.dSInputForgetGate * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * entry.Value;
        }

        if (DenseFeatureSize > 0) {
            w_i = feature2hiddenDeri[i];
            for (int j = 0; j < DenseFeatureSize; j++) {
                LSTMWeightDerivative w = w_i[j];
                w_i[j].dSInputCell = w.dSInputCell * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * neuFeatures[j];
                w_i[j].dSInputInputGate = w.dSInputInputGate * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * neuFeatures[j];
                w_i[j].dSInputForgetGate = w.dSInputForgetGate * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * neuFeatures[j];
            }
        }

        //partial derivatives for internal connections
        c.dSWCellIn = c.dSWCellIn * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * c.cellState;

        //partial derivatives for internal connections, initially zero as dS is zero and previous cell state is zero
        c.dSWCellForget = c.dSWCellForget * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * c.previousCellState;

        neuHidden[i] = c;
    });

    //for each hidden neuron
    Parallel.For(0, L1, parallelOption, i => {
        LSTMCell c = neuHidden[i];

        //find the error by finding the product of the output errors and their weight connections
        var weightedSum = 0.0;
        for (int k = 0; k < L2; k++) {
            weightedSum += OutputLayer[k].er * Hidden2OutputWeight[k][i];
        }
        weightedSum = NormalizeErr(weightedSum);

        //using the error, find the gradient of the output gate
        var gradientOutputGate = (float)(LearningRate * SigmoidDerivative(c.netOut) * c.cellState * weightedSum);

        //internal cell state error
        var cellStateError = (float)(LearningRate * c.yOut * weightedSum);

        //weight updates
        LSTMWeight[] w_i = input2hidden[i];
        LSTMWeightDerivative[] wd_i = input2hiddenDeri[i];
        for (int k = 0; k < sparseFeatureSize; k++) {
            var entry = sparse.GetEntry(k);
            //update weights for input to hidden layer
            w_i[entry.Key].wInputCell += cellStateError * wd_i[entry.Key].dSInputCell;
            w_i[entry.Key].wInputInputGate += cellStateError * wd_i[entry.Key].dSInputInputGate;
            w_i[entry.Key].wInputForgetGate += cellStateError * wd_i[entry.Key].dSInputForgetGate;
            w_i[entry.Key].wInputOutputGate += gradientOutputGate * entry.Value;
        }

        if (DenseFeatureSize > 0) {
            w_i = feature2hidden[i];
            wd_i = feature2hiddenDeri[i];
            for (int j = 0; j < DenseFeatureSize; j++) {
                //the delta is the learning rate multiplied by the gradient multiplied by the input for the connection
                //update connection weights
                w_i[j].wInputCell += cellStateError * wd_i[j].dSInputCell;
                w_i[j].wInputInputGate += cellStateError * wd_i[j].dSInputInputGate;
                w_i[j].wInputForgetGate += cellStateError * wd_i[j].dSInputForgetGate;
                w_i[j].wInputOutputGate += gradientOutputGate * neuFeatures[j];
            }
        }

        //update internal weights
        c.wCellIn += cellStateError * c.dSWCellIn;
        c.wCellForget += cellStateError * c.dSWCellForget;
        c.wCellOut += gradientOutputGate * c.cellState;

        neuHidden[i] = c;
    });

    //update weights for hidden to output layer
    Parallel.For(0, L1, parallelOption, i => {
        for (int k = 0; k < L2; k++) {
            Hidden2OutputWeight[k][i] += (float)(LearningRate * neuHidden[i].cellOutput * OutputLayer[k].er);
        }
    });
}
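The dSInput*/dSWCell* values cached in the first Parallel.For are truncated derivatives of the cell state with respect to each input-side and internal weight, maintained with the recursion (x is the corresponding sparse or dense input value, g is Sigmoid2 and σ is Sigmoid):

∂s_t/∂w_cell   = ∂s_{t−1}/∂w_cell   · f_t + g′(net_cell) · i_t · x
∂s_t/∂w_in     = ∂s_{t−1}/∂w_in     · f_t + g(net_cell) · σ′(net_in) · x
∂s_t/∂w_forget = ∂s_{t−1}/∂w_forget · f_t + s_{t−1} · σ′(net_forget) · x

The second Parallel.For then consumes them directly: because LearningRate is already folded into cellStateError and gradientOutputGate, each weight update is simply the cached derivative (or the raw input, for output-gate weights) multiplied by that error term.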
public override void learnNet(State state, int timeat) {
    if (m_bCRFTraining == true) {
        //For RNN-CRF, use joint probability of output layer nodes and transitions between contiguous nodes
        for (int c = 0; c < L2; c++) {
            neuOutput[c].er = -m_Diff[timeat][c];
        }
        neuOutput[state.GetLabel()].er = 1 - m_Diff[timeat][state.GetLabel()];
    } else {
        //For standard RNN
        for (int c = 0; c < L2; c++) {
            neuOutput[c].er = -neuOutput[c].ac;
        }
        neuOutput[state.GetLabel()].er = 1 - neuOutput[state.GetLabel()].ac;
    }

    for (int a = 0; a < L1; a++) {
        neuHidden[a].er = 0;
    }
    matrixXvectorADD(neuHidden, neuOutput, mat_hidden2output, 0, L2, 0, L1, 1); //error output->hidden for words from specific class

    Parallel.For(0, L2, parallelOption, c => {
        for (int a = 0; a < L1; a++) {
            double dg = neuOutput[c].er * neuHidden[a].ac;
            if ((counter % 10) == 0) { //regularization is performed every 10 steps
                mat_hidden2output[c][a] += alpha * (dg - mat_hidden2output[c][a] * beta);
            } else {
                mat_hidden2output[c][a] += alpha * dg;
            }
        }
    });
}
void learnBptt(State state) {
    for (int step = 0; step < bptt + bptt_block - 2; step++) {
        if (null == bptt_inputs[step])
            break;

        // compute hidden layer gradient
        for (int a = 0; a < L1; a++) {
            neuHidden[a].er *= neuHidden[a].ac * (1 - neuHidden[a].ac);
        }

        //weight update fea->0
        if (fea_size > 0) {
            Parallel.For(0, L1, parallelOption, b => {
                for (int a = 0; a < fea_size; a++) {
                    mat_bptt_synf[b][a] += neuHidden[b].er * bptt_fea[a + step * fea_size].ac;
                }
            });
        }

        //weight update hidden->input
        var sparse = bptt_inputs[step];
        Parallel.For(0, L1, parallelOption, b => {
            for (int i = 0; i < sparse.GetNumberOfEntries(); i++) {
                mat_bptt_syn0_w[b][sparse.GetEntry(i).Key] += neuHidden[b].er * sparse.GetEntry(i).Value;
            }
        });

        for (int a = L0 - L1; a < L0; a++) {
            neuInput[a].er = 0;
        }
        matrixXvectorADD(neuInput, neuHidden, mat_hiddenBpttWeight, 0, L1, L0 - L1, L0, 1); //propagates errors hidden->input to the recurrent part

        Parallel.For(0, L1, parallelOption, b => {
            for (int a = 0; a < L1; a++) {
                mat_bptt_syn0_ph[b][a] += neuHidden[b].er * neuInput[L0 - L1 + a].ac;
            }
        });

        for (int a = 0; a < L1; a++) {
            //propagate error from time T-n to T-n-1
            neuHidden[a].er = neuInput[a + L0 - L1].er + bptt_hidden[(step + 1) * L1 + a].er;
        }

        if (step < bptt + bptt_block - 3) {
            for (int a = 0; a < L1; a++) {
                neuHidden[a].ac = bptt_hidden[(step + 1) * L1 + a].ac;
                neuInput[a + L0 - L1].ac = bptt_hidden[(step + 2) * L1 + a].ac;
            }
        }
    }

    for (int a = 0; a < (bptt + bptt_block) * L1; a++) {
        bptt_hidden[a].er = 0;
    }

    for (int b = 0; b < L1; b++) {
        neuHidden[b].ac = bptt_hidden[b].ac; //restore hidden layer after bptt
    }

    UpdateWeights(mat_hiddenBpttWeight, mat_bptt_syn0_ph);
    if (fea_size > 0) {
        UpdateWeights(mat_feature2hidden, mat_bptt_synf);
    }

    Parallel.For(0, L1, parallelOption, b => {
        for (int step = 0; step < bptt + bptt_block - 2; step++) {
            if (null == bptt_inputs[step])
                break;

            var sparse = bptt_inputs[step];
            for (int i = 0; i < sparse.GetNumberOfEntries(); i++) {
                int pos = sparse.GetEntry(i).Key;
                if ((counter % 10) == 0) {
                    mat_input2hidden[b][pos] += alpha * (mat_bptt_syn0_w[b][pos] - mat_input2hidden[b][pos] * beta);
                } else {
                    mat_input2hidden[b][pos] += alpha * mat_bptt_syn0_w[b][pos];
                }
                mat_bptt_syn0_w[b][pos] = 0;
            }
        }
    });
}
public void SetInputLayer(State state, int curState, int numStates, int[] predicted, bool forward = true) {
    if (predicted != null && state.RuntimeFeatures != null) {
        // set runtime feature
        for (int i = 0; i < state.RuntimeFeatures.Length; i++) {
            for (int j = 0; j < OutputLayer.LayerSize; j++) {
                //Clean up run time feature value and then set a new one
                state.SetRuntimeFeature(i, j, 0);
            }

            int pos = curState + ((forward == true) ? 1 : -1) * state.RuntimeFeatures[i].OffsetToCurrentState;
            if (pos >= 0 && pos < numStates) {
                state.SetRuntimeFeature(i, predicted[pos], 1);
            }
        }
    }
}
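The offset arithmetic is easiest to see with numbers: for curState = 5, a runtime feature with OffsetToCurrentState = −1 and forward = true gives pos = 5 + 1·(−1) = 4, so the feature fires on the label predicted for the previous token; with forward = false the same feature reads pos = 5 + (−1)·(−1) = 6, the following token.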
public override void LearnBackTime(State state, int numStates, int curState) { }
void ExtractSparseFeature(int currentState, int numStates, List<string[]> features, State pState) {
    Dictionary<int, float> sparseFeature = new Dictionary<int, float>();
    int start = 0;
    var fc = m_FeatureConfiguration;

    //Extract TFeatures in given context window
    if (m_TFeaturizer != null) {
        if (fc.ContainsKey(TFEATURE_CONTEXT) == true) {
            List<int> v = fc[TFEATURE_CONTEXT];
            for (int j = 0; j < v.Count; j++) {
                int offset = TruncPosition(currentState + v[j], 0, numStates);
                List<int> tfeatureList = m_TFeaturizer.GetFeatureIds(features, offset);
                foreach (int featureId in tfeatureList) {
                    if (m_TFeatureWeightType == TFEATURE_WEIGHT_TYPE_ENUM.BINARY) {
                        sparseFeature[start + featureId] = 1;
                    } else {
                        if (sparseFeature.ContainsKey(start + featureId) == false) {
                            sparseFeature.Add(start + featureId, 1);
                        } else {
                            sparseFeature[start + featureId]++;
                        }
                    }
                }
                start += m_TFeaturizer.GetFeatureSize();
            }
        }
    }

    // Create placeholders for run time features
    // The real feature value is calculated at run time
    if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true) {
        List<int> v = fc[RT_FEATURE_CONTEXT];
        pState.RuntimeFeatures = new PriviousLabelFeature[v.Count];
        for (int j = 0; j < v.Count; j++) {
            if (v[j] < 0) {
                pState.AddRuntimeFeaturePlacehold(j, v[j], sparseFeature.Count, start);
                sparseFeature[start] = 0; //hold a position for the runtime feature
                start += TagSet.GetSize();
            } else {
                throw new Exception("The offset of run time feature should be negative.");
            }
        }
    }

    SparseVector spSparseFeature = pState.SparseData;
    spSparseFeature.SetDimension(m_SparseDimension);
    spSparseFeature.SetData(sparseFeature);
}
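To make the resulting index layout concrete (the sizes here are illustrative assumptions, not values from the library): with a TFeature template set of size 100,000 and a TFEATURE_CONTEXT window of {−1, 0, 1}, the ids emitted for the three context positions land in

[0, 100000)                            TFeatures extracted at offset −1
[100000, 200000)                       TFeatures extracted at offset 0
[200000, 300000)                       TFeatures extracted at offset +1
[300000, 300000 + TagSet.GetSize())    runtime previous-label placeholder

and the placeholder slot stays at value 0 until SetInputLayer fills in the predicted label at decode time.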
// forward process. output layer consists of tag value
public override void computeNet(State state, double[] doutput) {
    //inputs(t) -> hidden(t)
    //Get sparse feature and apply it into hidden layer
    var sparse = state.GetSparseData();
    int sparseFeatureSize = sparse.GetNumberOfEntries();

    //loop through all input gates in hidden layer
    //for each hidden neuron
    Parallel.For(0, L1, parallelOption, j => {
        //reset the value of the net input to zero
        neuHidden[j].netIn = 0;

        //hidden(t-1) -> hidden(t)
        neuHidden[j].previousCellState = neuHidden[j].cellState;

        //for each input neuron
        for (int i = 0; i < sparseFeatureSize; i++) {
            var entry = sparse.GetEntry(i);
            neuHidden[j].netIn += entry.Value * mat_input2hidden[j][entry.Key].wInputInputGate;
        }
    });

    //fea(t) -> hidden(t)
    if (fea_size > 0) {
        matrixXvectorADD(neuHidden, neuFeatures, mat_feature2hidden, 0, L1, 0, fea_size);
    }

    Parallel.For(0, L1, parallelOption, j => {
        LSTMCell cell_j = neuHidden[j];

        //include internal connection multiplied by the previous cell state
        cell_j.netIn += cell_j.previousCellState * cell_j.wCellIn;

        //squash input
        cell_j.yIn = activationFunctionF(cell_j.netIn);

        cell_j.netForget = 0;
        //reset each netCell state to zero
        cell_j.netCellState = 0;
        //reset each netOut to zero
        cell_j.netOut = 0;

        for (int i = 0; i < sparseFeatureSize; i++) {
            var entry = sparse.GetEntry(i);
            LSTMWeight w = mat_input2hidden[j][entry.Key];
            //accumulate the sparse input into the forget gate, cell input and output gate of the hidden layer
            cell_j.netForget += entry.Value * w.wInputForgetGate;
            cell_j.netCellState += entry.Value * w.wInputCell;
            cell_j.netOut += entry.Value * w.wInputOutputGate;
        }

        if (fea_size > 0) {
            for (int i = 0; i < fea_size; i++) {
                LSTMWeight w = mat_feature2hidden[j][i];
                cell_j.netForget += neuFeatures[i].ac * w.wInputForgetGate;
                cell_j.netCellState += neuFeatures[i].ac * w.wInputCell;
                cell_j.netOut += neuFeatures[i].ac * w.wInputOutputGate;
            }
        }

        //include internal connection multiplied by the previous cell state
        cell_j.netForget += cell_j.previousCellState * cell_j.wCellForget;
        cell_j.yForget = activationFunctionF(cell_j.netForget);

        //cell state is equal to the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
        cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * activationFunctionG(cell_j.netCellState);

        //include the internal connection multiplied by the CURRENT cell state
        cell_j.netOut += cell_j.cellState * cell_j.wCellOut;

        //squash output gate
        cell_j.yOut = activationFunctionF(cell_j.netOut);

        cell_j.cellOutput = activationFunctionH(cell_j.cellState) * cell_j.yOut;

        neuHidden[j] = cell_j;
    });

    //initialize output nodes
    for (int c = 0; c < L2; c++) {
        neuOutput[c].ac = 0;
    }
    matrixXvectorADD(neuOutput, neuHidden, mat_hidden2output, 0, L2, 0, L1);

    if (doutput != null) {
        for (int i = 0; i < L2; i++) {
            doutput[i] = neuOutput[i].ac;
        }
    }

    //activation 2 --softmax on words
    double sum = 0; //sum is used for normalization: it's better to have larger precision as many numbers are summed together here
    for (int c = 0; c < L2; c++) {
        if (neuOutput[c].ac > 50) neuOutput[c].ac = 50;   //for numerical stability
        if (neuOutput[c].ac < -50) neuOutput[c].ac = -50; //for numerical stability
        double val = Math.Exp(neuOutput[c].ac);
        sum += val;
        neuOutput[c].ac = val;
    }
    for (int c = 0; c < L2; c++) {
        neuOutput[c].ac /= sum;
    }
}
public override void learnNet(State state, int timeat, bool biRNN = false) { }