private void UpdateGateWeights(int curState, LSTMGateWeight gateWeight, int i, float featureDerivate, float c_yForget, float err)
{
    var j = 0;
    float[] deri_i = gateWeight.deri[i];
    float[] learningrate_i = gateWeight.learningRate[i];
    float[] weights_i = gateWeight.weights[i];

    //Vectorized pass over the bulk of the dense features
    while (j < DenseFeatureSize - Vector<float>.Count)
    {
        var feature = new Vector<float>(DenseFeature, j);
        var wd = feature * featureDerivate;
        if (curState > 0)
        {
            //Carry the derivative from the previous time step through the forget gate
            var wd_i = new Vector<float>(deri_i, j);
            wd += wd_i * c_yForget;
        }
        wd.CopyTo(deri_i, j);

        Vector<float> vecDelta = wd * err;
        vecDelta = RNNHelper.NormalizeGradient(vecDelta);

        var wlr_i = new Vector<float>(learningrate_i, j);
        var vecLearningRate = ComputeLearningRate(vecDelta, ref wlr_i);

        var w_i = new Vector<float>(weights_i, j);
        w_i += vecLearningRate * vecDelta;
        w_i.CopyTo(weights_i, j);
        wlr_i.CopyTo(learningrate_i, j);

        j += Vector<float>.Count;
    }

    //Scalar pass over the remaining elements
    while (j < DenseFeatureSize)
    {
        var wd = DenseFeature[j] * featureDerivate;
        if (curState > 0)
        {
            wd += deri_i[j] * c_yForget;
        }
        deri_i[j] = wd;

        float delta = wd * err;
        delta = RNNHelper.NormalizeGradient(delta);

        var wlr_i = learningrate_i[j];
        var learningRate = ComputeLearningRate(delta, ref wlr_i);
        weights_i[j] += learningRate * delta;
        learningrate_i[j] = wlr_i;
        j++;
    }
}
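// ComputeLearningRate is called throughout this listing but not defined in it. Below is a
// minimal AdaGrad-style sketch of a per-weight learning-rate helper, assuming the
// accumulator array stores summed squared gradients; the exact scaling is an assumption,
// not necessarily the library's implementation. RNNHelper.vecNormalLearningRate is the
// base learning rate broadcast as a vector, as used in UpdateBigramTransition below.
private static Vector<float> ComputeLearningRate(Vector<float> vecDelta, ref Vector<float> vecAccumulatedGradSq)
{
    // Accumulate the squared gradient, then shrink the base learning rate as the
    // accumulator grows (AdaGrad-style per-weight adaptation).
    vecAccumulatedGradSq += vecDelta * vecDelta;
    return RNNHelper.vecNormalLearningRate / (Vector.SquareRoot(vecAccumulatedGradSq) + Vector<float>.One);
}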
public static void matrixXvectorADDErr(double[] dest, double[] srcvec, Matrix<double> srcmatrix, int DestSize, int SrcSize)
{
    //Back-propagate error: dest = transpose(srcmatrix) * srcvec, with each component gradient-clipped
    Parallel.For(0, DestSize, i =>
    {
        double er = 0;
        for (int j = 0; j < SrcSize; j++)
        {
            er += srcvec[j] * srcmatrix[j][i];
        }
        dest[i] = RNNHelper.NormalizeGradient(er);
    });
}
public static void matrixXvectorADDErr(double[] dest, double[] srcvec, Matrix<double> srcmatrix, int DestSize, HashSet<int> setSkipSampling)
{
    //Same as above, but only the sampled source rows in setSkipSampling contribute error
    Parallel.For(0, DestSize, i =>
    {
        double er = 0;
        foreach (int j in setSkipSampling)
        {
            er += srcvec[j] * srcmatrix[j][i];
        }
        dest[i] = RNNHelper.NormalizeGradient(er);
    });
}
public static void matrixXvectorADDErr(double[] dest, double[] srcvec, Matrix<double> srcmatrix, HashSet<int> setSkipSampling, int SrcSize)
{
    //Same as above, but only the sampled destination components are computed
    Parallel.ForEach(setSkipSampling, i =>
    {
        double er = 0;
        for (int j = 0; j < SrcSize; j++)
        {
            er += srcvec[j] * srcmatrix[j][i];
        }
        dest[i] = RNNHelper.NormalizeGradient(er);
    });
}
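// All three overloads above pass each accumulated error term through
// RNNHelper.NormalizeGradient, whose body is not part of this listing. A minimal
// gradient-clipping sketch (GradientCutoff is an assumed threshold field):
public static double NormalizeGradient(double err)
{
    // Clip the gradient into [-GradientCutoff, GradientCutoff] to keep training stable.
    if (err > GradientCutoff)
    {
        err = GradientCutoff;
    }
    else if (err < -GradientCutoff)
    {
        err = -GradientCutoff;
    }
    return err;
}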
/// <summary>
/// Update weights
/// </summary>
public void UpdateWeights()
{
    Vector<float> vecMiniBatchSize = new Vector<float>(RNNHelper.MiniBatchSize);
    for (var i = 0; i < layerSize; i++)
    {
        var j = 0;
        var weights_i = weights[i];
        var weightsDelta_i = weightsDelta[i];
        var learningrate_i = learningRate[i];
        var moreItems = (denseFeatureSize % Vector<float>.Count);
        while (j < denseFeatureSize - moreItems)
        {
            //Vectorize weights delta
            Vector<float> vecDelta = new Vector<float>(weightsDelta_i, j);
            Vector<float>.Zero.CopyTo(weightsDelta_i, j);

            //Average the delta over the mini-batch and normalize it
            vecDelta = vecDelta / vecMiniBatchSize;
            vecDelta = RNNHelper.NormalizeGradient(vecDelta);

            //Get learning rate dynamically
            var wlr_i = new Vector<float>(learningrate_i, j);
            var vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref wlr_i);
            wlr_i.CopyTo(learningrate_i, j);

            //Update weights
            Vector<float> vecWeights = new Vector<float>(weights_i, j);
            vecWeights += vecLearningRate * vecDelta;
            vecWeights.CopyTo(weights_i, j);

            j += Vector<float>.Count;
        }

        while (j < denseFeatureSize)
        {
            var delta = weightsDelta_i[j];
            weightsDelta_i[j] = 0;

            delta = delta / RNNHelper.MiniBatchSize;
            delta = RNNHelper.NormalizeGradient(delta);

            var wlr_i = learningrate_i[j];
            var newLearningRate = ComputeLearningRate(delta, ref wlr_i);
            learningrate_i[j] = wlr_i;

            weights_i[j] += newLearningRate * delta;
            j++;
        }
    }
}
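// UpdateWeights above follows the SIMD pattern used throughout this file: process the
// largest multiple of Vector<float>.Count with vector loads and stores, then finish the
// remainder with a scalar tail loop. A self-contained illustration of the same idiom:
static void ScaleInPlace(float[] data, float factor)
{
    var i = 0;
    var tail = data.Length % Vector<float>.Count;
    while (i < data.Length - tail)
    {
        // Vectorized bulk: load, scale and store Vector<float>.Count elements at once.
        var v = new Vector<float>(data, i) * factor;
        v.CopyTo(data, i);
        i += Vector<float>.Count;
    }
    while (i < data.Length)
    {
        // Scalar tail: the last (data.Length % Vector<float>.Count) elements.
        data[i] *= factor;
        i++;
    }
}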
public virtual void LearnFeatureWeights(int numStates, int curState)
{
    //Update hidden-output weights
    Parallel.For(0, LayerSize, parallelOption, c =>
    {
        double er2 = er[c];
        double[] vector_c = DenseWeights[c];
        for (int a = 0; a < DenseFeatureSize; a++)
        {
            double delta = RNNHelper.NormalizeGradient(er2 * DenseFeature[a]);
            double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, a, delta);
            vector_c[a] += newLearningRate * delta;
        }
    });
}
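// RNNHelper.UpdateLearningRate is called above but not defined in this listing. A scalar
// AdaGrad-style sketch of what such a helper could look like; the accumulator-plus-one
// scaling is an assumption, chosen to mirror the vectorized ComputeLearningRate sketch
// earlier, and LearningRate is the base rate used by UpdateBigramTransition below.
public static double UpdateLearningRate(Matrix<double> learningRates, int i, int j, double delta)
{
    // Accumulate the squared gradient for this weight, then derive its step size.
    double accumulated = learningRates[i][j] + delta * delta;
    learningRates[i][j] = accumulated;
    return LearningRate / (1.0 + Math.Sqrt(accumulated));
}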
public override void LearnFeatureWeights(int numStates, int curState)
{
    //Update hidden-output weights, restricted to the negative-sampled word list
    Parallel.ForEach(negativeSampleWordList, c =>
    {
        double er2 = er[c];
        double[] vector_c = DenseWeights[c];
        for (int a = 0; a < DenseFeatureSize; a++)
        {
            double delta = RNNHelper.NormalizeGradient(er2 * DenseFeature[a]);
            double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, a, delta);
            vector_c[a] += newLearningRate * delta;
        }
    });
}
public override void BackwardPass(int numStates, int curState)
{
    if (DenseFeatureSize > 0)
    {
        //Update dense feature weights for the sampled words
        Parallel.ForEach(negativeSampleWordList, c =>
        {
            var err = Err[c];
            var featureWeightCol = DenseWeights[c];
            var featureWeightsLearningRateCol = DenseWeightsLearningRate[c];
            var j = 0;
            while (j < DenseFeatureSize - Vector<float>.Count)
            {
                RNNHelper.UpdateFeatureWeights(DenseFeature, featureWeightCol, featureWeightsLearningRateCol, err, j);
                j += Vector<float>.Count;
            }
            while (j < DenseFeatureSize)
            {
                var delta = RNNHelper.NormalizeGradient(err * DenseFeature[j]);
                var newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, j, delta);
                featureWeightCol[j] += newLearningRate * delta;
                j++;
            }
        });
    }

    if (SparseFeatureSize > 0)
    {
        //Update sparse feature weights for the sampled words
        Parallel.ForEach(negativeSampleWordList, c =>
        {
            var er2 = Err[c];
            var vector_c = SparseWeights[c];
            foreach (var pair in SparseFeature)
            {
                var pos = pair.Key;
                var val = pair.Value;
                var delta = RNNHelper.NormalizeGradient(er2 * val);
                var newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, c, pos, delta);
                vector_c[pos] += newLearningRate * delta;
            }
        });
    }
}
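// RNNHelper.UpdateFeatureWeights, used in the vectorized loop of BackwardPass above, is
// not shown in this listing. Based on the scalar tail loop of the same method, a
// plausible vectorized body would be the sketch below (an assumption, not the library's
// confirmed code); it reuses the NormalizeGradient and ComputeLearningRate helpers.
public static void UpdateFeatureWeights(float[] feature, float[] weights, float[] learningRates, float err, int j)
{
    // delta = clip(err * feature[j..]), then an adaptive-rate weight step.
    var vecDelta = new Vector<float>(feature, j) * err;
    vecDelta = NormalizeGradient(vecDelta);

    var wlr = new Vector<float>(learningRates, j);
    var vecLearningRate = ComputeLearningRate(vecDelta, ref wlr);
    wlr.CopyTo(learningRates, j);

    var w = new Vector<float>(weights, j);
    w += vecLearningRate * vecDelta;
    w.CopyTo(weights, j);
}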
public override void ComputeLayerErr(SimpleLayer nextLayer)
{
    var layer = nextLayer as LSTMLayer;
    if (layer != null)
    {
        //Propagate error back through the next LSTM layer's dense output-gate weights
        Parallel.For(0, LayerSize, parallelOption, i =>
        {
            var err = 0.0f;
            for (var k = 0; k < nextLayer.LayerSize; k++)
            {
                err += layer.Err[k] * layer.wDenseOutputGate.weights[k][i];
            }
            Err[i] = RNNHelper.NormalizeGradient(err);
        });
    }
    else
    {
        base.ComputeLayerErr(nextLayer);
    }
}
public override void LearnFeatureWeights(int numStates, int curState)
{
    if (DenseFeatureSize > 0)
    {
        //Update dense feature weights for the sampled words
        Parallel.ForEach(negativeSampleWordList, c =>
        {
            double er2 = er[c];
            double[] vector_c = DenseWeights[c];
            for (int a = 0; a < DenseFeatureSize; a++)
            {
                double delta = RNNHelper.NormalizeGradient(er2 * DenseFeature[a]);
                double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, a, delta);
                vector_c[a] += newLearningRate * delta;
            }
        });
    }

    if (SparseFeatureSize > 0)
    {
        //Update sparse feature weights for the sampled words
        Parallel.ForEach(negativeSampleWordList, c =>
        {
            double er2 = er[c];
            double[] vector_c = SparseWeights[c];
            foreach (KeyValuePair<int, float> pair in SparseFeature)
            {
                int pos = pair.Key;
                double val = pair.Value;
                double delta = RNNHelper.NormalizeGradient(er2 * val);
                double newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, c, pos, delta);
                vector_c[pos] += newLearningRate * delta;
            }
        });
    }
}
public virtual void LearnFeatureWeights(int numStates, int curState)
{
    if (DenseFeatureSize > 0)
    {
        //Update dense feature weights
        Parallel.For(0, LayerSize, parallelOption, c =>
        {
            double er2 = er[c];
            double[] vector_c = DenseWeights[c];
            for (int a = 0; a < DenseFeatureSize; a++)
            {
                double delta = RNNHelper.NormalizeGradient(er2 * DenseFeature[a]);
                double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, a, delta);
                vector_c[a] += newLearningRate * delta;
            }
        });
    }

    if (SparseFeatureSize > 0)
    {
        //Update sparse feature weights
        Parallel.For(0, LayerSize, parallelOption, c =>
        {
            double er2 = er[c];
            double[] vector_c = SparseWeights[c];
            for (int a = 0; a < SparseFeature.Count; a++)
            {
                var entry = SparseFeature.GetEntry(a);
                int pos = entry.Key;
                double val = entry.Value;
                double delta = RNNHelper.NormalizeGradient(er2 * val);
                double newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, c, pos, delta);
                vector_c[pos] += newLearningRate * delta;
            }
        });
    }
}
private void UpdateOutputGateWeights(LSTMGateWeight gateWeight, int i, float err)
{
    //Like UpdateGateWeights above, but the output gate carries no derivative across time steps
    var j = 0;
    float[] learningrate_i = gateWeight.learningRate[i];
    float[] weights_i = gateWeight.weights[i];

    while (j < DenseFeatureSize - Vector<float>.Count)
    {
        Vector<float> vecDelta = new Vector<float>(DenseFeature, j);
        vecDelta = vecDelta * err;
        vecDelta = RNNHelper.NormalizeGradient(vecDelta);

        var wlr_i = new Vector<float>(learningrate_i, j);
        var vecLearningRate = ComputeLearningRate(vecDelta, ref wlr_i);

        var w_i = new Vector<float>(weights_i, j);
        w_i += vecLearningRate * vecDelta;
        w_i.CopyTo(weights_i, j);
        wlr_i.CopyTo(learningrate_i, j);

        j += Vector<float>.Count;
    }

    while (j < DenseFeatureSize)
    {
        float delta = DenseFeature[j] * err;
        delta = RNNHelper.NormalizeGradient(delta);

        var wlr_i = learningrate_i[j];
        var learningRate = ComputeLearningRate(delta, ref wlr_i);
        weights_i[j] += learningRate * delta;
        learningrate_i[j] = wlr_i;
        j++;
    }
}
private void learnBptt()
{
    for (int step = 0; step < bptt + bptt_block - 2; step++)
    {
        if (null == bptt_inputs[step] && null == bptt_fea[step])
        {
            break;
        }

        var sparse = bptt_inputs[step];
        var bptt_fea_step = bptt_fea[step];
        var last_bptt_hidden = bptt_hidden[step + 1];
        var last_last_bptt_hidden = bptt_hidden[step + 2];

        Parallel.For(0, LayerSize, parallelOption, a =>
        {
            //compute hidden layer gradient (sigmoid derivative: y * (1 - y))
            er[a] *= cellOutput[a] * (1 - cellOutput[a]);

            //dense weight delta: feature -> hidden
            double[] vector_a = null;
            double er2 = er[a];
            Vector<double> vecErr = new Vector<double>(er2);
            int i = 0;
            if (DenseFeatureSize > 0)
            {
                vector_a = DenseWeightsDelta[a];
                i = 0;
                while (i < DenseFeatureSize - Vector<double>.Count)
                {
                    Vector<double> v1 = new Vector<double>(bptt_fea_step, i);
                    Vector<double> v2 = new Vector<double>(vector_a, i);
                    v2 += vecErr * v1;
                    v2.CopyTo(vector_a, i);
                    i += Vector<double>.Count;
                }
                while (i < DenseFeatureSize)
                {
                    vector_a[i] += er2 * bptt_fea_step[i];
                    i++;
                }
            }

            if (SparseFeatureSize > 0)
            {
                //sparse weight delta: input -> hidden
                vector_a = SparseWeightsDelta[a];
                for (i = 0; i < sparse.Count; i++)
                {
                    var entry = sparse.GetEntry(i);
                    vector_a[entry.Key] += er2 * entry.Value;
                }
            }

            //bptt (recurrent) weight delta
            vector_a = BpttWeightsDelta[a];
            i = 0;
            while (i < LayerSize - Vector<double>.Count)
            {
                Vector<double> v1 = new Vector<double>(previousCellOutput, i);
                Vector<double> v2 = new Vector<double>(vector_a, i);
                v2 += vecErr * v1;
                v2.CopyTo(vector_a, i);
                i += Vector<double>.Count;
            }
            while (i < LayerSize)
            {
                vector_a[i] += er2 * previousCellOutput[i];
                i++;
            }
        });

        //propagate errors back through the recurrent weights
        double[] previousHiddenErr = new double[LayerSize];
        RNNHelper.matrixXvectorADDErr(previousHiddenErr, er, BpttWeights, LayerSize, LayerSize);

        for (int a = 0; a < LayerSize; a++)
        {
            //propagate error from time T-n to T-n-1
            er[a] = previousHiddenErr[a] + last_bptt_hidden.er[a];
        }

        if (step < bptt + bptt_block - 3)
        {
            for (int a = 0; a < LayerSize; a++)
            {
                cellOutput[a] = last_bptt_hidden.cellOutput[a];
                previousCellOutput[a] = last_last_bptt_hidden.cellOutput[a];
            }
        }
    }

    //restore hidden layer after bptt
    bptt_hidden[0].cellOutput.CopyTo(cellOutput, 0);

    Parallel.For(0, LayerSize, parallelOption, b =>
    {
        double[] vector_b = null;
        double[] vector_bf = null;
        double[] vector_lr = null;

        //Update bptt feature weights
        vector_b = BpttWeights[b];
        vector_bf = BpttWeightsDelta[b];
        vector_lr = BpttWeightsLearningRate[b];

        int i = 0;
        while (i < LayerSize - Vector<double>.Count)
        {
            Vector<double> vecDelta = new Vector<double>(vector_bf, i);
            Vector<double> vecLearningRateWeights = new Vector<double>(vector_lr, i);
            Vector<double> vecB = new Vector<double>(vector_b, i);

            //Normalize delta
            vecDelta = RNNHelper.NormalizeGradient(vecDelta);

            //Compute learning rate and update its accumulator
            Vector<double> vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref vecLearningRateWeights);
            vecLearningRateWeights.CopyTo(vector_lr, i);

            //Update weights
            vecB += vecLearningRate * vecDelta;
            vecB.CopyTo(vector_b, i);

            //Clean bptt weight error
            Vector<double>.Zero.CopyTo(vector_bf, i);

            i += Vector<double>.Count;
        }
        while (i < LayerSize)
        {
            double delta = RNNHelper.NormalizeGradient(vector_bf[i]);
            double newLearningRate = RNNHelper.UpdateLearningRate(BpttWeightsLearningRate, b, i, delta);
            vector_b[i] += newLearningRate * delta;

            //Clean bptt weight error
            vector_bf[i] = 0;
            i++;
        }

        //Update dense feature weights
        if (DenseFeatureSize > 0)
        {
            vector_b = DenseWeights[b];
            vector_bf = DenseWeightsDelta[b];
            vector_lr = DenseWeightsLearningRate[b];

            i = 0;
            while (i < DenseFeatureSize - Vector<double>.Count)
            {
                Vector<double> vecDelta = new Vector<double>(vector_bf, i);
                Vector<double> vecLearningRateWeights = new Vector<double>(vector_lr, i);
                Vector<double> vecB = new Vector<double>(vector_b, i);

                //Normalize delta
                vecDelta = RNNHelper.NormalizeGradient(vecDelta);

                //Compute learning rate and update its accumulator
                Vector<double> vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref vecLearningRateWeights);
                vecLearningRateWeights.CopyTo(vector_lr, i);

                //Update weights
                vecB += vecLearningRate * vecDelta;
                vecB.CopyTo(vector_b, i);

                //Clean dense feature weight error
                Vector<double>.Zero.CopyTo(vector_bf, i);

                i += Vector<double>.Count;
            }
            while (i < DenseFeatureSize)
            {
                double delta = RNNHelper.NormalizeGradient(vector_bf[i]);
                double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, b, i, delta);
                vector_b[i] += newLearningRate * delta;

                //Clean dense feature weight error
                vector_bf[i] = 0;
                i++;
            }
        }

        if (SparseFeatureSize > 0)
        {
            //Update sparse feature weights
            vector_b = SparseWeights[b];
            vector_bf = SparseWeightsDelta[b];
            for (int step = 0; step < bptt + bptt_block - 2; step++)
            {
                var sparse = bptt_inputs[step];
                if (sparse == null)
                {
                    break;
                }

                for (i = 0; i < sparse.Count; i++)
                {
                    int pos = sparse.GetEntry(i).Key;
                    double delta = RNNHelper.NormalizeGradient(vector_bf[pos]);
                    double newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, b, pos, delta);
                    vector_b[pos] += newLearningRate * delta;

                    //Clean sparse feature weight error
                    vector_bf[pos] = 0;
                }
            }
        }
    });
}
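// In learnBptt above, "er[a] *= cellOutput[a] * (1 - cellOutput[a])" applies the
// derivative of the sigmoid activation: for y = 1 / (1 + exp(-z)), dy/dz = y * (1 - y),
// so the error can be scaled from the stored output alone, without recomputing z:
static double SigmoidDerivativeFromOutput(double y) => y * (1.0 - y);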
public void UpdateBigramTransition(Sequence seq)
{
    var OutputLayerSize = OutputLayer.LayerSize;
    var numStates = seq.States.Length;
    var m_DeltaBigramLM = new Matrix<float>(OutputLayerSize, OutputLayerSize);

    for (var timeat = 1; timeat < numStates; timeat++)
    {
        var CRFSeqOutput_timeat = CRFSeqOutput[timeat];
        var CRFSeqOutput_pre_timeat = CRFSeqOutput[timeat - 1];
        for (var i = 0; i < OutputLayerSize; i++)
        {
            var CRFSeqOutput_timeat_i = CRFSeqOutput_timeat[i];
            var CRFTagTransWeights_i = CRFTagTransWeights[i];
            var m_DeltaBigramLM_i = m_DeltaBigramLM[i];
            var j = 0;
            var vecCRFSeqOutput_timeat_i = new Vector<float>(CRFSeqOutput_timeat_i);
            while (j < OutputLayerSize - Vector<float>.Count)
            {
                var v1 = new Vector<float>(CRFTagTransWeights_i, j);
                var v2 = new Vector<float>(CRFSeqOutput_pre_timeat, j);
                var v = new Vector<float>(m_DeltaBigramLM_i, j);
                v -= v1 * vecCRFSeqOutput_timeat_i * v2;
                v.CopyTo(m_DeltaBigramLM_i, j);
                j += Vector<float>.Count;
            }
            while (j < OutputLayerSize)
            {
                m_DeltaBigramLM_i[j] -= CRFTagTransWeights_i[j] * CRFSeqOutput_timeat_i * CRFSeqOutput_pre_timeat[j];
                j++;
            }
        }

        //Accumulate +1 for the gold transition observed at this time step
        var iTagId = seq.States[timeat].Label;
        var iLastTagId = seq.States[timeat - 1].Label;
        m_DeltaBigramLM[iTagId][iLastTagId] += 1;
    }

    //Update tag Bigram LM
    for (var b = 0; b < OutputLayerSize; b++)
    {
        var vector_b = CRFTagTransWeights[b];
        var vector_delta_b = m_DeltaBigramLM[b];
        var a = 0;
        while (a < OutputLayerSize - Vector<float>.Count)
        {
            var v1 = new Vector<float>(vector_delta_b, a);
            var v = new Vector<float>(vector_b, a);

            //Normalize delta
            v1 = RNNHelper.NormalizeGradient(v1);

            //Update weights
            v += RNNHelper.vecNormalLearningRate * v1;
            v.CopyTo(vector_b, a);

            a += Vector<float>.Count;
        }
        while (a < OutputLayerSize)
        {
            vector_b[a] += RNNHelper.LearningRate * RNNHelper.NormalizeGradient(vector_delta_b[a]);
            a++;
        }
    }
}
public virtual void UpdateWeights()
{
    Vector<float> vecMiniBatchSize = new Vector<float>(RNNHelper.MiniBatchSize);
    for (var i = 0; i < LayerSize; i++)
    {
        if (SparseFeatureSize > 0)
        {
            var sparseWeights_i = SparseWeights[i];
            var sparseDelta_i = SparseWeightsDelta[i];
            var sparseLearningRate_i = SparseWeightsLearningRate[i];
            var j = 0;
            var moreItems = (SparseFeatureSize % Vector<float>.Count);
            while (j < SparseFeatureSize - moreItems)
            {
                Vector<float> vecDelta = new Vector<float>(sparseDelta_i, j);
                Vector<float>.Zero.CopyTo(sparseDelta_i, j);

                //Skip the update when the accumulated sparse delta is all zero
                if (vecDelta != Vector<float>.Zero)
                {
                    vecDelta = vecDelta / vecMiniBatchSize;
                    vecDelta = RNNHelper.NormalizeGradient(vecDelta);

                    var wlr_i = new Vector<float>(sparseLearningRate_i, j);
                    var vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref wlr_i);
                    wlr_i.CopyTo(sparseLearningRate_i, j);

                    vecDelta = vecLearningRate * vecDelta;

                    Vector<float> vecWeights = new Vector<float>(sparseWeights_i, j);
                    vecWeights += vecDelta;
                    vecWeights.CopyTo(sparseWeights_i, j);
                }

                j += Vector<float>.Count;
            }
            while (j < SparseFeatureSize)
            {
                var delta = sparseDelta_i[j];
                sparseDelta_i[j] = 0;

                delta = delta / RNNHelper.MiniBatchSize;
                delta = RNNHelper.NormalizeGradient(delta);

                var newLearningRate = RNNHelper.ComputeLearningRate(SparseWeightsLearningRate, i, j, delta);
                sparseWeights_i[j] += newLearningRate * delta;
                j++;
            }
        }

        if (DenseFeatureSize > 0)
        {
            var denseWeights_i = DenseWeights[i];
            var denseDelta_i = DenseWeightsDelta[i];
            var denseLearningRate_i = DenseWeightsLearningRate[i];
            var j = 0;
            var moreItems = (DenseFeatureSize % Vector<float>.Count);
            while (j < DenseFeatureSize - moreItems)
            {
                Vector<float> vecDelta = new Vector<float>(denseDelta_i, j);
                Vector<float>.Zero.CopyTo(denseDelta_i, j);

                vecDelta = vecDelta / vecMiniBatchSize;
                vecDelta = RNNHelper.NormalizeGradient(vecDelta);

                var wlr_i = new Vector<float>(denseLearningRate_i, j);
                var vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref wlr_i);
                wlr_i.CopyTo(denseLearningRate_i, j);

                vecDelta = vecLearningRate * vecDelta;

                Vector<float> vecWeights = new Vector<float>(denseWeights_i, j);
                vecWeights += vecDelta;
                vecWeights.CopyTo(denseWeights_i, j);

                j += Vector<float>.Count;
            }
            while (j < DenseFeatureSize)
            {
                var delta = denseDelta_i[j];
                denseDelta_i[j] = 0;

                delta = delta / RNNHelper.MiniBatchSize;
                delta = RNNHelper.NormalizeGradient(delta);

                var newLearningRate = RNNHelper.ComputeLearningRate(DenseWeightsLearningRate, i, j, delta);
                denseWeights_i[j] += newLearningRate * delta;
                j++;
            }
        }
    }
}
public void UpdateBigramTransition(Sequence seq)
{
    int OutputLayerSize = OutputLayer.LayerSize;
    int numStates = seq.States.Length;
    Matrix<double> m_DeltaBigramLM = new Matrix<double>(OutputLayerSize, OutputLayerSize);

    for (int timeat = 1; timeat < numStates; timeat++)
    {
        double[] CRFSeqOutput_timeat = CRFSeqOutput[timeat];
        double[] CRFSeqOutput_pre_timeat = CRFSeqOutput[timeat - 1];
        for (int i = 0; i < OutputLayerSize; i++)
        {
            double CRFSeqOutput_timeat_i = CRFSeqOutput_timeat[i];
            double[] CRFTagTransWeights_i = CRFTagTransWeights[i];
            double[] m_DeltaBigramLM_i = m_DeltaBigramLM[i];
            int j = 0;
            Vector<double> vecCRFSeqOutput_timeat_i = new Vector<double>(CRFSeqOutput_timeat_i);
            while (j < OutputLayerSize - Vector<double>.Count)
            {
                Vector<double> v1 = new Vector<double>(CRFTagTransWeights_i, j);
                Vector<double> v2 = new Vector<double>(CRFSeqOutput_pre_timeat, j);
                Vector<double> v = new Vector<double>(m_DeltaBigramLM_i, j);
                v -= (v1 * vecCRFSeqOutput_timeat_i * v2);
                v.CopyTo(m_DeltaBigramLM_i, j);
                j += Vector<double>.Count;
            }
            while (j < OutputLayerSize)
            {
                m_DeltaBigramLM_i[j] -= (CRFTagTransWeights_i[j] * CRFSeqOutput_timeat_i * CRFSeqOutput_pre_timeat[j]);
                j++;
            }
        }

        int iTagId = seq.States[timeat].Label;
        int iLastTagId = seq.States[timeat - 1].Label;
        m_DeltaBigramLM[iTagId][iLastTagId] += 1;
    }

    //Update tag Bigram LM
    for (int b = 0; b < OutputLayerSize; b++)
    {
        double[] vector_b = CRFTagTransWeights[b];
        double[] vector_delta_b = m_DeltaBigramLM[b];
        int a = 0;
        while (a < OutputLayerSize - Vector<double>.Count)
        {
            Vector<double> v1 = new Vector<double>(vector_delta_b, a);
            Vector<double> v = new Vector<double>(vector_b, a);

            //Normalize delta
            v1 = RNNHelper.NormalizeGradient(v1);

            //Update weights
            v += RNNHelper.vecNormalLearningRate * v1;
            v.CopyTo(vector_b, a);

            a += Vector<double>.Count;
        }
        while (a < OutputLayerSize)
        {
            vector_b[a] += RNNHelper.LearningRate * RNNHelper.NormalizeGradient(vector_delta_b[a]);
            a++;
        }
    }
}