public virtual void LearnFeatureWeights(int numStates, int curState)
{
    //Update hidden-output weights
    Parallel.For(0, LayerSize, parallelOption, c =>
    {
        double er2 = er[c];
        double[] vector_c = DenseWeights[c];
        for (int a = 0; a < DenseFeatureSize; a++)
        {
            double delta = RNNHelper.NormalizeGradient(er2 * DenseFeature[a]);
            double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, a, delta);
            vector_c[a] += newLearningRate * delta;
        }
    });
}
public override void LearnFeatureWeights(int numStates, int curState)
{
    //Update hidden-output weights, but only for the rows in the negative sample list
    Parallel.ForEach(negativeSampleWordList, c =>
    {
        double er2 = er[c];
        double[] vector_c = DenseWeights[c];
        for (int a = 0; a < DenseFeatureSize; a++)
        {
            double delta = RNNHelper.NormalizeGradient(er2 * DenseFeature[a]);
            double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, a, delta);
            vector_c[a] += newLearningRate * delta;
        }
    });
}
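Both variants above delegate gradient clipping and per-weight learning-rate bookkeeping to RNNHelper. Those helpers are not shown in this section; the following is a minimal sketch of what they presumably look like, assuming an AdaGrad-style squared-gradient accumulator stored in the learning-rate matrix (GradientCutoff and LearningRate are assumed static fields, not confirmed by this section):

// Sketch only: assumed shapes of the RNNHelper methods called above.
public static double NormalizeGradient(double err)
{
    // Clip the raw gradient into [-GradientCutoff, GradientCutoff]
    if (err > GradientCutoff) err = GradientCutoff;
    else if (err < -GradientCutoff) err = -GradientCutoff;
    return err;
}

public static double UpdateLearningRate(double[][] m, int i, int j, double delta)
{
    // AdaGrad-style: accumulate the squared gradient per weight and
    // shrink the effective learning rate as the accumulator grows.
    double accum = m[i][j] + delta * delta;
    m[i][j] = accum;
    return LearningRate / (1.0 + Math.Sqrt(accum));
}

Under this reading, each weight gets its own decaying step size, which is why the code threads the learning-rate matrix through every update call.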
public override void BackwardPass(int numStates, int curState)
{
    if (DenseFeatureSize > 0)
    {
        //Update hidden-output weights
        Parallel.ForEach(negativeSampleWordList, c =>
        {
            var err = Err[c];
            var featureWeightCol = DenseWeights[c];
            var featureWeightsLearningRateCol = DenseWeightsLearningRate[c];

            //Vectorized fast path over full SIMD lane groups
            var j = 0;
            while (j < DenseFeatureSize - Vector<float>.Count)
            {
                RNNHelper.UpdateFeatureWeights(DenseFeature, featureWeightCol, featureWeightsLearningRateCol, err, j);
                j += Vector<float>.Count;
            }

            //Scalar remainder for the tail elements
            while (j < DenseFeatureSize)
            {
                var delta = RNNHelper.NormalizeGradient(err * DenseFeature[j]);
                var newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, j, delta);
                featureWeightCol[j] += newLearningRate * delta;
                j++;
            }
        });
    }

    if (SparseFeatureSize > 0)
    {
        //Update sparse feature weights
        Parallel.ForEach(negativeSampleWordList, c =>
        {
            var er2 = Err[c];
            var vector_c = SparseWeights[c];
            foreach (var pair in SparseFeature)
            {
                var pos = pair.Key;
                var val = pair.Value;
                var delta = RNNHelper.NormalizeGradient(er2 * val);
                var newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, c, pos, delta);
                vector_c[pos] += newLearningRate * delta;
            }
        });
    }
}
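The vectorized fast path relies on RNNHelper.UpdateFeatureWeights, which is not shown in this section. A plausible sketch, assuming it fuses clipping, the AdaGrad accumulator, and the weight update over one Vector<float> lane group (GradientCutoff and LearningRate remain assumed names):

// Sketch only: one SIMD step equivalent to the scalar tail loop above.
public static void UpdateFeatureWeights(float[] feature, float[] weights,
    float[] learningRates, float err, int j)
{
    var vecDelta = new Vector<float>(feature, j) * err;                   // raw gradient for this lane group
    vecDelta = Vector.Min(vecDelta, new Vector<float>(GradientCutoff));   // clip high
    vecDelta = Vector.Max(vecDelta, new Vector<float>(-GradientCutoff));  // clip low

    var vecAccum = new Vector<float>(learningRates, j) + vecDelta * vecDelta; // AdaGrad accumulator
    vecAccum.CopyTo(learningRates, j);

    var vecRate = new Vector<float>(LearningRate) /
                  (Vector<float>.One + Vector.SquareRoot(vecAccum));
    var vecW = new Vector<float>(weights, j) + vecRate * vecDelta;
    vecW.CopyTo(weights, j);
}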
public virtual void BackwardPass()
{
    if (DenseFeatureSize > 0)
    {
        //Update hidden-output weights
        for (var c = 0; c < LayerSize; c++)
        {
            var err = Errs[c];
            var featureWeightCol = DenseWeights[c];
            var featureWeightsLearningRateCol = DenseWeightsLearningRate[c];

            //Note: there is no scalar remainder loop here, so this assumes
            //DenseFeatureSize is a multiple of Vector<float>.Count (see the padding sketch below).
            var j = 0;
            while (j < DenseFeatureSize)
            {
                UpdateFeatureWeights(DenseFeature, featureWeightCol, featureWeightsLearningRateCol, err, j, c);
                j += Vector<float>.Count;
            }
        }
    }

    if (SparseFeatureSize > 0)
    {
        //Update sparse feature weights
        for (var c = 0; c < LayerSize; c++)
        {
            var er2 = Errs[c];
            var vector_c = SparseWeights[c];
            foreach (var pair in SparseFeature)
            {
                var pos = pair.Key;
                var val = pair.Value;
                var delta = er2 * val; // RNNHelper.NormalizeGradient(er2 * val);
                var newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, c, pos, delta);
                vector_c[pos] += newLearningRate * delta;
            }
        }
    }
}
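Unlike the override above, this base BackwardPass has no scalar tail loop, so it is only safe if the dense loop bound is a full number of SIMD lane groups, assuming the instance UpdateFeatureWeights processes one whole Vector<float> group per call. A minimal sketch of the padding this implies (SetDenseFeatureSize is a hypothetical name, not taken from this section):

// Sketch only: round the dense feature size up to a full SIMD lane group so the
// vector-only loop in BackwardPass never reads past the end of the arrays.
public void SetDenseFeatureSize(int size)
{
    int lanes = Vector<float>.Count;
    DenseFeatureSize = (size + lanes - 1) / lanes * lanes; // round up to a multiple of lanes
}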
public override void LearnFeatureWeights(int numStates, int curState)
{
    if (DenseFeatureSize > 0)
    {
        //Update hidden-output weights
        Parallel.ForEach(negativeSampleWordList, c =>
        {
            double er2 = er[c];
            double[] vector_c = DenseWeights[c];
            for (int a = 0; a < DenseFeatureSize; a++)
            {
                double delta = RNNHelper.NormalizeGradient(er2 * DenseFeature[a]);
                double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, a, delta);
                vector_c[a] += newLearningRate * delta;
            }
        });
    }

    if (SparseFeatureSize > 0)
    {
        //Update sparse feature weights
        Parallel.ForEach(negativeSampleWordList, c =>
        {
            double er2 = er[c];
            double[] vector_c = SparseWeights[c];
            foreach (KeyValuePair<int, float> pair in SparseFeature)
            {
                int pos = pair.Key;
                double val = pair.Value;
                double delta = RNNHelper.NormalizeGradient(er2 * val);
                double newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, c, pos, delta);
                vector_c[pos] += newLearningRate * delta;
            }
        });
    }
}
public virtual void LearnFeatureWeights(int numStates, int curState)
{
    if (DenseFeatureSize > 0)
    {
        //Update hidden-output weights
        Parallel.For(0, LayerSize, parallelOption, c =>
        {
            double er2 = er[c];
            double[] vector_c = DenseWeights[c];
            for (int a = 0; a < DenseFeatureSize; a++)
            {
                double delta = RNNHelper.NormalizeGradient(er2 * DenseFeature[a]);
                double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, c, a, delta);
                vector_c[a] += newLearningRate * delta;
            }
        });
    }

    if (SparseFeatureSize > 0)
    {
        //Update sparse feature weights
        Parallel.For(0, LayerSize, parallelOption, c =>
        {
            double er2 = er[c];
            double[] vector_c = SparseWeights[c];
            for (int a = 0; a < SparseFeature.Count; a++)
            {
                var entry = SparseFeature.GetEntry(a);
                int pos = entry.Key;
                double val = entry.Value;
                double delta = RNNHelper.NormalizeGradient(er2 * val);
                double newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, c, pos, delta);
                vector_c[pos] += newLearningRate * delta;
            }
        });
    }
}
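The sparse updates in this section access SparseFeature two ways: by enumeration over KeyValuePair<int, float> (the overrides above) and by index via GetEntry (this base version). A minimal container satisfying both patterns, as a sketch only; the library's real SparseVector may differ:

// Sketch only: minimal sparse feature container matching both access patterns above.
using System.Collections;
using System.Collections.Generic;

public class SparseVector : IEnumerable<KeyValuePair<int, float>>
{
    private readonly List<KeyValuePair<int, float>> entries = new List<KeyValuePair<int, float>>();

    public int Count => entries.Count;

    public void Add(int pos, float val) => entries.Add(new KeyValuePair<int, float>(pos, val));

    // Indexed access used by the base LearnFeatureWeights and learnBptt
    public KeyValuePair<int, float> GetEntry(int i) => entries[i];

    // Enumeration used by the foreach-based overrides
    public IEnumerator<KeyValuePair<int, float>> GetEnumerator() => entries.GetEnumerator();
    IEnumerator IEnumerable.GetEnumerator() => entries.GetEnumerator();
}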
private void learnBptt()
{
    for (int step = 0; step < bptt + bptt_block - 2; step++)
    {
        if (null == bptt_inputs[step] && null == bptt_fea[step])
        {
            break;
        }

        var sparse = bptt_inputs[step];
        var bptt_fea_step = bptt_fea[step];
        var last_bptt_hidden = bptt_hidden[step + 1];
        var last_last_bptt_hidden = bptt_hidden[step + 2];

        Parallel.For(0, LayerSize, parallelOption, a =>
        {
            // compute hidden layer gradient (sigmoid derivative)
            er[a] *= cellOutput[a] * (1 - cellOutput[a]);

            //dense weight update fea->hidden
            double[] vector_a = null;
            double er2 = er[a];
            Vector<double> vecErr = new Vector<double>(er2);

            int i = 0;
            if (DenseFeatureSize > 0)
            {
                vector_a = DenseWeightsDelta[a];
                i = 0;
                while (i < DenseFeatureSize - Vector<double>.Count)
                {
                    Vector<double> v1 = new Vector<double>(bptt_fea_step, i);
                    Vector<double> v2 = new Vector<double>(vector_a, i);
                    v2 += vecErr * v1;
                    v2.CopyTo(vector_a, i);
                    i += Vector<double>.Count;
                }
                while (i < DenseFeatureSize)
                {
                    vector_a[i] += er2 * bptt_fea_step[i];
                    i++;
                }
            }

            if (SparseFeatureSize > 0)
            {
                //sparse weight update hidden->input
                vector_a = SparseWeightsDelta[a];
                for (i = 0; i < sparse.Count; i++)
                {
                    var entry = sparse.GetEntry(i);
                    vector_a[entry.Key] += er2 * entry.Value;
                }
            }

            //bptt weight update
            vector_a = BpttWeightsDelta[a];
            i = 0;
            while (i < LayerSize - Vector<double>.Count)
            {
                Vector<double> v1 = new Vector<double>(previousCellOutput, i);
                Vector<double> v2 = new Vector<double>(vector_a, i);
                v2 += vecErr * v1;
                v2.CopyTo(vector_a, i);
                i += Vector<double>.Count;
            }
            while (i < LayerSize)
            {
                vector_a[i] += er2 * previousCellOutput[i];
                i++;
            }
        });

        //Propagate errors back through the recurrent (BPTT) weights
        double[] previousHiddenErr = new double[LayerSize];
        RNNHelper.matrixXvectorADDErr(previousHiddenErr, er, BpttWeights, LayerSize, LayerSize);

        for (int a = 0; a < LayerSize; a++)
        {
            //propagate error from time T-n to T-n-1
            er[a] = previousHiddenErr[a] + last_bptt_hidden.er[a];
        }

        if (step < bptt + bptt_block - 3)
        {
            for (int a = 0; a < LayerSize; a++)
            {
                cellOutput[a] = last_bptt_hidden.cellOutput[a];
                previousCellOutput[a] = last_last_bptt_hidden.cellOutput[a];
            }
        }
    }

    //restore hidden layer after bptt
    bptt_hidden[0].cellOutput.CopyTo(cellOutput, 0);

    Parallel.For(0, LayerSize, parallelOption, b =>
    {
        double[] vector_b = null;
        double[] vector_bf = null;
        double[] vector_lr = null;

        //Update bptt feature weights
        vector_b = BpttWeights[b];
        vector_bf = BpttWeightsDelta[b];
        vector_lr = BpttWeightsLearningRate[b];

        int i = 0;
        while (i < LayerSize - Vector<double>.Count)
        {
            Vector<double> vecDelta = new Vector<double>(vector_bf, i);
            Vector<double> vecLearningRateWeights = new Vector<double>(vector_lr, i);
            Vector<double> vecB = new Vector<double>(vector_b, i);

            //Normalize delta
            vecDelta = RNNHelper.NormalizeGradient(vecDelta);

            //Compute the learning rate and update its accumulator
            Vector<double> vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref vecLearningRateWeights);
            vecLearningRateWeights.CopyTo(vector_lr, i);

            //Update weights
            vecB += vecLearningRate * vecDelta;
            vecB.CopyTo(vector_b, i);

            //Clean accumulated weight deltas
            Vector<double>.Zero.CopyTo(vector_bf, i);

            i += Vector<double>.Count;
        }
        while (i < LayerSize)
        {
            double delta = RNNHelper.NormalizeGradient(vector_bf[i]);
            double newLearningRate = RNNHelper.UpdateLearningRate(BpttWeightsLearningRate, b, i, delta);
            vector_b[i] += newLearningRate * delta;

            //Clean bptt weight error
            vector_bf[i] = 0;
            i++;
        }

        //Update dense feature weights
        if (DenseFeatureSize > 0)
        {
            vector_b = DenseWeights[b];
            vector_bf = DenseWeightsDelta[b];
            vector_lr = DenseWeightsLearningRate[b];

            i = 0;
            while (i < DenseFeatureSize - Vector<double>.Count)
            {
                Vector<double> vecDelta = new Vector<double>(vector_bf, i);
                Vector<double> vecLearningRateWeights = new Vector<double>(vector_lr, i);
                Vector<double> vecB = new Vector<double>(vector_b, i);

                //Normalize delta
                vecDelta = RNNHelper.NormalizeGradient(vecDelta);

                //Compute the learning rate and update its accumulator
                Vector<double> vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref vecLearningRateWeights);
                vecLearningRateWeights.CopyTo(vector_lr, i);

                //Update weights
                vecB += vecLearningRate * vecDelta;
                vecB.CopyTo(vector_b, i);

                //Clean accumulated weight deltas
                Vector<double>.Zero.CopyTo(vector_bf, i);

                i += Vector<double>.Count;
            }
            while (i < DenseFeatureSize)
            {
                double delta = RNNHelper.NormalizeGradient(vector_bf[i]);
                double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, b, i, delta);
                vector_b[i] += newLearningRate * delta;

                //Clean dense feature weights error
                vector_bf[i] = 0;
                i++;
            }
        }

        if (SparseFeatureSize > 0)
        {
            //Update sparse feature weights
            vector_b = SparseWeights[b];
            vector_bf = SparseWeightsDelta[b];
            for (int step = 0; step < bptt + bptt_block - 2; step++)
            {
                var sparse = bptt_inputs[step];
                if (sparse == null)
                {
                    break;
                }

                for (i = 0; i < sparse.Count; i++)
                {
                    int pos = sparse.GetEntry(i).Key;
                    double delta = RNNHelper.NormalizeGradient(vector_bf[pos]);
                    double newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, b, pos, delta);
                    vector_b[pos] += newLearningRate * delta;

                    //Clean sparse feature weight error
                    vector_bf[pos] = 0;
                }
            }
        }
    });
}
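learnBptt also calls vectorized overloads of NormalizeGradient and ComputeLearningRate. A sketch consistent with the scalar versions earlier in this section; vecMaxGrad, vecMinGrad, vecNormalLearningRate, GradientCutoff, and LearningRate are all assumed names, and caching them as statics assumes the configuration is fixed before first use:

// Sketch only: vectorized counterparts of the scalar helpers, as used by learnBptt.
private static readonly Vector<double> vecMaxGrad = new Vector<double>(GradientCutoff);
private static readonly Vector<double> vecMinGrad = new Vector<double>(-GradientCutoff);
private static readonly Vector<double> vecNormalLearningRate = new Vector<double>(LearningRate);

public static Vector<double> NormalizeGradient(Vector<double> vecDelta)
{
    // Element-wise clip into [-GradientCutoff, GradientCutoff]
    return Vector.Max(Vector.Min(vecDelta, vecMaxGrad), vecMinGrad);
}

public static Vector<double> ComputeLearningRate(Vector<double> vecDelta, ref Vector<double> vecLearningRateWeights)
{
    // Accumulate squared gradients in place, then derive the per-lane rate,
    // mirroring the scalar UpdateLearningRate sketch above.
    vecLearningRateWeights += vecDelta * vecDelta;
    return vecNormalLearningRate / (Vector<double>.One + Vector.SquareRoot(vecLearningRateWeights));
}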