public override void Load(BinaryReader br)
{
    //Load basic parameters
    LayerSize = br.ReadInt32();
    SparseFeatureSize = br.ReadInt32();
    DenseFeatureSize = br.ReadInt32();

    AllocateMemoryForCells();

    Logger.WriteLine("Loading bptt hidden weights...");
    BpttWeights = RNNHelper.LoadMatrix(br);

    if (SparseFeatureSize > 0)
    {
        Logger.WriteLine("Loading sparse feature weights...");
        SparseWeights = RNNHelper.LoadMatrix(br);
    }

    if (DenseFeatureSize > 0)
    {
        Logger.WriteLine("Loading dense feature weights...");
        DenseWeights = RNNHelper.LoadMatrix(br);
    }
}
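A minimal usage sketch for restoring a layer from disk. The file name and the surrounding stream handling are assumptions; only Load and its BinaryReader argument come from the method above, and BPTTLayer is taken from the LoadModel methods later in this section.

// Hypothetical usage: open a model file and restore the layer's weights.
using (var fs = File.OpenRead("hidden_layer.bin"))   // path is illustrative
using (var br = new BinaryReader(fs))
{
    var layer = new BPTTLayer();   // assuming the Load above belongs to BPTTLayer
    layer.Load(br);                // reads sizes, then BPTT/sparse/dense weight matrices
}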
public virtual void ForwardPass(SparseVector sparseFeature, float[] denseFeature, bool isTrain = true)
{
    if (DenseFeatureSize > 0)
    {
        DenseFeature = denseFeature;
        RNNHelper.matrixXvectorADD(Cell, denseFeature, DenseWeights, LayerSize, DenseFeatureSize);
    }

    if (SparseFeatureSize > 0)
    {
        //Apply sparse features: each cell accumulates the dot product of the
        //active sparse features with that cell's weight row
        SparseFeature = sparseFeature;
        Parallel.For(0, LayerSize, parallelOption, b =>
        {
            float score = 0;
            var vector_b = SparseWeights[b];
            foreach (var pair in SparseFeature)
            {
                score += pair.Value * vector_b[pair.Key];
            }
            Cell[b] += score;
        });
    }
}
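The sparse branch above is a row-wise sparse dot product: only features that are actually present contribute to each cell. A self-contained sketch of the same computation with the layer state reduced to plain arrays (all values illustrative):

// Sparse dot product: only the non-zero features contribute to each cell.
float[][] weights = { new[] { 0.5f, -0.2f, 0.1f }, new[] { 0.3f, 0.7f, -0.4f } };
var sparse = new Dictionary<int, float> { [0] = 1.0f, [2] = 2.0f };  // feature id -> value
var cell = new float[2];
for (var b = 0; b < cell.Length; b++)
{
    float score = 0;
    foreach (var pair in sparse)
        score += pair.Value * weights[b][pair.Key];
    cell[b] += score;
}
// cell[0] = 0.5*1 + 0.1*2 = 0.7; cell[1] = 0.3*1 + (-0.4)*2 = -0.5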
private void learnBptt()
{
    for (int step = 0; step < bptt + bptt_block - 2; step++)
    {
        if (null == bptt_inputs[step] && null == bptt_fea[step])
        {
            break;
        }

        var sparse = bptt_inputs[step];
        var bptt_fea_step = bptt_fea[step];
        var last_bptt_hidden = bptt_hidden[step + 1];
        var last_last_bptt_hidden = bptt_hidden[step + 2];

        Parallel.For(0, LayerSize, parallelOption, a =>
        {
            //Compute hidden layer gradient: scale the error by the derivative
            //of the sigmoid activation, y * (1 - y)
            er[a] *= cellOutput[a] * (1 - cellOutput[a]);

            //Dense weight update: features -> hidden
            double[] vector_a = null;
            double er2 = er[a];
            Vector<double> vecErr = new Vector<double>(er2);
            int i = 0;
            if (DenseFeatureSize > 0)
            {
                vector_a = DenseWeightsDelta[a];
                i = 0;
                while (i < DenseFeatureSize - Vector<double>.Count)
                {
                    Vector<double> v1 = new Vector<double>(bptt_fea_step, i);
                    Vector<double> v2 = new Vector<double>(vector_a, i);
                    v2 += vecErr * v1;
                    v2.CopyTo(vector_a, i);
                    i += Vector<double>.Count;
                }
                while (i < DenseFeatureSize)
                {
                    vector_a[i] += er2 * bptt_fea_step[i];
                    i++;
                }
            }

            if (SparseFeatureSize > 0)
            {
                //Sparse weight update hidden->input
                vector_a = SparseWeightsDelta[a];
                for (i = 0; i < sparse.Count; i++)
                {
                    var entry = sparse.GetEntry(i);
                    vector_a[entry.Key] += er2 * entry.Value;
                }
            }

            //BPTT (recurrent) weight update
            vector_a = BpttWeightsDelta[a];
            i = 0;
            while (i < LayerSize - Vector<double>.Count)
            {
                Vector<double> v1 = new Vector<double>(previousCellOutput, i);
                Vector<double> v2 = new Vector<double>(vector_a, i);
                v2 += vecErr * v1;
                v2.CopyTo(vector_a, i);
                i += Vector<double>.Count;
            }
            while (i < LayerSize)
            {
                vector_a[i] += er2 * previousCellOutput[i];
                i++;
            }
        });

        //Propagate errors hidden->input to the recurrent part
        double[] previousHiddenErr = new double[LayerSize];
        RNNHelper.matrixXvectorADDErr(previousHiddenErr, er, BpttWeights, LayerSize, LayerSize);

        for (int a = 0; a < LayerSize; a++)
        {
            //Propagate error from time T-n to T-n-1
            er[a] = previousHiddenErr[a] + last_bptt_hidden.er[a];
        }

        if (step < bptt + bptt_block - 3)
        {
            for (int a = 0; a < LayerSize; a++)
            {
                cellOutput[a] = last_bptt_hidden.cellOutput[a];
                previousCellOutput[a] = last_last_bptt_hidden.cellOutput[a];
            }
        }
    }

    //Restore hidden layer after BPTT
    bptt_hidden[0].cellOutput.CopyTo(cellOutput, 0);

    Parallel.For(0, LayerSize, parallelOption, b =>
    {
        double[] vector_b = null;
        double[] vector_bf = null;
        double[] vector_lr = null;

        //Update BPTT feature weights
        vector_b = BpttWeights[b];
        vector_bf = BpttWeightsDelta[b];
        vector_lr = BpttWeightsLearningRate[b];

        int i = 0;
        while (i < LayerSize - Vector<double>.Count)
        {
            Vector<double> vecDelta = new Vector<double>(vector_bf, i);
            Vector<double> vecLearningRateWeights = new Vector<double>(vector_lr, i);
            Vector<double> vecB = new Vector<double>(vector_b, i);

            //Normalize delta
            vecDelta = RNNHelper.NormalizeGradient(vecDelta);

            //Compute learning rate and update its weights
            Vector<double> vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref vecLearningRateWeights);
            vecLearningRateWeights.CopyTo(vector_lr, i);

            //Update weights
            vecB += vecLearningRate * vecDelta;
            vecB.CopyTo(vector_b, i);

            //Clean deltas
            Vector<double>.Zero.CopyTo(vector_bf, i);
            i += Vector<double>.Count;
        }
        while (i < LayerSize)
        {
            double delta = RNNHelper.NormalizeGradient(vector_bf[i]);
            double newLearningRate = RNNHelper.UpdateLearningRate(BpttWeightsLearningRate, b, i, delta);
            vector_b[i] += newLearningRate * delta;

            //Clean BPTT weight error
            vector_bf[i] = 0;
            i++;
        }

        //Update dense feature weights
        if (DenseFeatureSize > 0)
        {
            vector_b = DenseWeights[b];
            vector_bf = DenseWeightsDelta[b];
            vector_lr = DenseWeightsLearningRate[b];

            i = 0;
            while (i < DenseFeatureSize - Vector<double>.Count)
            {
                Vector<double> vecDelta = new Vector<double>(vector_bf, i);
                Vector<double> vecLearningRateWeights = new Vector<double>(vector_lr, i);
                Vector<double> vecB = new Vector<double>(vector_b, i);

                //Normalize delta
                vecDelta = RNNHelper.NormalizeGradient(vecDelta);

                //Compute learning rate and update its weights
                Vector<double> vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref vecLearningRateWeights);
                vecLearningRateWeights.CopyTo(vector_lr, i);

                //Update weights
                vecB += vecLearningRate * vecDelta;
                vecB.CopyTo(vector_b, i);

                //Clean deltas
                Vector<double>.Zero.CopyTo(vector_bf, i);
                i += Vector<double>.Count;
            }
            while (i < DenseFeatureSize)
            {
                double delta = RNNHelper.NormalizeGradient(vector_bf[i]);
                double newLearningRate = RNNHelper.UpdateLearningRate(DenseWeightsLearningRate, b, i, delta);
                vector_b[i] += newLearningRate * delta;

                //Clean dense feature weight error
                vector_bf[i] = 0;
                i++;
            }
        }

        if (SparseFeatureSize > 0)
        {
            //Update sparse feature weights, touching only positions that were
            //active at some step of the BPTT window
            vector_b = SparseWeights[b];
            vector_bf = SparseWeightsDelta[b];
            for (int step = 0; step < bptt + bptt_block - 2; step++)
            {
                var sparse = bptt_inputs[step];
                if (sparse == null)
                {
                    break;
                }

                for (i = 0; i < sparse.Count; i++)
                {
                    int pos = sparse.GetEntry(i).Key;
                    double delta = RNNHelper.NormalizeGradient(vector_bf[pos]);
                    double newLearningRate = RNNHelper.UpdateLearningRate(SparseWeightsLearningRate, b, pos, delta);
                    vector_b[pos] += newLearningRate * delta;

                    //Clean sparse feature weight error
                    vector_bf[pos] = 0;
                }
            }
        }
    });
}
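The first step of learnBptt scales the error by cellOutput[a] * (1 - cellOutput[a]), which is the derivative of the sigmoid activation expressed through its own stored output: for y = 1 / (1 + e^-x), dy/dx = y * (1 - y). A one-off worked example with illustrative values:

// Chain rule through the sigmoid, using only the stored forward activation.
double upstreamErr = 0.2;                      // error arriving from the layer above
double y = 1.0 / (1.0 + Math.Exp(-0.5));       // forward activation, ~0.6225
double scaledErr = upstreamErr * y * (1 - y);  // mirrors er[a] *= cellOutput[a] * (1 - cellOutput[a]), ~0.047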
public override void ForwardPass(SparseVector sparseFeature, float[] denseFeature)
{
    if (runningMode == RunningMode.Training)
    {
        //Build the short list: all gold labels plus NegativeSampleSize sampled words
        negativeSampleWordList.Clear();
        foreach (var labelId in LabelShortList)
        {
            negativeSampleWordList.Add(labelId);
        }

        for (var i = 0; i < NegativeSampleSize; i++)
        {
            var wordId = rand.Next() % LayerSize;
            while (negativeSampleWordList.Contains(wordId))
            {
                //On collision, probe linearly for the next unused word id
                wordId = (wordId + 1) % LayerSize;
            }
            negativeSampleWordList.Add(wordId);
        }

        if (DenseFeatureSize > 0)
        {
            DenseFeature = denseFeature;
            RNNHelper.matrixXvectorADD(Cells, denseFeature, DenseWeights, negativeSampleWordList, DenseFeatureSize);
        }

        if (SparseFeatureSize > 0)
        {
            //Apply sparse features
            SparseFeature = sparseFeature;
            foreach (var b in negativeSampleWordList)
            {
                float score = 0;
                var vector_b = SparseWeights[b];
                foreach (var pair in SparseFeature)
                {
                    score += pair.Value * vector_b[pair.Key];
                }
                Cells[b] += score;
            }
        }

        //Softmax over the short list only, clipping pre-activations to
        //[-50, 50] so Math.Exp cannot overflow
        double sum = 0;
        foreach (var c in negativeSampleWordList)
        {
            var cell = Cells[c];
            if (cell > 50)
            {
                cell = 50;
            }
            if (cell < -50)
            {
                cell = -50;
            }
            var val = (float)Math.Exp(cell);
            sum += val;
            Cells[c] = val;
        }
        foreach (var c in negativeSampleWordList)
        {
            Cells[c] /= (float)sum;
        }
    }
    else
    {
        base.ForwardPass(sparseFeature, denseFeature);
    }
}
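The softmax at the end of the training branch normalizes only the short list (gold labels plus negative samples), so the probabilities over that subset sum to 1 while all other cells are ignored. A self-contained sketch of that pattern (values illustrative):

// Softmax restricted to a sampled subset of output ids.
var cells = new float[] { 0.1f, 2.0f, -1.0f, 3.5f };
var shortList = new List<int> { 1, 3 };              // gold label + one negative sample
double sum = 0;
foreach (var c in shortList)
{
    var clipped = Math.Max(-50f, Math.Min(50f, cells[c]));  // same overflow guard as above
    cells[c] = (float)Math.Exp(clipped);
    sum += cells[c];
}
foreach (var c in shortList)
    cells[c] /= (float)sum;                          // probabilities over the short list sum to 1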
public override int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode)
{
    var tgtSequence = pSequence.tgtSequence;
    var isTraining = runningMode == RunningMode.Training;

    //Reset all layers
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset(isTraining);
    }

    //Extract features from the source sentence
    var srcSequence = pSequence.autoEncoder.Config.BuildSequence(pSequence.srcSentence);
    float[] srcHiddenAvgOutput;
    Dictionary<int, float> srcSparseFeatures;
    ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize,
        out srcHiddenAvgOutput, out srcSparseFeatures);

    var numStates = pSequence.tgtSequence.States.Length;
    var numLayers = HiddenLayerList.Count;
    var predicted = new int[numStates];

    //Set target sentence labels into the short list in the output layer
    OutputLayer.LabelShortList = new List<int>();
    foreach (var state in tgtSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    for (var curState = 0; curState < numStates; curState++)
    {
        //Build runtime features
        var state = tgtSequence.States[curState];
        SetRuntimeFeatures(state, curState, numStates, predicted);

        //Build sparse features for all layers
        var sparseVector = new SparseVector();
        sparseVector.SetLength(tgtSequence.SparseFeatureSize + srcSequence.SparseFeatureSize);
        sparseVector.AddKeyValuePairData(state.SparseFeature);
        sparseVector.AddKeyValuePairData(srcSparseFeatures);

        //Compute the first layer
        var denseFeatures = RNNHelper.ConcatenateVector(state.DenseFeature, srcHiddenAvgOutput);
        HiddenLayerList[0].ForwardPass(sparseVector, denseFeatures, isTraining);

        //Compute the middle layers; we use the previous layer's output as the
        //dense feature for the current layer
        for (var i = 1; i < numLayers; i++)
        {
            denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].Cell, srcHiddenAvgOutput);
            HiddenLayerList[i].ForwardPass(sparseVector, denseFeatures, isTraining);
        }

        //Compute the output layer
        denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].Cell, srcHiddenAvgOutput);
        OutputLayer.ForwardPass(sparseVector, denseFeatures, isTraining);
        OutputLayer.Softmax(isTraining);

        predicted[curState] = OutputLayer.GetBestOutputIndex(isTraining);

        if (runningMode != RunningMode.Test)
        {
            logp += Math.Log10(OutputLayer.Cell[state.Label] + 0.0001);
        }

        if (runningMode == RunningMode.Training)
        {
            //Error propagation
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);

            //Propagate errors to each layer, from the output layer back to the input layer
            HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
            for (var i = numLayers - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            //Update network weights
            Parallel.Invoke(
                () => { OutputLayer.BackwardPass(numStates, curState); },
                () =>
                {
                    Parallel.For(0, numLayers, parallelOption, i =>
                    {
                        HiddenLayerList[i].BackwardPass(numStates, curState);
                    });
                });
        }
    }

    return predicted;
}
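At every layer, RNNHelper.ConcatenateVector splices the averaged source-sentence hidden output onto the layer's dense input, so the source context is visible at each depth. A minimal sketch of that layout, assuming ConcatenateVector is a plain array concatenation (values illustrative):

// Dense input to layer i: [previous layer output ++ source sentence average output].
float[] prevLayerOutput = { 0.1f, 0.2f };
float[] srcAvg = { 0.3f, 0.4f, 0.5f };
var dense = new float[prevLayerOutput.Length + srcAvg.Length];
prevLayerOutput.CopyTo(dense, 0);
srcAvg.CopyTo(dense, prevLayerOutput.Length);   // mirrors the assumed behavior of ConcatenateVector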
public override int[] TestSeq2Seq(Sentence srcSentence, Config featurizer)
{
    var curState = featurizer.BuildState(new[] { "<s>" });
    curState.Label = featurizer.TagSet.GetIndex("<s>");

    //Reset all layers
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset(false);
    }

    //Extract features from the source sentence
    var srcSequence = featurizer.Seq2SeqAutoEncoder.Config.BuildSequence(srcSentence);
    float[] srcHiddenAvgOutput;
    Dictionary<int, float> srcSparseFeatures;
    ExtractSourceSentenceFeature(featurizer.Seq2SeqAutoEncoder, srcSequence, curState.SparseFeature.Length,
        out srcHiddenAvgOutput, out srcSparseFeatures);

    var numLayers = HiddenLayerList.Count;
    var predicted = new List<int> { curState.Label };

    while (true)
    {
        //Build sparse features
        var sparseVector = new SparseVector();
        sparseVector.SetLength(curState.SparseFeature.Length + srcSequence.SparseFeatureSize);
        sparseVector.AddKeyValuePairData(curState.SparseFeature);
        sparseVector.AddKeyValuePairData(srcSparseFeatures);

        //Compute the first layer
        var denseFeatures = RNNHelper.ConcatenateVector(curState.DenseFeature, srcHiddenAvgOutput);
        HiddenLayerList[0].ForwardPass(sparseVector, denseFeatures, false);

        //Compute the middle layers; we use the previous layer's output as the
        //dense feature for the current layer
        for (var i = 1; i < numLayers; i++)
        {
            denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].Cell, srcHiddenAvgOutput);
            HiddenLayerList[i].ForwardPass(sparseVector, denseFeatures, false);
        }

        //Compute the output layer
        denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].Cell, srcHiddenAvgOutput);
        OutputLayer.ForwardPass(sparseVector, denseFeatures, false);
        OutputLayer.Softmax(false);

        //Greedy decoding: pick the best tag and feed it back as the next input word
        var nextTagId = OutputLayer.GetBestOutputIndex(false);
        var nextWord = featurizer.TagSet.GetTagName(nextTagId);
        curState = featurizer.BuildState(new[] { nextWord });
        curState.Label = nextTagId;
        predicted.Add(nextTagId);

        //Stop at the end-of-sentence tag or after 100 generated tokens
        if (nextWord == "</s>" || predicted.Count >= 100)
        {
            break;
        }
    }

    return predicted.ToArray();
}
public void UpdateBigramTransition(Sequence seq)
{
    var OutputLayerSize = OutputLayer.LayerSize;
    var numStates = seq.States.Length;
    var m_DeltaBigramLM = new Matrix<float>(OutputLayerSize, OutputLayerSize);

    for (var timeat = 1; timeat < numStates; timeat++)
    {
        var CRFSeqOutput_timeat = CRFSeqOutput[timeat];
        var CRFSeqOutput_pre_timeat = CRFSeqOutput[timeat - 1];
        for (var i = 0; i < OutputLayerSize; i++)
        {
            var CRFSeqOutput_timeat_i = CRFSeqOutput_timeat[i];
            var CRFTagTransWeights_i = CRFTagTransWeights[i];
            var m_DeltaBigramLM_i = m_DeltaBigramLM[i];
            var j = 0;

            var vecCRFSeqOutput_timeat_i = new Vector<float>(CRFSeqOutput_timeat_i);
            while (j < OutputLayerSize - Vector<float>.Count)
            {
                var v1 = new Vector<float>(CRFTagTransWeights_i, j);
                var v2 = new Vector<float>(CRFSeqOutput_pre_timeat, j);
                var v = new Vector<float>(m_DeltaBigramLM_i, j);
                v -= v1 * vecCRFSeqOutput_timeat_i * v2;
                v.CopyTo(m_DeltaBigramLM_i, j);
                j += Vector<float>.Count;
            }
            while (j < OutputLayerSize)
            {
                m_DeltaBigramLM_i[j] -= CRFTagTransWeights_i[j] * CRFSeqOutput_timeat_i * CRFSeqOutput_pre_timeat[j];
                j++;
            }
        }

        //Add 1 for the observed gold tag bigram
        var iTagId = seq.States[timeat].Label;
        var iLastTagId = seq.States[timeat - 1].Label;
        m_DeltaBigramLM[iTagId][iLastTagId] += 1;
    }

    //Update tag bigram LM
    for (var b = 0; b < OutputLayerSize; b++)
    {
        var vector_b = CRFTagTransWeights[b];
        var vector_delta_b = m_DeltaBigramLM[b];
        var a = 0;
        while (a < OutputLayerSize - Vector<float>.Count)
        {
            var v1 = new Vector<float>(vector_delta_b, a);
            var v = new Vector<float>(vector_b, a);

            //Normalize delta
            v1 = RNNHelper.NormalizeGradient(v1);

            //Update weights
            v += RNNHelper.vecNormalLearningRate * v1;
            v.CopyTo(vector_b, a);

            a += Vector<float>.Count;
        }
        while (a < OutputLayerSize)
        {
            vector_b[a] += RNNHelper.LearningRate * RNNHelper.NormalizeGradient(vector_delta_b[a]);
            a++;
        }
    }
}
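Per time step, the loop above accumulates -CRFTagTransWeights[i][j] * P_t(i) * P_{t-1}(j) into the delta matrix and then adds 1 for the observed gold bigram, pushing each transition weight toward observed minus (weight-scaled) expected counts. A scalar sketch of one entry, with illustrative values:

// Delta for one transition weight, following the loop above.
float pCurr = 0.6f;   // P(tag i at time t) from CRFSeqOutput
float pPrev = 0.7f;   // P(tag j at time t-1)
float trans = 0.3f;   // current transition weight CRFTagTransWeights[i][j]
float delta = -trans * pCurr * pPrev;   // expected-count term: -0.126
bool isGoldBigram = true;               // (i, j) matches the labeled tag bigram
if (isGoldBigram)
    delta += 1;                         // observed-count term -> 0.874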
public override void ComputeLayerErr(SimpleLayer nextLayer)
{
    //Propagate error output->hidden, but only for words in the short list
    RNNHelper.matrixXvectorADDErr(er, nextLayer.er, nextLayer.DenseWeights, negativeSampleWordList, nextLayer.LayerSize);
}
public virtual void ComputeLayerErr(List<float[]> destErrsList, bool cleanDest = true)
{
    RNNHelper.matrixXvectorADDErr(destErrsList, Errs, DenseWeights, cleanDest);
}
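matrixXvectorADDErr itself is not shown here. Given how it is used, carrying this layer's errors back through the dense weights to the layer below, a plausible reading is destErr = Wᵀ · srcErr, sketched below under that assumption (all names and values illustrative):

// Assumed semantics of matrixXvectorADDErr: destErr[j] = sum_i srcErr[i] * W[i][j].
float[][] W = { new[] { 0.5f, -0.2f }, new[] { 0.3f, 0.7f } };  // [outputs][inputs]
float[] srcErr = { 1.0f, 2.0f };
var destErr = new float[2];
for (var i = 0; i < srcErr.Length; i++)
    for (var j = 0; j < destErr.Length; j++)
        destErr[j] += srcErr[i] * W[i][j];
// destErr[0] = 1*0.5 + 2*0.3 = 1.1; destErr[1] = 1*(-0.2) + 2*0.7 = 1.2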
public virtual void UpdateWeights()
{
    Vector<float> vecMiniBatchSize = new Vector<float>(RNNHelper.MiniBatchSize);
    for (var i = 0; i < LayerSize; i++)
    {
        if (SparseFeatureSize > 0)
        {
            var sparseWeights_i = SparseWeights[i];
            var sparseDelta_i = SparseWeightsDelta[i];
            var sparseLearningRate_i = SparseWeightsLearningRate[i];
            var j = 0;

            var moreItems = (SparseFeatureSize % Vector<float>.Count);
            while (j < SparseFeatureSize - moreItems)
            {
                Vector<float> vecDelta = new Vector<float>(sparseDelta_i, j);
                Vector<float>.Zero.CopyTo(sparseDelta_i, j);

                //Skip the learning-rate update when the whole lane is zero,
                //which is common for sparse features
                if (vecDelta != Vector<float>.Zero)
                {
                    //Average the delta over the mini-batch, then normalize
                    vecDelta = vecDelta / vecMiniBatchSize;
                    vecDelta = RNNHelper.NormalizeGradient(vecDelta);

                    var wlr_i = new Vector<float>(sparseLearningRate_i, j);
                    var vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref wlr_i);
                    wlr_i.CopyTo(sparseLearningRate_i, j);

                    vecDelta = vecLearningRate * vecDelta;
                    Vector<float> vecWeights = new Vector<float>(sparseWeights_i, j);
                    vecWeights += vecDelta;
                    vecWeights.CopyTo(sparseWeights_i, j);
                }
                j += Vector<float>.Count;
            }
            while (j < SparseFeatureSize)
            {
                var delta = sparseDelta_i[j];
                sparseDelta_i[j] = 0;

                delta = delta / RNNHelper.MiniBatchSize;
                delta = RNNHelper.NormalizeGradient(delta);

                var newLearningRate = RNNHelper.ComputeLearningRate(SparseWeightsLearningRate, i, j, delta);
                sparseWeights_i[j] += newLearningRate * delta;
                j++;
            }
        }

        if (DenseFeatureSize > 0)
        {
            var denseWeights_i = DenseWeights[i];
            var denseDelta_i = DenseWeightsDelta[i];
            var denseLearningRate_i = DenseWeightsLearningRate[i];
            var j = 0;

            var moreItems = (DenseFeatureSize % Vector<float>.Count);
            while (j < DenseFeatureSize - moreItems)
            {
                Vector<float> vecDelta = new Vector<float>(denseDelta_i, j);
                Vector<float>.Zero.CopyTo(denseDelta_i, j);

                //Average the delta over the mini-batch, then normalize
                vecDelta = vecDelta / vecMiniBatchSize;
                vecDelta = RNNHelper.NormalizeGradient(vecDelta);

                var wlr_i = new Vector<float>(denseLearningRate_i, j);
                var vecLearningRate = RNNHelper.ComputeLearningRate(vecDelta, ref wlr_i);
                wlr_i.CopyTo(denseLearningRate_i, j);

                vecDelta = vecLearningRate * vecDelta;
                Vector<float> vecWeights = new Vector<float>(denseWeights_i, j);
                vecWeights += vecDelta;
                vecWeights.CopyTo(denseWeights_i, j);

                j += Vector<float>.Count;
            }
            while (j < DenseFeatureSize)
            {
                var delta = denseDelta_i[j];
                denseDelta_i[j] = 0;

                delta = delta / RNNHelper.MiniBatchSize;
                delta = RNNHelper.NormalizeGradient(delta);

                var newLearningRate = RNNHelper.ComputeLearningRate(DenseWeightsLearningRate, i, j, delta);
                denseWeights_i[j] += newLearningRate * delta;
                j++;
            }
        }
    }
}
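ComputeLearningRate and UpdateLearningRate are not shown in this section; the per-weight learning-rate arrays they maintain suggest an AdaGrad-style scheme, where each weight's step size decays with its accumulated squared gradient. A hypothetical sketch under that assumption (the function name and base rate are illustrative, not the library's actual implementation):

// Hypothetical AdaGrad-style per-weight learning rate: the accumulator grows
// with each squared gradient, so the effective step size decays over time.
static float ComputeLearningRateSketch(float delta, ref float accumulatedSquares, float baseRate = 0.1f)
{
    accumulatedSquares += delta * delta;
    return baseRate / (float)Math.Sqrt(1.0 + accumulatedSquares);
}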
public override void LoadModel(string filename)
{
    Logger.WriteLine(Logger.Level.info, "Loading bi-directional model: {0}", filename);

    using (StreamReader sr = new StreamReader(filename))
    {
        BinaryReader br = new BinaryReader(sr.BaseStream);

        int modelType = br.ReadInt32();
        ModelDirection = (MODELDIRECTION)br.ReadInt32();

        int iflag = br.ReadInt32();
        IsCRFTraining = (iflag == 1);

        int layerSize = br.ReadInt32();

        //Load forward layers from file
        forwardHiddenLayers = new List<SimpleLayer>();
        for (int i = 0; i < layerSize; i++)
        {
            SimpleLayer layer = null;
            if (modelType == 0)
            {
                Logger.WriteLine("Create BPTT hidden layer");
                layer = new BPTTLayer();
            }
            else
            {
                Logger.WriteLine("Create LSTM hidden layer");
                layer = new LSTMLayer();
            }
            layer.Load(br);
            forwardHiddenLayers.Add(layer);
        }

        //Load backward layers from file
        backwardHiddenLayers = new List<SimpleLayer>();
        for (int i = 0; i < layerSize; i++)
        {
            SimpleLayer layer = null;
            if (modelType == 0)
            {
                Logger.WriteLine("Create BPTT hidden layer");
                layer = new BPTTLayer();
            }
            else
            {
                Logger.WriteLine("Create LSTM hidden layer");
                layer = new LSTMLayer();
            }
            layer.Load(br);
            backwardHiddenLayers.Add(layer);
        }

        OutputLayer = new SimpleLayer();
        OutputLayer.Load(br);

        if (iflag == 1)
        {
            Logger.WriteLine("Loading CRF tag trans weights...");
            CRFTagTransWeights = RNNHelper.LoadMatrix(br);
        }
    }
}
public virtual void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
{
    DenseFeature = denseFeature;
    RNNHelper.matrixXvectorADD(cellOutput, denseFeature, DenseWeights, LayerSize, DenseFeatureSize);
}
public void UpdateBigramTransition(Sequence seq)
{
    int OutputLayerSize = OutputLayer.LayerSize;
    int numStates = seq.States.Length;
    Matrix<double> m_DeltaBigramLM = new Matrix<double>(OutputLayerSize, OutputLayerSize);

    for (int timeat = 1; timeat < numStates; timeat++)
    {
        double[] CRFSeqOutput_timeat = CRFSeqOutput[timeat];
        double[] CRFSeqOutput_pre_timeat = CRFSeqOutput[timeat - 1];
        for (int i = 0; i < OutputLayerSize; i++)
        {
            double CRFSeqOutput_timeat_i = CRFSeqOutput_timeat[i];
            double[] CRFTagTransWeights_i = CRFTagTransWeights[i];
            double[] m_DeltaBigramLM_i = m_DeltaBigramLM[i];
            int j = 0;

            Vector<double> vecCRFSeqOutput_timeat_i = new Vector<double>(CRFSeqOutput_timeat_i);
            while (j < OutputLayerSize - Vector<double>.Count)
            {
                Vector<double> v1 = new Vector<double>(CRFTagTransWeights_i, j);
                Vector<double> v2 = new Vector<double>(CRFSeqOutput_pre_timeat, j);
                Vector<double> v = new Vector<double>(m_DeltaBigramLM_i, j);
                v -= (v1 * vecCRFSeqOutput_timeat_i * v2);
                v.CopyTo(m_DeltaBigramLM_i, j);
                j += Vector<double>.Count;
            }
            while (j < OutputLayerSize)
            {
                m_DeltaBigramLM_i[j] -= (CRFTagTransWeights_i[j] * CRFSeqOutput_timeat_i * CRFSeqOutput_pre_timeat[j]);
                j++;
            }
        }

        //Add 1 for the observed gold tag bigram
        int iTagId = seq.States[timeat].Label;
        int iLastTagId = seq.States[timeat - 1].Label;
        m_DeltaBigramLM[iTagId][iLastTagId] += 1;
    }

    //Update tag bigram LM
    for (int b = 0; b < OutputLayerSize; b++)
    {
        double[] vector_b = CRFTagTransWeights[b];
        double[] vector_delta_b = m_DeltaBigramLM[b];
        int a = 0;
        while (a < OutputLayerSize - Vector<double>.Count)
        {
            Vector<double> v1 = new Vector<double>(vector_delta_b, a);
            Vector<double> v = new Vector<double>(vector_b, a);

            //Normalize delta
            v1 = RNNHelper.NormalizeGradient(v1);

            //Update weights
            v += RNNHelper.vecNormalLearningRate * v1;
            v.CopyTo(vector_b, a);

            a += Vector<double>.Count;
        }
        while (a < OutputLayerSize)
        {
            vector_b[a] += RNNHelper.LearningRate * RNNHelper.NormalizeGradient(vector_delta_b[a]);
            a++;
        }
    }
}
public override void ComputeLayerErr(float[] destErrs, bool cleanDest = true)
{
    //Propagate errors only through the words in the negative-sample short list
    RNNHelper.matrixXvectorADDErr(destErrs, Errs, DenseWeights, negativeSampleWordList, cleanDest);
}
public override void LoadModel(string filename)
{
    Logger.WriteLine(Logger.Level.info, "Loading bi-directional model: {0}", filename);

    using (var sr = new StreamReader(filename))
    {
        var br = new BinaryReader(sr.BaseStream);

        var layerType = (LAYERTYPE)br.ReadInt32();
        IsCRFTraining = br.ReadBoolean();
        var layerSize = br.ReadInt32();

        //Load forward layers from file
        forwardHiddenLayers = new List<SimpleLayer>();
        for (var i = 0; i < layerSize; i++)
        {
            SimpleLayer layer;
            if (layerType == LAYERTYPE.BPTT)
            {
                Logger.WriteLine("Create BPTT hidden layer");
                layer = new BPTTLayer();
            }
            else
            {
                Logger.WriteLine("Create LSTM hidden layer");
                layer = new LSTMLayer();
            }
            layer.Load(br);
            forwardHiddenLayers.Add(layer);
        }

        //Load backward layers from file
        backwardHiddenLayers = new List<SimpleLayer>();
        for (var i = 0; i < layerSize; i++)
        {
            SimpleLayer layer;
            if (layerType == LAYERTYPE.BPTT)
            {
                Logger.WriteLine("Create BPTT hidden layer");
                layer = new BPTTLayer();
            }
            else
            {
                Logger.WriteLine("Create LSTM hidden layer");
                layer = new LSTMLayer();
            }
            layer.Load(br);
            backwardHiddenLayers.Add(layer);
        }

        Logger.WriteLine("Create output layer");
        OutputLayer = new SimpleLayer();
        OutputLayer.Load(br);

        if (IsCRFTraining)
        {
            Logger.WriteLine("Loading CRF tag trans weights...");
            CRFTagTransWeights = RNNHelper.LoadMatrix(br);
        }
    }
}
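A hypothetical usage sketch; the concrete network type name and model path are assumptions, only LoadModel comes from the method above:

// Hypothetical usage: restore a trained bi-directional model from disk.
var network = new BiRNN();        // concrete type name is an assumption
network.LoadModel("model.bin");   // path is illustrative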
public override int[] TestSeq2Seq(Sentence srcSentence, Featurizer featurizer)
{
    State curState = featurizer.ExtractFeatures(new string[] { "<s>" });
    curState.Label = featurizer.TagSet.GetIndex("<s>");

    //Reset all layers
    foreach (SimpleLayer layer in HiddenLayerList)
    {
        layer.netReset(false);
    }

    //Extract features from the source sentence
    Sequence srcSequence = featurizer.AutoEncoder.Featurizer.ExtractFeatures(srcSentence);
    double[] srcHiddenAvgOutput;
    Dictionary<int, float> srcSparseFeatures;
    ExtractSourceSentenceFeature(featurizer.AutoEncoder, srcSequence, curState.SparseFeature.Length,
        out srcHiddenAvgOutput, out srcSparseFeatures);

    int numLayers = HiddenLayerList.Count;
    List<int> predicted = new List<int>();
    predicted.Add(curState.Label);

    while (true)
    {
        //Build sparse features
        SparseVector sparseVector = new SparseVector();
        sparseVector.SetLength(curState.SparseFeature.Length + srcSequence.SparseFeatureSize);
        sparseVector.AddKeyValuePairData(curState.SparseFeature);
        sparseVector.AddKeyValuePairData(srcSparseFeatures);

        //Compute the first layer
        double[] denseFeatures = RNNHelper.ConcatenateVector(curState.DenseFeature, srcHiddenAvgOutput);
        HiddenLayerList[0].computeLayer(sparseVector, denseFeatures, false);

        //Compute the middle layers; we use the previous layer's output as the
        //dense feature for the current layer
        for (int i = 1; i < numLayers; i++)
        {
            denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].cellOutput, srcHiddenAvgOutput);
            HiddenLayerList[i].computeLayer(sparseVector, denseFeatures, false);
        }

        //Compute the output layer
        denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].cellOutput, srcHiddenAvgOutput);
        OutputLayer.computeLayer(sparseVector, denseFeatures, false);
        OutputLayer.Softmax(false);

        //Greedy decoding: pick the best tag and feed it back as the next input word
        int nextTagId = OutputLayer.GetBestOutputIndex(false);
        string nextWord = featurizer.TagSet.GetTagName(nextTagId);
        curState = featurizer.ExtractFeatures(new string[] { nextWord });
        curState.Label = nextTagId;
        predicted.Add(nextTagId);

        //Stop at the end-of-sentence tag or after 100 generated tokens
        if (nextWord == "</s>" || predicted.Count >= 100)
        {
            break;
        }
    }

    return predicted.ToArray();
}