/// <summary>
/// Wraps a source/target sentence pair into a SequencePair: the source sentence is
/// kept as-is, the target sentence is converted into a feature sequence, and the
/// configured auto-encoder is attached for later source-side encoding.
/// </summary>
/// <param name="sentence">Sentence pair to extract features from.</param>
/// <returns>A SequencePair ready for seq2seq processing.</returns>
public SequencePair ExtractFeatures(SentencePair sentence)
{
    var pair = new SequencePair
    {
        autoEncoder = AutoEncoder,
        srcSentence = sentence.srcSentence,
        tgtSequence = ExtractFeatures(sentence.tgtSentence)
    };

    return pair;
}
/// <summary>
/// Builds a SequencePair for sequence-to-sequence processing: keeps the raw source
/// sentence, converts the target sentence into a feature sequence, and attaches the
/// seq2seq auto-encoder used to encode the source side.
/// </summary>
/// <param name="sentence">Sentence pair to build the sequence from.</param>
/// <returns>A SequencePair ready for seq2seq processing.</returns>
public SequencePair BuildSequence(SentencePair sentence)
{
    SequencePair pair = new SequencePair();
    pair.autoEncoder = Seq2SeqAutoEncoder;
    pair.srcSentence = sentence.srcSentence;
    pair.tgtSequence = BuildSequence(sentence.tgtSentence);
    return pair;
}
/// <summary>
/// Runs one pass over a sequence pair for sequence-to-sequence modeling: encodes the
/// source sentence via the attached auto-encoder, then decodes the target sequence
/// state by state. In Training mode it also back-propagates errors and updates weights
/// after each state; in non-Test modes it accumulates log-probability into logp.
/// </summary>
/// <param name="pSequence">Source sentence plus target feature sequence.</param>
/// <param name="runningMode">Training, validation or test mode.</param>
/// <returns>Predicted output index per target state.</returns>
public override int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode)
{
    var tgtSequence = pSequence.tgtSequence;
    var isTraining = runningMode == RunningMode.Training;

    //Reset all layers
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset(isTraining);
    }

    //Extract features from source sentences
    //The auto-encoder yields an averaged hidden output vector plus sparse features;
    //both are concatenated into every decoding step below.
    var srcSequence = pSequence.autoEncoder.Config.BuildSequence(pSequence.srcSentence);
    float[] srcHiddenAvgOutput;
    Dictionary <int, float> srcSparseFeatures;
    ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize,
        out srcHiddenAvgOutput, out srcSparseFeatures);

    var numStates = pSequence.tgtSequence.States.Length;
    var numLayers = HiddenLayerList.Count;
    var predicted = new int[numStates];

    //Set target sentence labels into short list in output layer
    OutputLayer.LabelShortList = new List <int>();
    foreach (var state in tgtSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    for (var curState = 0; curState < numStates; curState++)
    {
        //Build runtime features (depends on the predictions made so far)
        var state = tgtSequence.States[curState];
        SetRuntimeFeatures(state, curState, numStates, predicted);

        //Build sparse features for all layers: target-state features offset by the
        //target sparse size, followed by the source-side sparse features.
        var sparseVector = new SparseVector();
        sparseVector.SetLength(tgtSequence.SparseFeatureSize + srcSequence.SparseFeatureSize);
        sparseVector.AddKeyValuePairData(state.SparseFeature);
        sparseVector.AddKeyValuePairData(srcSparseFeatures);

        //Compute first layer: target dense features concatenated with the
        //averaged source hidden output
        var denseFeatures = RNNHelper.ConcatenateVector(state.DenseFeature, srcHiddenAvgOutput);
        HiddenLayerList[0].ForwardPass(sparseVector, denseFeatures, isTraining);

        //Compute middle layers
        for (var i = 1; i < numLayers; i++)
        {
            //We use previous layer's output as dense feature for current layer
            denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].Cell, srcHiddenAvgOutput);
            HiddenLayerList[i].ForwardPass(sparseVector, denseFeatures, isTraining);
        }

        //Compute output layer from the top hidden layer's output
        denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].Cell,
            srcHiddenAvgOutput);
        OutputLayer.ForwardPass(sparseVector, denseFeatures, isTraining);
        OutputLayer.Softmax(isTraining);

        predicted[curState] = OutputLayer.GetBestOutputIndex(isTraining);

        if (runningMode != RunningMode.Test)
        {
            //Accumulate log-probability of the gold label; the +0.0001 guards
            //against Log10(0) when the softmax output underflows to zero.
            logp += Math.Log10(OutputLayer.Cell[state.Label] + 0.0001);
        }

        if (runningMode == RunningMode.Training)
        {
            // error propogation
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);

            //propogate errors to each layer from output layer to input layer
            HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
            for (var i = numLayers - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            //Update net weights: output layer and all hidden layers in parallel
            Parallel.Invoke(() => { OutputLayer.BackwardPass(numStates, curState); },
                () =>
                {
                    Parallel.For(0, numLayers, parallelOption, i =>
                    {
                        HiddenLayerList[i].BackwardPass(numStates, curState);
                    });
                });
        }
    }

    return(predicted);
}
/// <summary>
/// Processes a source/target sequence pair in the given running mode and returns the
/// predicted output index for each target state. Implemented by seq2seq-capable RNNs;
/// other implementations may not support it.
/// </summary>
/// <param name="pSequence">Source sentence plus target feature sequence.</param>
/// <param name="runningMode">Training, validation or test mode.</param>
/// <returns>Predicted output index per target state.</returns>
public abstract int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode);
/// <summary>
/// Process entire corpus set by given RNN instances in parallel. Each worker dequeues
/// a free RNN instance from a shared pool, runs one sequence through it, updates the
/// shared progress/error counters, and returns the instance to the pool; when the pool
/// is empty a new instance is cloned from the first RNN.
/// </summary>
/// <param name="rnns">Pool of RNN instances shared by the worker threads.</param>
/// <param name="corpusSet">Corpus to process; shuffled before processing.</param>
/// <param name="runningMode">Training, validation or test mode.</param>
public void Process(List <RNN <T> > rnns, DataSet <T> corpusSet, RunningMode runningMode)
{
    parallelOptions = new ParallelOptions();
    parallelOptions.MaxDegreeOfParallelism = Environment.ProcessorCount;

    //Reset shared counters for this pass over the corpus
    processedSequence = 0;
    processedWordCnt = 0;
    tknErrCnt = 0;
    sentErrCnt = 0;

    corpusSet.Shuffle();

    //Add RNN instance into job queue
    ConcurrentQueue <RNN <T> > qRNNs = new ConcurrentQueue <RNN <T> >();
    foreach (var rnn in rnns)
    {
        qRNNs.Enqueue(rnn);
    }

    Parallel.For(0, corpusSet.SequenceList.Count, parallelOptions, i =>
    {
        //Get a free RNN instance for running
        RNN <T> rnn;
        if (!qRNNs.TryDequeue(out rnn))
        {
            //The queue is empty, so we clone a new one
            rnn = rnns[0].Clone();
            Logger.WriteLine("Cloned a new RNN instance for training.");
        }

        var pSequence = corpusSet.SequenceList[i];

        //Calcuate how many tokens we are going to process in this sequence
        int tokenCnt = 0;
        if (pSequence is Sequence)
        {
            tokenCnt = (pSequence as Sequence).States.Length;
        }
        else
        {
            SequencePair sp = pSequence as SequencePair;
            if (sp.srcSentence.TokensList.Count > rnn.MaxSeqLength)
            {
                //Source sentence too long: skip it, but return the instance to the pool
                qRNNs.Enqueue(rnn);
                return;
            }
            tokenCnt = sp.tgtSequence.States.Length;
        }

        //This sequence is too long, so we ignore it
        if (tokenCnt > rnn.MaxSeqLength)
        {
            qRNNs.Enqueue(rnn);
            return;
        }

        //Run neural network
        int[] predicted;
        if (IsCRFTraining)
        {
            predicted = rnn.ProcessSequenceCRF(pSequence as Sequence, runningMode);
        }
        else
        {
            Matrix <float> m;
            predicted = rnn.ProcessSequence(pSequence, runningMode, false, out m);
        }

        //Update counters
        Interlocked.Add(ref processedWordCnt, tokenCnt);
        Interlocked.Increment(ref processedSequence);
        Interlocked.Increment(ref processMiniBatch);

        int newTknErrCnt;
        if (pSequence is Sequence)
        {
            newTknErrCnt = GetErrorTokenNum(pSequence as Sequence, predicted);
        }
        else
        {
            newTknErrCnt = GetErrorTokenNum((pSequence as SequencePair).tgtSequence, predicted);
        }

        Interlocked.Add(ref tknErrCnt, newTknErrCnt);
        if (newTknErrCnt > 0)
        {
            Interlocked.Increment(ref sentErrCnt);
        }

        //Update weights
        //We only allow one thread to update weights, and other threads keep running to train or predict given sequences
        //Note: we don't add any lock when updating weights and deltas for weights in order to improve performance singificantly,
        //so that means race condition will happen and it's okay for us.
        if (runningMode == RunningMode.Training && processMiniBatch > 0 &&
            processMiniBatch % ModelSettings.MiniBatchSize == 0 && updatingWeights == 0)
        {
            Interlocked.Increment(ref updatingWeights);
            if (updatingWeights == 1)
            {
                rnn.UpdateWeights();
                Interlocked.Exchange(ref processMiniBatch, 0);
            }
            Interlocked.Decrement(ref updatingWeights);
        }

        //Show progress information
        if (processedSequence % 1000 == 0)
        {
            Logger.WriteLine("Progress = {0} ",
                processedSequence / 1000 + "K/" + corpusSet.SequenceList.Count / 1000.0 + "K");
            Logger.WriteLine(" Error token ratio = {0}%",
                (double)tknErrCnt / (double)processedWordCnt * 100.0);
            Logger.WriteLine(" Error sentence ratio = {0}%",
                (double)sentErrCnt / (double)processedSequence * 100.0);
        }

        //Save intermediate model file
        if (ModelSettings.SaveStep > 0 && processedSequence % ModelSettings.SaveStep == 0)
        {
            //After processed every m_SaveStep sentences, save current model into a temporary file
            Logger.WriteLine("Saving temporary model into file...");
            try
            {
                rnn.SaveModel("model.tmp");
            }
            catch (Exception err)
            {
                //Checkpointing is best-effort; a failed save must not abort training
                Logger.WriteLine($"Fail to save temporary model into file. Error: {err.Message}");
            }
        }

        //Return the RNN instance to the pool for the next worker
        qRNNs.Enqueue(rnn);
    });
}
/// <summary>
/// Process entire corpus set sequentially with a single RNN instance: shuffles the
/// corpus, runs each sequence (skipping those longer than rnn.MaxSeqLength), updates
/// progress/error counters, and periodically logs progress and saves a temporary model.
/// </summary>
/// <param name="rnn">RNN instance used for all sequences.</param>
/// <param name="trainingSet">Corpus to process; shuffled before processing.</param>
/// <param name="runningMode">Training, validation or test mode.</param>
/// <param name="totalSequenceNum">Total sequence count, used only for progress logging.</param>
public void Process(RNN <T> rnn, DataSet <T> trainingSet, RunningMode runningMode, int totalSequenceNum)
{
    //Shuffle training corpus
    trainingSet.Shuffle();

    for (var i = 0; i < trainingSet.SequenceList.Count; i++)
    {
        var pSequence = trainingSet.SequenceList[i];

        //Calculate how many tokens this sequence contains
        int wordCnt = 0;
        if (pSequence is Sequence)
        {
            wordCnt = (pSequence as Sequence).States.Length;
        }
        else
        {
            SequencePair sp = pSequence as SequencePair;
            if (sp.srcSentence.TokensList.Count > rnn.MaxSeqLength)
            {
                //Source sentence too long: skip this pair
                continue;
            }
            wordCnt = sp.tgtSequence.States.Length;
        }

        //This sequence is too long, so we ignore it
        if (wordCnt > rnn.MaxSeqLength)
        {
            continue;
        }

        //Counters use Interlocked for consistency with the parallel Process overload
        Interlocked.Add(ref processedWordCnt, wordCnt);

        //Run neural network
        int[] predicted;
        if (IsCRFTraining)
        {
            predicted = rnn.ProcessSequenceCRF(pSequence as Sequence, runningMode);
        }
        else
        {
            Matrix <float> m;
            predicted = rnn.ProcessSequence(pSequence, runningMode, false, out m);
        }

        int newTknErrCnt;
        if (pSequence is Sequence)
        {
            newTknErrCnt = GetErrorTokenNum(pSequence as Sequence, predicted);
        }
        else
        {
            newTknErrCnt = GetErrorTokenNum((pSequence as SequencePair).tgtSequence, predicted);
        }

        Interlocked.Add(ref tknErrCnt, newTknErrCnt);
        if (newTknErrCnt > 0)
        {
            Interlocked.Increment(ref sentErrCnt);
        }

        Interlocked.Increment(ref processedSequence);

        //Show progress information
        if (processedSequence % 1000 == 0)
        {
            Logger.WriteLine("Progress = {0} ",
                processedSequence / 1000 + "K/" + totalSequenceNum / 1000.0 + "K");
            Logger.WriteLine(" Error token ratio = {0}%",
                (double)tknErrCnt / (double)processedWordCnt * 100.0);
            Logger.WriteLine(" Error sentence ratio = {0}%",
                (double)sentErrCnt / (double)processedSequence * 100.0);
        }

        //Save intermediate model file
        if (ModelSettings.SaveStep > 0 && processedSequence % ModelSettings.SaveStep == 0)
        {
            //After processed every m_SaveStep sentences, save current model into a temporary file
            Logger.WriteLine("Saving temporary model into file...");
            try
            {
                //Guarded like the parallel overload: a failed checkpoint save
                //must not abort the whole processing loop
                rnn.SaveModel("model.tmp");
            }
            catch (Exception err)
            {
                Logger.WriteLine($"Fail to save temporary model into file. Error: {err.Message}");
            }
        }
    }
}
/// <summary>
/// Sequence-to-sequence processing is not supported by this RNN implementation.
/// </summary>
/// <param name="pSequence">Source sentence plus target feature sequence (unused).</param>
/// <param name="runningMode">Training, validation or test mode (unused).</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public override int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode) { throw new NotImplementedException(); }