/// <summary>
/// Trains the network for one iteration (epoch) over the given training corpus and
/// reports progress, error ratios and perplexity.
/// </summary>
/// <param name="trainingSet">Training corpus; it is shuffled in place before the pass.</param>
/// <param name="iter">Current iteration number (used for logging only).</param>
/// <returns>The training-set perplexity of this iteration.</returns>
public double TrainNet(DataSet<T> trainingSet, int iter)
{
    var start = DateTime.Now;
    Logger.WriteLine("Iter " + iter + " begins with learning rate alpha = " + RNNHelper.LearningRate + " ...");

    //Initialize variables
    logp = 0;

    //Shuffle training corpus
    trainingSet.Shuffle();

    var numSequence = trainingSet.SequenceList.Count;
    var wordCnt = 0;
    var tknErrCnt = 0;
    var sentErrCnt = 0;
    Logger.WriteLine("Progress = 0/" + numSequence / 1000.0 + "K\r");

    for (var curSequence = 0; curSequence < numSequence; curSequence++)
    {
        var pSequence = trainingSet.SequenceList[curSequence];

        //For sequence-to-sequence pairs only the target side contributes tokens
        if (pSequence is Sequence)
        {
            wordCnt += (pSequence as Sequence).States.Length;
        }
        else
        {
            wordCnt += (pSequence as SequencePair).tgtSequence.States.Length;
        }

        //Run the network in training mode; the decoding path depends on the model type
        int[] predicted;
        if (IsCRFTraining)
        {
            predicted = ProcessSequenceCRF(pSequence as Sequence, RunningMode.Training);
        }
        else if (pSequence is SequencePair)
        {
            predicted = ProcessSeq2Seq(pSequence as SequencePair, RunningMode.Training);
        }
        else
        {
            //Output matrix is required by the API but not used here
            Matrix<float> m;
            predicted = ProcessSequence(pSequence as Sequence, RunningMode.Training, false, out m);
        }

        int newTknErrCnt;
        if (pSequence is Sequence)
        {
            newTknErrCnt = GetErrorTokenNum(pSequence as Sequence, predicted);
        }
        else
        {
            newTknErrCnt = GetErrorTokenNum((pSequence as SequencePair).tgtSequence, predicted);
        }

        tknErrCnt += newTknErrCnt;
        if (newTknErrCnt > 0)
        {
            sentErrCnt++;
        }

        if ((curSequence + 1) % 1000 == 0)
        {
            Logger.WriteLine("Progress = {0} ", (curSequence + 1) / 1000 + "K/" + numSequence / 1000.0 + "K");
            Logger.WriteLine(" Train cross-entropy = {0} ", -logp / Math.Log10(2.0) / wordCnt);
            Logger.WriteLine(" Error token ratio = {0}%", (double)tknErrCnt / (double)wordCnt * 100.0);
            //BUGFIX: denominator was curSequence (off by one) — (curSequence + 1) sequences have been processed at this point
            Logger.WriteLine(" Error sentence ratio = {0}%", (double)sentErrCnt / (double)(curSequence + 1) * 100.0);
        }

        if (SaveStep > 0 && (curSequence + 1) % SaveStep == 0)
        {
            //After processed every m_SaveStep sentences, save current model into a temporary file
            Logger.WriteLine("Saving temporary model into file...");
            SaveModel("model.tmp");
        }
    }

    var now = DateTime.Now;
    var duration = now.Subtract(start);
    var entropy = -logp / Math.Log10(2.0) / wordCnt;
    var ppl = exp_10(-logp / wordCnt);

    Logger.WriteLine("Iter " + iter + " completed");
    //BUGFIX: log message typo — "time escape" corrected to "time elapsed"
    Logger.WriteLine("Sentences = " + numSequence + ", time elapsed = " + duration + "s, speed = " + numSequence / duration.TotalSeconds);
    Logger.WriteLine("In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl);
    return (ppl);
}
/// <summary>
/// Trains the network for one iteration (epoch) over the given training corpus and
/// reports progress, error ratios and perplexity. Virtual so derived networks can
/// override the training loop.
/// </summary>
/// <param name="trainingSet">Training corpus; it is shuffled in place before the pass.</param>
/// <param name="iter">Current iteration number (used for logging only).</param>
/// <returns>The training-set perplexity of this iteration.</returns>
public virtual double TrainNet(DataSet trainingSet, int iter)
{
    DateTime start = DateTime.Now;
    Logger.WriteLine("Iter " + iter + " begins with learning rate alpha = " + LearningRate + " ...");

    //Initialize variables
    logp = 0;

    //Shuffle training corpus
    trainingSet.Shuffle();

    int numSequence = trainingSet.SequenceList.Count;
    int wordCnt = 0;
    int tknErrCnt = 0;
    int sentErrCnt = 0;
    Logger.WriteLine("Progress = 0/" + numSequence / 1000.0 + "K\r");

    for (int curSequence = 0; curSequence < numSequence; curSequence++)
    {
        Sequence pSequence = trainingSet.SequenceList[curSequence];
        wordCnt += pSequence.States.Length;

        //Run the network in training mode; CRF models decode inside the CRF routine
        int[] predicted;
        if (IsCRFTraining == true)
        {
            predicted = PredictSentenceCRF(pSequence, RunningMode.Train);
        }
        else
        {
            Matrix<double> m;
            m = PredictSentence(pSequence, RunningMode.Train);
            predicted = GetBestResult(m);
        }

        int newTknErrCnt = GetErrorTokenNum(pSequence, predicted);
        tknErrCnt += newTknErrCnt;
        if (newTknErrCnt > 0)
        {
            sentErrCnt++;
        }

        if ((curSequence + 1) % 1000 == 0)
        {
            Logger.WriteLine("Progress = {0} ", (curSequence + 1) / 1000 + "K/" + numSequence / 1000.0 + "K");
            Logger.WriteLine(" Train cross-entropy = {0} ", -logp / Math.Log10(2.0) / wordCnt);
            Logger.WriteLine(" Error token ratio = {0}%", (double)tknErrCnt / (double)wordCnt * 100.0);
            //BUGFIX: denominator was curSequence (off by one) — (curSequence + 1) sequences have been processed at this point
            Logger.WriteLine(" Error sentence ratio = {0}%", (double)sentErrCnt / (double)(curSequence + 1) * 100.0);
        }

        if (SaveStep > 0 && (curSequence + 1) % SaveStep == 0)
        {
            //After processed every m_SaveStep sentences, save current model into a temporary file
            Logger.WriteLine("Saving temporary model into file...");
            SaveModel(ModelTempFile);
        }
    }

    DateTime now = DateTime.Now;
    TimeSpan duration = now.Subtract(start);
    double entropy = -logp / Math.Log10(2.0) / wordCnt;
    double ppl = exp_10(-logp / wordCnt);

    Logger.WriteLine("Iter " + iter + " completed");
    //BUGFIX: log message typo — "time escape" corrected to "time elapsed"
    Logger.WriteLine("Sentences = " + numSequence + ", time elapsed = " + duration + "s, speed = " + numSequence / duration.TotalSeconds);
    Logger.WriteLine("In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl);
    return (ppl);
}
/// <summary>
/// Runs every sequence of the training set through the given RNN, updating the
/// shared progress/error counters and periodically logging statistics and saving
/// a temporary model. Sequences longer than the model's maximum length are skipped.
/// </summary>
/// <param name="rnn">Network instance used to process each sequence.</param>
/// <param name="trainingSet">Corpus to process; shuffled in place first.</param>
/// <param name="runningMode">Whether the pass trains or only evaluates.</param>
/// <param name="totalSequenceNum">Total sequence count, used for progress logging.</param>
public void Process(RNN<T> rnn, DataSet<T> trainingSet, RunningMode runningMode, int totalSequenceNum)
{
    //Shuffle training corpus before the pass
    trainingSet.Shuffle();

    foreach (var sequence in trainingSet.SequenceList)
    {
        var plainSequence = sequence as Sequence;

        //Determine how many tokens this sequence contributes; for sequence pairs,
        //only the target side counts, and over-long source sentences are skipped.
        int tokenCount;
        if (plainSequence != null)
        {
            tokenCount = plainSequence.States.Length;
        }
        else
        {
            var pair = sequence as SequencePair;
            if (pair.srcSentence.TokensList.Count > rnn.MaxSeqLength)
            {
                continue;
            }
            tokenCount = pair.tgtSequence.States.Length;
        }

        //Skip sequences that exceed the model's maximum length
        if (tokenCount > rnn.MaxSeqLength)
        {
            continue;
        }

        Interlocked.Add(ref processedWordCnt, tokenCount);

        //Run the network; CRF models decode inside the CRF routine
        int[] decoded;
        if (IsCRFTraining)
        {
            decoded = rnn.ProcessSequenceCRF(plainSequence, runningMode);
        }
        else
        {
            Matrix<float> unusedOutput;
            decoded = rnn.ProcessSequence(sequence, runningMode, false, out unusedOutput);
        }

        //Count token errors against the gold labels (target side for pairs)
        var errorTokens = plainSequence != null
            ? GetErrorTokenNum(plainSequence, decoded)
            : GetErrorTokenNum((sequence as SequencePair).tgtSequence, decoded);

        Interlocked.Add(ref tknErrCnt, errorTokens);
        if (errorTokens > 0)
        {
            Interlocked.Increment(ref sentErrCnt);
        }
        Interlocked.Increment(ref processedSequence);

        //Report progress every 1000 processed sequences
        if (processedSequence % 1000 == 0)
        {
            Logger.WriteLine("Progress = {0} ", processedSequence / 1000 + "K/" + totalSequenceNum / 1000.0 + "K");
            Logger.WriteLine(" Error token ratio = {0}%", (double)tknErrCnt / (double)processedWordCnt * 100.0);
            Logger.WriteLine(" Error sentence ratio = {0}%", (double)sentErrCnt / (double)processedSequence * 100.0);
        }

        //After processed every m_SaveStep sentences, save current model into a temporary file
        if (ModelSettings.SaveStep > 0 && processedSequence % ModelSettings.SaveStep == 0)
        {
            Logger.WriteLine("Saving temporary model into file...");
            rnn.SaveModel("model.tmp");
        }
    }
}
/// <summary>
/// Processes the entire corpus set in parallel using a pool of RNN instances.
/// Each worker dequeues a free RNN, processes one sequence, updates the shared
/// counters with Interlocked operations, and returns the RNN to the pool.
/// Weight updates are deliberately best-effort: only one thread at a time
/// attempts them, and no locks protect the weights themselves (see inline note).
/// </summary>
/// <param name="rnns">Pool of RNN instances shared between worker threads; more are cloned on demand.</param>
/// <param name="corpusSet">Corpus to process; shuffled in place before the pass.</param>
/// <param name="runningMode">Whether the pass trains or only evaluates.</param>
public void Process(List<RNN<T>> rnns, DataSet<T> corpusSet, RunningMode runningMode)
{
    //One worker per logical processor
    parallelOptions = new ParallelOptions();
    parallelOptions.MaxDegreeOfParallelism = Environment.ProcessorCount;

    //Reset shared progress/error counters for this pass
    processedSequence = 0;
    processedWordCnt = 0;
    tknErrCnt = 0;
    sentErrCnt = 0;
    corpusSet.Shuffle();

    //Add RNN instance into job queue
    ConcurrentQueue<RNN<T>> qRNNs = new ConcurrentQueue<RNN<T>>();
    foreach (var rnn in rnns)
    {
        qRNNs.Enqueue(rnn);
    }

    Parallel.For(0, corpusSet.SequenceList.Count, parallelOptions, i =>
    {
        //Get a free RNN instance for running
        RNN<T> rnn;
        if (qRNNs.TryDequeue(out rnn) == false)
        {
            //The queue is empty, so we clone a new one
            rnn = rnns[0].Clone();
            Logger.WriteLine("Cloned a new RNN instance for training.");
        }

        var pSequence = corpusSet.SequenceList[i];

        //Calculate how many tokens we are going to process in this sequence;
        //for sequence pairs only the target side counts, and over-long source
        //sentences are skipped (the RNN must be returned to the pool first)
        int tokenCnt = 0;
        if (pSequence is Sequence)
        {
            tokenCnt = (pSequence as Sequence).States.Length;
        }
        else
        {
            SequencePair sp = pSequence as SequencePair;
            if (sp.srcSentence.TokensList.Count > rnn.MaxSeqLength)
            {
                qRNNs.Enqueue(rnn);
                return;
            }
            tokenCnt = sp.tgtSequence.States.Length;
        }

        //This sequence is too long, so we ignore it
        if (tokenCnt > rnn.MaxSeqLength)
        {
            qRNNs.Enqueue(rnn);
            return;
        }

        //Run neural network
        int[] predicted;
        if (IsCRFTraining)
        {
            predicted = rnn.ProcessSequenceCRF(pSequence as Sequence, runningMode);
        }
        else
        {
            //Output matrix is required by the API but not used here
            Matrix<float> m;
            predicted = rnn.ProcessSequence(pSequence, runningMode, false, out m);
        }

        //Update counters (shared across workers, hence Interlocked)
        Interlocked.Add(ref processedWordCnt, tokenCnt);
        Interlocked.Increment(ref processedSequence);
        Interlocked.Increment(ref processMiniBatch);

        //Count token errors against the gold labels (target side for pairs)
        int newTknErrCnt;
        if (pSequence is Sequence)
        {
            newTknErrCnt = GetErrorTokenNum(pSequence as Sequence, predicted);
        }
        else
        {
            newTknErrCnt = GetErrorTokenNum((pSequence as SequencePair).tgtSequence, predicted);
        }
        Interlocked.Add(ref tknErrCnt, newTknErrCnt);
        if (newTknErrCnt > 0)
        {
            Interlocked.Increment(ref sentErrCnt);
        }

        //Update weights
        //We only allow one thread to update weights, and other threads keep running to train or predict given sequences
        //Note: we don't add any lock when updating weights and deltas for weights in order to improve performance singificantly,
        //so that means race condition will happen and it's okay for us.
        //NOTE(review): the updatingWeights check-then-increment is itself racy by design —
        //two threads may occasionally both pass the guard; accepted per the comment above.
        if (runningMode == RunningMode.Training && processMiniBatch > 0 &&
            processMiniBatch % ModelSettings.MiniBatchSize == 0 && updatingWeights == 0)
        {
            Interlocked.Increment(ref updatingWeights);
            if (updatingWeights == 1)
            {
                rnn.UpdateWeights();
                Interlocked.Exchange(ref processMiniBatch, 0);
            }
            Interlocked.Decrement(ref updatingWeights);
        }

        //Show progress information
        if (processedSequence % 1000 == 0)
        {
            Logger.WriteLine("Progress = {0} ", processedSequence / 1000 + "K/" + corpusSet.SequenceList.Count / 1000.0 + "K");
            Logger.WriteLine(" Error token ratio = {0}%", (double)tknErrCnt / (double)processedWordCnt * 100.0);
            Logger.WriteLine(" Error sentence ratio = {0}%", (double)sentErrCnt / (double)processedSequence * 100.0);
        }

        //Save intermediate model file
        if (ModelSettings.SaveStep > 0 && processedSequence % ModelSettings.SaveStep == 0)
        {
            //After processed every m_SaveStep sentences, save current model into a temporary file
            //Saving may race with concurrent weight updates; failures are logged and ignored
            Logger.WriteLine("Saving temporary model into file...");
            try
            {
                rnn.SaveModel("model.tmp");
            }
            catch (Exception err)
            {
                Logger.WriteLine($"Fail to save temporary model into file. Error: {err.Message.ToString()}");
            }
        }

        //Return the RNN instance to the pool for the next worker
        qRNNs.Enqueue(rnn);
    });
}
/// <summary>
/// Trains the network for one iteration (epoch) over the given training corpus and
/// reports progress, error ratios and perplexity.
/// </summary>
/// <param name="trainingSet">Training corpus; it is shuffled in place before the pass.</param>
/// <param name="iter">Current iteration number (used for logging only).</param>
/// <returns>The training-set perplexity of this iteration.</returns>
public double TrainNet(DataSet trainingSet, int iter)
{
    DateTime start = DateTime.Now;
    Logger.WriteLine("Iter " + iter + " begins with learning rate alpha = " + RNNHelper.LearningRate + " ...");

    //Initialize variables
    logp = 0;

    //Shuffle training corpus
    trainingSet.Shuffle();

    int numSequence = trainingSet.SequenceList.Count;
    int wordCnt = 0;
    int tknErrCnt = 0;
    int sentErrCnt = 0;
    Logger.WriteLine("Progress = 0/" + numSequence / 1000.0 + "K\r");

    for (int curSequence = 0; curSequence < numSequence; curSequence++)
    {
        Sequence pSequence = trainingSet.SequenceList[curSequence];
        wordCnt += pSequence.States.Length;

        //Run the network in training mode; CRF models decode inside the CRF routine
        int[] predicted;
        if (IsCRFTraining == true)
        {
            predicted = ProcessSequenceCRF(pSequence, RunningMode.Train);
        }
        else
        {
            Matrix<double> m;
            m = ProcessSequence(pSequence, RunningMode.Train);
            predicted = GetBestResult(m);
        }

        int newTknErrCnt = GetErrorTokenNum(pSequence, predicted);
        tknErrCnt += newTknErrCnt;
        if (newTknErrCnt > 0)
        {
            sentErrCnt++;
        }

        if ((curSequence + 1) % 1000 == 0)
        {
            Logger.WriteLine("Progress = {0} ", (curSequence + 1) / 1000 + "K/" + numSequence / 1000.0 + "K");
            Logger.WriteLine(" Train cross-entropy = {0} ", -logp / Math.Log10(2.0) / wordCnt);
            Logger.WriteLine(" Error token ratio = {0}%", (double)tknErrCnt / (double)wordCnt * 100.0);
            //BUGFIX: denominator was curSequence (off by one) — (curSequence + 1) sequences have been processed at this point
            Logger.WriteLine(" Error sentence ratio = {0}%", (double)sentErrCnt / (double)(curSequence + 1) * 100.0);
        }

        if (SaveStep > 0 && (curSequence + 1) % SaveStep == 0)
        {
            //After processed every m_SaveStep sentences, save current model into a temporary file
            Logger.WriteLine("Saving temporary model into file...");
            SaveModel(ModelTempFile);
        }
    }

    DateTime now = DateTime.Now;
    TimeSpan duration = now.Subtract(start);
    double entropy = -logp / Math.Log10(2.0) / wordCnt;
    double ppl = exp_10(-logp / wordCnt);

    Logger.WriteLine("Iter " + iter + " completed");
    //BUGFIX: log message typo — "time escape" corrected to "time elapsed"
    Logger.WriteLine("Sentences = " + numSequence + ", time elapsed = " + duration + "s, speed = " + numSequence / duration.TotalSeconds);
    Logger.WriteLine("In training: log probability = " + logp + ", cross-entropy = " + entropy + ", perplexity = " + ppl);
    return ppl;
}