/// <summary>
/// Runs one pass over <paramref name="trainingSet"/> with <paramref name="rnn"/>:
/// shuffles the corpus, feeds each sequence through the network, accumulates
/// token/sentence error counters (shared fields, updated via Interlocked), logs
/// progress every 1000 sequences, and periodically checkpoints the model.
/// </summary>
/// <param name="rnn">Network used to process each sequence; also saved to "model.tmp" at checkpoints.</param>
/// <param name="trainingSet">Corpus to iterate; shuffled in place before the pass.</param>
/// <param name="runningMode">Mode forwarded to the sequence-processing calls (e.g. training vs. validation).</param>
/// <param name="totalSequenceNum">Total corpus size, used only for the progress log line.</param>
public void Process(RNN<T> rnn, DataSet<T> trainingSet, RunningMode runningMode, int totalSequenceNum)
{
    // Shuffle training corpus so each pass visits sequences in a fresh order.
    trainingSet.Shuffle();

    for (var i = 0; i < trainingSet.SequenceList.Count; i++)
    {
        var pSequence = trainingSet.SequenceList[i];

        // Cast once up front instead of repeating is/as pairs below.
        // Exactly one of these is expected to be non-null.
        var sequence = pSequence as Sequence;
        var sequencePair = pSequence as SequencePair;

        // Determine the token count, skipping sequences the model cannot handle.
        int wordCnt;
        if (sequence != null)
        {
            wordCnt = sequence.States.Length;
        }
        else
        {
            if (sequencePair == null)
            {
                // Neither a Sequence nor a SequencePair: the original code would
                // have thrown a NullReferenceException here; fail with a clear message.
                throw new InvalidOperationException("Unsupported sequence type: " + pSequence.GetType());
            }
            if (sequencePair.srcSentence.TokensList.Count > rnn.MaxSeqLength)
            {
                continue;
            }
            wordCnt = sequencePair.tgtSequence.States.Length;
        }

        if (wordCnt > rnn.MaxSeqLength)
        {
            continue;
        }

        Interlocked.Add(ref processedWordCnt, wordCnt);

        int[] predicted;
        if (IsCRFTraining)
        {
            // NOTE(review): for a SequencePair this passes null, as the original
            // code did — presumably CRF training only runs on plain Sequences; confirm.
            predicted = rnn.ProcessSequenceCRF(sequence, runningMode);
        }
        else
        {
            Matrix<float> m;
            predicted = rnn.ProcessSequence(pSequence, runningMode, false, out m);
        }

        // Score against the gold sequence (the target side for sequence pairs).
        int newTknErrCnt;
        if (sequence != null)
        {
            newTknErrCnt = GetErrorTokenNum(sequence, predicted);
        }
        else
        {
            newTknErrCnt = GetErrorTokenNum(sequencePair.tgtSequence, predicted);
        }

        Interlocked.Add(ref tknErrCnt, newTknErrCnt);
        if (newTknErrCnt > 0)
        {
            // Any token error counts the whole sentence as wrong.
            Interlocked.Increment(ref sentErrCnt);
        }

        Interlocked.Increment(ref processedSequence);

        // Progress report every 1000 processed sequences.
        if (processedSequence % 1000 == 0)
        {
            Logger.WriteLine("Progress = {0} ", processedSequence / 1000 + "K/" + totalSequenceNum / 1000.0 + "K");
            Logger.WriteLine(" Error token ratio = {0}%", (double)tknErrCnt / (double)processedWordCnt * 100.0);
            Logger.WriteLine(" Error sentence ratio = {0}%", (double)sentErrCnt / (double)processedSequence * 100.0);
        }

        if (ModelSettings.SaveStep > 0 && processedSequence % ModelSettings.SaveStep == 0)
        {
            // After processed every m_SaveStep sentences, save current model into a temporary file
            Logger.WriteLine("Saving temporary model into file...");
            rnn.SaveModel("model.tmp");
        }
    }
}