Esempio n. 1
0
        public void Process(RNN <T> rnn, DataSet <T> trainingSet, RunningMode runningMode, int totalSequenceNum)
        {
            //Shffle training corpus
            trainingSet.Shuffle();

            for (var i = 0; i < trainingSet.SequenceList.Count; i++)
            {
                var pSequence = trainingSet.SequenceList[i];

                int wordCnt = 0;
                if (pSequence is Sequence)
                {
                    wordCnt = (pSequence as Sequence).States.Length;
                }
                else
                {
                    SequencePair sp = pSequence as SequencePair;
                    if (sp.srcSentence.TokensList.Count > rnn.MaxSeqLength)
                    {
                        continue;
                    }

                    wordCnt = sp.tgtSequence.States.Length;
                }

                if (wordCnt > rnn.MaxSeqLength)
                {
                    continue;
                }

                Interlocked.Add(ref processedWordCnt, wordCnt);

                int[] predicted;
                if (IsCRFTraining)
                {
                    predicted = rnn.ProcessSequenceCRF(pSequence as Sequence, runningMode);
                }
                else
                {
                    Matrix <float> m;
                    predicted = rnn.ProcessSequence(pSequence, runningMode, false, out m);
                }

                int newTknErrCnt;
                if (pSequence is Sequence)
                {
                    newTknErrCnt = GetErrorTokenNum(pSequence as Sequence, predicted);
                }
                else
                {
                    newTknErrCnt = GetErrorTokenNum((pSequence as SequencePair).tgtSequence, predicted);
                }

                Interlocked.Add(ref tknErrCnt, newTknErrCnt);
                if (newTknErrCnt > 0)
                {
                    Interlocked.Increment(ref sentErrCnt);
                }

                Interlocked.Increment(ref processedSequence);

                if (processedSequence % 1000 == 0)
                {
                    Logger.WriteLine("Progress = {0} ", processedSequence / 1000 + "K/" + totalSequenceNum / 1000.0 + "K");
                    Logger.WriteLine(" Error token ratio = {0}%", (double)tknErrCnt / (double)processedWordCnt * 100.0);
                    Logger.WriteLine(" Error sentence ratio = {0}%", (double)sentErrCnt / (double)processedSequence * 100.0);
                }

                if (ModelSettings.SaveStep > 0 && processedSequence % ModelSettings.SaveStep == 0)
                {
                    //After processed every m_SaveStep sentences, save current model into a temporary file
                    Logger.WriteLine("Saving temporary model into file...");
                    rnn.SaveModel("model.tmp");
                }
            }
        }