public void TestDeasciify()
        {
            // Build the n-gram model (probabilities computed with NoSmoothing) and the
            // deasciifier under test, then load the reference corpus.
            var analyzer      = new FsmMorphologicalAnalyzer();
            var languageModel = new NGram<string>("../../../ngram.txt");
            languageModel.CalculateNGramProbabilities(new NoSmoothing<string>());

            var deasciifier = new NGramDeasciifier(analyzer, languageModel, true);
            var asciifier   = new SimpleAsciifier();
            var corpus      = new Corpus.Corpus("../../../corpus.txt");

            for (var sentenceIndex = 0; sentenceIndex < corpus.SentenceCount(); sentenceIndex++)
            {
                var sentence = corpus.GetSentence(sentenceIndex);

                // Start at 1: each checked word is deasciified together with the word before it.
                for (var wordIndex = 1; wordIndex < sentence.WordCount(); wordIndex++)
                {
                    var word     = sentence.GetWord(wordIndex);
                    var expected = word.GetName();

                    // Skip words for which the morphological analysis yields no result.
                    if (analyzer.MorphologicalAnalysis(expected).Size() <= 0)
                    {
                        continue;
                    }

                    // Skip words that asciification leaves unchanged; there is nothing to restore.
                    var asciified = asciifier.Asciify(word);
                    if (asciified.Equals(expected))
                    {
                        continue;
                    }

                    // Deasciify the two-word sentence "<previous word> <current word>" and
                    // check that the second word matches the original corpus word.
                    var previous = sentence.GetWord(wordIndex - 1).GetName();
                    var restored = deasciifier.Deasciify(new Sentence(previous + " " + expected));
                    Assert.AreEqual(expected, restored.GetWord(1).GetName());
                }
            }
        }
예제 #2
0
        /**
         * <summary>Advances the word position inside the current sentence and returns the sentence to process next.
         * While the incremented position is still inside <paramref name="currentSentence"/>, that same sentence is
         * returned. Otherwise the sentence's word count is added to the running word count, the position is reset
         * to 0 and the sentence index is incremented. When the index reaches the corpus sentence count, the
         * iteration counter is incremented, the word counters and the sentence index are reset to 0 and
         * <c>corpus.ShuffleSentences(1)</c> is called before the sentence at the new index is fetched.</summary>
         * <param name="currentSentence">Sentence currently being processed.</param>
         * <returns><paramref name="currentSentence"/> if it still has words left; otherwise the corpus sentence at
         * the updated sentence index (index 0 after a wrap-around).</returns>
         */
        public Sentence SentenceUpdate(Sentence currentSentence)
        {
            // Still inside the current sentence: nothing else to update.
            if (++_sentencePosition < currentSentence.WordCount())
            {
                return currentSentence;
            }

            // Current sentence is finished: account for its words and move on.
            _wordCount        += currentSentence.WordCount();
            _sentencePosition  = 0;
            _sentenceIndex++;

            var reachedEndOfCorpus = _sentenceIndex == corpus.SentenceCount();
            if (reachedEndOfCorpus)
            {
                // One full pass over the corpus is done: start a new iteration from index 0.
                _iterationCount++;
                _sentenceIndex = 0;
                _lastWordCount = 0;
                _wordCount     = 0;
                corpus.ShuffleSentences(1);
            }

            return corpus.GetSentence(_sentenceIndex);
        }