// Round-trip check for the n-gram deasciifier: every analyzable corpus word whose
// asciified form differs from the original is deasciified together with its
// preceding word as context, and the result must equal the original word.
public void TestDeasciify()
{
    var analyzer = new FsmMorphologicalAnalyzer();
    var languageModel = new NGram<string>("../../../ngram.txt");
    languageModel.CalculateNGramProbabilities(new NoSmoothing<string>());
    var deasciifier = new NGramDeasciifier(analyzer, languageModel, true);
    var asciifier = new SimpleAsciifier();
    var testCorpus = new Corpus.Corpus("../../../corpus.txt");

    for (var sentenceNo = 0; sentenceNo < testCorpus.SentenceCount(); sentenceNo++)
    {
        var sentence = testCorpus.GetSentence(sentenceNo);

        // Start at the second word: each word is checked with its predecessor as context.
        for (var wordNo = 1; wordNo < sentence.WordCount(); wordNo++)
        {
            var word = sentence.GetWord(wordNo);
            var original = word.GetName();

            // Words without any morphological analysis are not checked.
            if (fsmHasNoAnalysis(analyzer, original))
            {
                continue;
            }

            // Words that asciifying leaves unchanged are not checked either.
            var asciified = asciifier.Asciify(word);
            if (asciified.Equals(original))
            {
                continue;
            }

            var previous = sentence.GetWord(wordNo - 1).GetName();
            var context = new Sentence($"{previous} {original}");
            var deasciified = deasciifier.Deasciify(context);

            Assert.AreEqual(original, deasciified.GetWord(1).GetName());
        }
    }

    // True when the analyzer returns no parse for the given surface form.
    bool fsmHasNoAnalysis(FsmMorphologicalAnalyzer fsm, string surfaceForm)
    {
        return !(fsm.MorphologicalAnalysis(surfaceForm).Size() > 0);
    }
}
/**
 * <summary>Advances the position within the current sentence by one word and returns the sentence to
 * process next. While the position is still inside <paramref name="currentSentence"/>, that same sentence
 * is returned. Once the position reaches its word count, the sentence's word count is added to the running
 * word total, the sentence index is incremented and the position is reset to 0. If the index has then
 * reached the number of sentences in the corpus, the iteration counter is incremented, the word counters
 * and the sentence index are reset to 0 and the corpus sentences are shuffled.</summary>
 * <param name="currentSentence">Current sentence processed.</param>
 * <returns><paramref name="currentSentence"/> if it is not finished yet; otherwise the corpus sentence at
 * the updated sentence index (index 0 of the reshuffled corpus after the last sentence is finished).</returns>
 */
public Sentence SentenceUpdate(Sentence currentSentence)
{
    _sentencePosition++;

    var currentLength = currentSentence.WordCount();
    if (_sentencePosition < currentLength)
    {
        // Still inside the current sentence: nothing else to update.
        return currentSentence;
    }

    // The current sentence is finished; move to the start of the next one.
    _wordCount += currentLength;
    _sentenceIndex++;
    _sentencePosition = 0;

    var corpusExhausted = _sentenceIndex == corpus.SentenceCount();
    if (corpusExhausted)
    {
        // One full pass is done: count it, reset the counters and reshuffle.
        _iterationCount++;
        _wordCount = 0;
        _lastWordCount = 0;
        _sentenceIndex = 0;
        // NOTE(review): the constant argument 1 is presumably a fixed random seed - confirm.
        corpus.ShuffleSentences(1);
    }

    return corpus.GetSentence(_sentenceIndex);
}