// Measures how well LongestRootFirstDisambiguation reproduces the gold parses stored in the
// disambiguation corpus loaded from "../../../penntreebank.txt". Every sentence is re-analysed with
// the robust morphological analysis, disambiguated, and compared word by word with the corpus:
//   * a full match of the (Turkish lower-cased) transition list counts as a correct parse and a correct root;
//   * otherwise, equal root words still count as a correct root.
// Both ratios over the total number of words are asserted against fixed expected values.
public void TestDisambiguation()
{
    var analyzer = new FsmMorphologicalAnalyzer();
    var corpus = new DisambiguationCorpus("../../../penntreebank.txt");
    var disambiguator = new LongestRootFirstDisambiguation();
    disambiguator.Train(corpus);

    // Lower-casing is done with Turkish casing rules on both sides of the comparison.
    var turkish = new CultureInfo("tr-TR");
    var parseHits = 0;
    var rootHits = 0;

    for (var sentenceIndex = 0; sentenceIndex < corpus.SentenceCount(); sentenceIndex++)
    {
        var sentence = corpus.GetSentence(sentenceIndex);
        var candidates = analyzer.RobustMorphologicalAnalysis(sentence);
        var predictions = disambiguator.Disambiguate(candidates);

        for (var wordIndex = 0; wordIndex < sentence.WordCount(); wordIndex++)
        {
            var gold = (DisambiguatedWord)sentence.GetWord(wordIndex);
            var predicted = predictions[wordIndex];

            var predictedParse = predicted.TransitionList().ToLower(turkish);
            var goldParse = gold.GetParse().ToString().ToLower(turkish);
            var sameParse = predictedParse.Equals(goldParse);

            if (sameParse)
            {
                parseHits++;
            }

            // The root comparison is only evaluated when the full parse differs (short-circuit).
            if (sameParse || predicted.GetWord().Equals(gold.GetParse().GetWord()))
            {
                rootHits++;
            }
        }
    }

    Assert.AreEqual(0.9193, (double)rootHits / corpus.NumberOfWords(), 0.0001);
    Assert.AreEqual(0.8273, (double)parseHits / corpus.NumberOfWords(), 0.0001);
}
/**
 * <summary>Parameterless constructor. Passes a newly created <c>FsmMorphologicalAnalyzer</c> to the base
 * class constructor and assigns a new <c>LongestRootFirstDisambiguation</c> instance to the
 * <c>longestRootFirstDisambiguation</c> member.</summary>
 */
public TurkishSentenceAutoDisambiguator() : base(new FsmMorphologicalAnalyzer())
{
    longestRootFirstDisambiguation = new LongestRootFirstDisambiguation();
}