/// <summary>
/// Trains a new <see cref="LuMiiTagger"/> on <paramref name="train"/>, tags <paramref name="test"/>,
/// writes accuracy and timing figures to the debug output, and asserts that the combined
/// MSD + lemma accuracy lies strictly between <paramref name="minAccuracy"/> and 0.97.
/// </summary>
/// <param name="name">Label written in the debug output header.</param>
/// <param name="minAccuracy">Exclusive lower bound for the MSD + lemma accuracy (fraction, not percent).</param>
/// <param name="train">Training sentences; must be non-empty and more numerous than <paramref name="test"/>.</param>
/// <param name="test">Sentences to tag and score; must be non-empty.</param>
public void Split(string name, double minAccuracy, Sentence[] train, Sentence[] test)
{
    // Sanity-check the split before doing any expensive work.
    Assert.Greater(train.Length, 0);
    Assert.Greater(test.Length, 0);
    Assert.Greater(train.Length, test.Length);

    LuMiiTagger tagger = new LuMiiTagger();

    // Time training and tagging separately.
    Stopwatch trainWatch = Stopwatch.StartNew();
    tagger.Train(train);
    trainWatch.Stop();

    Stopwatch tagWatch = Stopwatch.StartNew();
    tagger.Tag(test);
    tagWatch.Stop();

    // Flatten the tagged sentences so accuracy is computed per token.
    Token[] tagged = test.SelectMany(sentence => sentence).ToArray();
    double total = tagged.Length;

    int tagHits = tagged.Count(token => token.IsTagCorrect);
    int msdHits = tagged.Count(token => token.IsMsdCorrect);
    int msdLemmaHits = tagged.Count(token => token.IsMsdCorrect && token.IsLemmaCorrect);
    int lemmaHits = tagged.Count(token => token.IsLemmaCorrect);

    double tagAccuracy = tagHits / total;
    double msdAccuracy = msdHits / total;
    double msdLemmaAccuracy = msdLemmaHits / total;
    double lemmaAccuracy = lemmaHits / total;

    // Not used by any assertion; presumably kept so the failing lemmas can be inspected in a debugger.
    Token[] lemmaIncorrect = tagged
        .Where(token => token.IsMsdCorrect && !token.IsLemmaCorrect)
        .ToArray();

    int trainTokenCount = train.SelectMany(sentence => sentence).Count();
    int testTokenCount = test.SelectMany(sentence => sentence).Count();

    Debug.WriteLine("Split validation for " + name);
    Debug.WriteLine("Train: {0} sentences, {1} tokens", train.Length, trainTokenCount);
    Debug.WriteLine("Test: {0} sentences, {1} tokens", test.Length, testTokenCount);
    Debug.WriteLine("Accuracy tag: {0:0.00}%", tagAccuracy * 100);
    Debug.WriteLine("Accuracy msd: {0:0.00}%", msdAccuracy * 100);
    Debug.WriteLine("Accuracy msd + lemma: {0:0.00}%", msdLemmaAccuracy * 100);
    Debug.WriteLine("Accuracy lemma: {0:0.00}%", lemmaAccuracy * 100);
    Debug.WriteLine("Train duration: {0} or {1:0} ms", trainWatch.Elapsed, trainWatch.ElapsedMilliseconds);
    Debug.WriteLine("Tag duration: {0} or {1:0} ms", tagWatch.Elapsed, tagWatch.ElapsedMilliseconds);
    Debug.WriteLine("Tag speed: {0:0.00} tokens/s", tagged.Length / tagWatch.Elapsed.TotalSeconds);

    // The upper bound is presumably a guard against implausibly high scores (e.g. train/test leakage).
    Assert.Greater(msdLemmaAccuracy, minAccuracy);
    Assert.Less(msdLemmaAccuracy, 0.97);
}
/// <summary>
/// Measures tagging throughput: trains a <see cref="LuMiiTagger"/> on the training corpus, builds a
/// one-million-token workload by repeatedly copying the test corpus, tags it under a stopwatch,
/// writes accuracy and speed to the debug output, and asserts that tag accuracy lies strictly
/// between 0.93 and 0.97.
/// </summary>
public void TagSpeed()
{
    string trainResource = Analyzed2Train;
    string testResource = Analyzed2Test;
    int maxTokenCount = 1000000;
    double minAccuracy = 0.93;

    Sentence[] train = LoadAnalyzedCorpus(trainResource);
    Sentence[] test = LoadAnalyzedCorpus(testResource);

    // Without at least one token the replication loop below can never reach maxTokenCount
    // and would keep appending empty sentences forever, so fail fast instead.
    Assert.Greater(test.SelectMany(t => t).Count(), 0);

    List<Sentence> all = new List<Sentence>();
    int allTokenCount = 0;
    while (allTokenCount < maxTokenCount)
    {
        // NOTE(review): every pass over the test corpus is merged into ONE sentence, so the original
        // sentence boundaries are lost. Kept as-is because changing it would alter the measured
        // accuracy and speed — confirm whether this is intentional.
        Sentence s = new Sentence();

        // Copy tokens (so the loaded corpus is not mutated by tagging), stopping exactly at the cap.
        foreach (Token token in test.SelectMany(t => t).Take(maxTokenCount - allTokenCount))
        {
            s.Add(new Token(token));
            allTokenCount++;
        }

        all.Add(s);
    }

    Assert.AreEqual(maxTokenCount, all.SelectMany(t => t).Count());

    LuMiiTagger tagger = new LuMiiTagger();
    tagger.Train(train);

    // Only the tagging call is timed; training is excluded from the measurement.
    Stopwatch timer = new Stopwatch();
    timer.Start();
    tagger.Tag(all);
    timer.Stop();

    Token[] tokens = all.SelectMany(t => t).ToArray();
    double accuracy = (double)tokens.Count(t => t.IsTagCorrect) / tokens.Length;

    Assert.AreEqual(maxTokenCount, tokens.Length);

    Debug.WriteLine("Accuracy: {0:0.00}%", accuracy * 100);
    // The token count is an absolute number, not a percentage.
    Debug.WriteLine("Tokens: {0}", tokens.Length);
    Debug.WriteLine("Tag duration: {0} or {1:0} ms", timer.Elapsed, timer.ElapsedMilliseconds);
    Debug.WriteLine("Tag speed: {0:0.00} tokens/s", tokens.Length / timer.Elapsed.TotalSeconds);

    Assert.Greater(accuracy, minAccuracy);
    Assert.Less(accuracy, 0.97);
}
/// <summary>
/// Verifies that a trained <see cref="LuMiiTagger"/> survives a save/load round trip: a tagger is
/// trained and saved to a temporary file, a second tagger is loaded from that file, both tag
/// independently loaded copies of the test corpus, and their tag accuracies must agree within
/// 0.0001 and lie strictly between 0.93 and 0.97.
/// </summary>
public void LoadSave()
{
    string trainResource = Analyzed2Train;
    string testResource = Analyzed2Test;
    double minAccuracy = 0.93;

    Sentence[] train = LoadAnalyzedCorpus(trainResource);
    // The test corpus is loaded twice so each tagger works on its own copy.
    Sentence[] test = LoadAnalyzedCorpus(testResource);
    Sentence[] test2 = LoadAnalyzedCorpus(testResource);

    Assert.Greater(train.Length, 0);
    Assert.Greater(test.Length, 0);
    Assert.Greater(train.Length, test.Length);

    // GetTempFileName creates the file on disk immediately, so it must be removed even when
    // training, saving, loading or any assertion below throws.
    string filename = Path.GetTempFileName();
    try
    {
        LuMiiTagger tagger = new LuMiiTagger();
        tagger.Train(train);
        tagger.Save(filename);
        tagger.Tag(test);

        LuMiiTagger tagger2 = new LuMiiTagger();
        tagger2.Load(filename);
        tagger2.Tag(test2);

        Token[] tokens = test.SelectMany(t => t).ToArray();
        Token[] tokens2 = test2.SelectMany(t => t).ToArray();

        double accuracy = (double)tokens.Count(t => t.IsTagCorrect) / tokens.Length;
        double accuracy2 = (double)tokens2.Count(t => t.IsTagCorrect) / tokens2.Length;

        // The reloaded model must score the same as the in-memory one.
        Assert.AreEqual(accuracy, accuracy2, 0.0001);
        Assert.Greater(accuracy, minAccuracy);
        Assert.Less(accuracy, 0.97);
    }
    finally
    {
        // File.Delete does not throw when the file is already gone.
        File.Delete(filename);
    }
}