Ejemplo n.º 1
0
        /// <summary>
        /// Measures tagging throughput. Trains a tagger on the training corpus, tags a
        /// synthetic corpus of exactly <c>maxTokenCount</c> tokens built by repeating the
        /// test corpus, logs accuracy and speed, and asserts that tag accuracy lies
        /// strictly between <c>minAccuracy</c> and 0.97.
        /// </summary>
        public void TagSpeed()
        {
            string trainResource = Analyzed2Train;
            string testResource = Analyzed2Test;
            int maxTokenCount = 1000000;
            double minAccuracy = 0.93;

            Sentence[] train = LoadAnalyzedCorpus(trainResource);
            Sentence[] test = LoadAnalyzedCorpus(testResource);

            // Without at least one token in the test corpus the fill loop below could never
            // reach maxTokenCount and would spin forever, so fail fast instead.
            Assert.IsTrue(test.SelectMany(t => t).Any(), "Test corpus contains no tokens.");

            List<Sentence> all = new List<Sentence>();
            int allTokenCount = 0;
            while (allTokenCount < maxTokenCount)
            {
                // NOTE(review): every pass merges all test sentences into ONE Sentence, so the
                // original sentence boundaries are not preserved - confirm this is intended.
                Sentence s = new Sentence();

                foreach (Sentence sentence in test)
                {
                    foreach (Token token in sentence)
                    {
                        // A fresh Token is built from each source token (presumably a copy
                        // constructor), so the loaded test corpus itself is not added to 'all'.
                        s.Add(new Token(token));
                        allTokenCount++;
                        if (allTokenCount >= maxTokenCount) break;
                    }

                    if (allTokenCount >= maxTokenCount) break;
                }

                all.Add(s);
            }

            Assert.AreEqual(maxTokenCount, all.SelectMany(t => t).Count());

            LuMiiTagger tagger = new LuMiiTagger();
            tagger.Train(train);

            // Only the Tag call is timed; training is deliberately outside the stopwatch.
            Stopwatch timer = Stopwatch.StartNew();
            tagger.Tag(all);
            timer.Stop();

            Token[] tokens = all.SelectMany(t => t).ToArray();
            double accuracy = (double)tokens.Count(t => t.IsTagCorrect) / tokens.Length;

            Assert.AreEqual(maxTokenCount, tokens.Length);

            Debug.WriteLine("Accuracy: {0:0.00}%", accuracy * 100);
            Debug.WriteLine("Tokens: {0}", tokens.Length);
            Debug.WriteLine("Tag duration: {0} or {1:0} ms", timer.Elapsed, timer.ElapsedMilliseconds);
            Debug.WriteLine("Tag speed: {0:0.00} tokens/s", tokens.Length / timer.Elapsed.TotalSeconds);

            Assert.Greater(accuracy, minAccuracy);
            Assert.Less(accuracy, 0.97);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads every embedded corpus resource listed in <c>ModelLatestData</c>, tags all of
        /// the sentences with a tagger initialised through the parameterless <c>Load()</c>
        /// (presumably the bundled latest model - confirm), and asserts that tag accuracy
        /// is above 0.99.
        /// </summary>
        public void Model_Latest()
        {
            LuMiiCorpus corpus = new LuMiiCorpus();
            List<Sentence> all = new List<Sentence>();
            foreach (string resource in ModelLatestData)
            {
                // GetManifestResourceStream returns null for an unknown resource name; check it
                // here so the failure names the resource, and dispose the stream once the
                // sentences have been materialised by ToArray().
                using (System.IO.Stream stream = this.GetType().Assembly.GetManifestResourceStream(resource))
                {
                    Assert.IsNotNull(stream, "Embedded resource not found: " + resource);
                    all.AddRange(corpus.Load(stream).ToArray());
                }
            }

            LuMiiTagger tagger = new LuMiiTagger();
            tagger.Load();
            tagger.Tag(all);

            Token[] tokens = all.SelectMany(t => t).ToArray();
            double accuracy = (double)tokens.Count(t => t.IsTagCorrect) / tokens.Length;

            Assert.Greater(accuracy, 0.99);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Split validation: trains a tagger on <paramref name="train"/>, tags
        /// <paramref name="test"/>, logs accuracy and timing figures, and asserts that the
        /// combined msd + lemma accuracy lies strictly between
        /// <paramref name="minAccuracy"/> and 0.97.
        /// </summary>
        /// <param name="name">Label written to the debug output for this run.</param>
        /// <param name="minAccuracy">Exclusive lower bound for the msd + lemma accuracy (as a fraction).</param>
        /// <param name="train">Training sentences; must be non-empty and contain more sentences than <paramref name="test"/>.</param>
        /// <param name="test">Sentences to tag and score; must be non-empty.</param>
        public void Split(string name, double minAccuracy, Sentence[] train, Sentence[] test)
        {
            Assert.Greater(train.Length, 0);
            Assert.Greater(test.Length, 0);
            Assert.Greater(train.Length, test.Length);

            LuMiiTagger tagger = new LuMiiTagger();

            Stopwatch trainTimer = Stopwatch.StartNew();
            tagger.Train(train);
            trainTimer.Stop();

            Stopwatch tagTimer = Stopwatch.StartNew();
            tagger.Tag(test);
            tagTimer.Stop();

            Token[] tokens = test.SelectMany(t => t).ToArray();

            // Every accuracy below divides by the token count; an empty token set would
            // yield NaN and an unhelpful failure further down, so report it explicitly.
            Assert.Greater(tokens.Length, 0);

            double accuracyTag = (double)tokens.Count(t => t.IsTagCorrect) / tokens.Length;
            double accuracyMsd = (double)tokens.Count(t => t.IsMsdCorrect) / tokens.Length;
            double accuracyMsdLemma = (double)tokens.Count(t => t.IsMsdCorrect && t.IsLemmaCorrect) / tokens.Length;
            double accuracyLemma = (double)tokens.Count(t => t.IsLemmaCorrect) / tokens.Length;

            Debug.WriteLine("Split validation for " + name);
            Debug.WriteLine("Train: {0} sentences, {1} tokens", train.Length, train.SelectMany(t => t).Count());
            Debug.WriteLine("Test: {0} sentences, {1} tokens", test.Length, tokens.Length);
            Debug.WriteLine("Accuracy tag: {0:0.00}%", accuracyTag * 100);
            Debug.WriteLine("Accuracy msd: {0:0.00}%", accuracyMsd * 100);
            Debug.WriteLine("Accuracy msd + lemma: {0:0.00}%", accuracyMsdLemma * 100);
            Debug.WriteLine("Accuracy lemma: {0:0.00}%", accuracyLemma * 100);
            Debug.WriteLine("Train duration: {0} or {1:0} ms", trainTimer.Elapsed, trainTimer.ElapsedMilliseconds);
            Debug.WriteLine("Tag duration: {0} or {1:0} ms", tagTimer.Elapsed, tagTimer.ElapsedMilliseconds);
            Debug.WriteLine("Tag speed: {0:0.00} tokens/s", tokens.Length / tagTimer.Elapsed.TotalSeconds);

            Assert.Greater(accuracyMsdLemma, minAccuracy);
            // NOTE(review): the fixed 0.97 ceiling looks like a sanity check against
            // suspiciously high scores - confirm the intent with the test author.
            Assert.Less(accuracyMsdLemma, 0.97);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Round-trip test for model persistence. Trains a tagger, saves it to a temporary
        /// file, loads that file into a second tagger, and asserts that both taggers reach
        /// the same tag accuracy (within 0.0001) on identical copies of the test corpus,
        /// and that the accuracy lies strictly between <c>minAccuracy</c> and 0.97.
        /// </summary>
        public void LoadSave()
        {
            string trainResource = Analyzed2Train;
            string testResource = Analyzed2Test;
            double minAccuracy = 0.93;

            Sentence[] train = LoadAnalyzedCorpus(trainResource);
            // The test corpus is loaded twice so each tagger tags its own set of objects.
            Sentence[] test = LoadAnalyzedCorpus(testResource);
            Sentence[] test2 = LoadAnalyzedCorpus(testResource);

            Assert.Greater(train.Length, 0);
            Assert.Greater(test.Length, 0);
            Assert.Greater(train.Length, test.Length);

            string filename = Path.GetTempFileName();
            try
            {
                LuMiiTagger tagger = new LuMiiTagger();
                tagger.Train(train);
                tagger.Save(filename);
                tagger.Tag(test);

                LuMiiTagger tagger2 = new LuMiiTagger();
                tagger2.Load(filename);
                tagger2.Tag(test2);

                Token[] tokens = test.SelectMany(t => t).ToArray();
                Token[] tokens2 = test2.SelectMany(t => t).ToArray();

                double accuracy = (double)tokens.Count(t => t.IsTagCorrect) / tokens.Length;
                double accuracy2 = (double)tokens2.Count(t => t.IsTagCorrect) / tokens2.Length;

                Assert.AreEqual(accuracy, accuracy2, 0.0001);
                Assert.Greater(accuracy, minAccuracy);
                Assert.Less(accuracy, 0.97);
            }
            finally
            {
                // Remove the temp file even when an assertion or the tagger throws, so a
                // failing run does not leave model files behind in the temp directory.
                File.Delete(filename);
            }
        }