Пример #1
0
 private Sentence[] LoadAnalyzedCorpus(string resourceName)
 {
     LuMiiCorpus corpus = new LuMiiCorpus();
     using (Stream stream = this.GetType().Assembly.GetManifestResourceStream(resourceName))
         return corpus.Load(stream).ToArray();
 }
Пример #2
0
        private Sentence[] LoadUnanalyzedCorpus(string resourceName, bool ignoreIncorrect = false)
        {
            LuMiiCorpus corpus = new LuMiiCorpus();
            LuMiiMorphology morphology = new LuMiiMorphology();

            Sentence[] sentences = null;
            using (Stream stream = this.GetType().Assembly.GetManifestResourceStream(resourceName))
                sentences = corpus.Load(stream).ToArray();

            List<Sentence> goodSentences = new List<Sentence>();
            List<Sentence> ignoredSentences = new List<Sentence>();
            List<Token> ignoredTokens = new List<Token>();

            Stopwatch watch = new Stopwatch();
            watch.Start();
            foreach (Sentence sentence in sentences)
            {
                bool ignore = false;
                Sentence analyzedSentence = new Sentence();

                foreach (Token token in sentence)
                {
                    Tag[] possibleTags = morphology.Analyze(token.TextTrueCase).ToArray();

                    if (!possibleTags.Any(t => t.Equals(token.CorrectTag)))
                        ignore = true;

                    Token analyzedToken = new Token(token.TextTrueCase, possibleTags, token.CorrectTag, analyzedSentence);
                    analyzedSentence.Add(analyzedToken);
                }

                if (!ignoreIncorrect || !ignore)
                {
                    goodSentences.Add(analyzedSentence);
                }
                else
                {
                    ignoredSentences.Add(analyzedSentence);
                }
            }
            watch.Stop();
            Debug.WriteLine(watch.Elapsed);

            return goodSentences.ToArray();
        }
Пример #3
0
        public void Model_Latest()
        {
            LuMiiCorpus corpus = new LuMiiCorpus();
            List<Sentence> all = new List<Sentence>();
            foreach (string resource in ModelLatestData)
                all.AddRange(corpus.Load(this.GetType().Assembly.GetManifestResourceStream(resource)).ToArray());

            LuMiiTagger tagger = new LuMiiTagger();
            tagger.Load();
            tagger.Tag(all);

            Token[] tokens = all.SelectMany(t => t).ToArray();
            double accuracy = (double)tokens.Count(t => t.IsTagCorrect) / tokens.Count();

            Assert.Greater(accuracy, 0.99);
        }