Exemplo n.º 1
0
 /// <summary>
 /// Checks that analyzing the word form "roku" yields exactly three tags:
 /// lemma "roka" with MSD "ncfsa4", lemma "roka" with MSD "ncfpg4",
 /// and lemma "rakt" with MSD "vmnipt11san".
 /// </summary>
 public void roku()
 {
     var analyzer = new LuMiiMorphology();
     LuMiiTag[] readings = analyzer.Analyze("roku").ToArray();

     // Exactly three readings are expected; each one is checked by lemma + MSD pair.
     Assert.AreEqual(3, readings.Length);
     Assert.IsTrue(readings.Any(r => r.Lemma == "roka" && r.Msd == "ncfsa4"));
     Assert.IsTrue(readings.Any(r => r.Lemma == "roka" && r.Msd == "ncfpg4"));
     Assert.IsTrue(readings.Any(r => r.Lemma == "rakt" && r.Msd == "vmnipt11san"));
 }
Exemplo n.º 2
0
 /// <summary>
 /// Creates a tokenizer that keeps a reference to the supplied morphology instance.
 /// </summary>
 /// <param name="morphology">
 /// Stored as-is in the <c>morphology</c> field; no validation is performed here.
 /// </param>
 public LuMiiTokenizer(LuMiiMorphology morphology) => this.morphology = morphology;
Exemplo n.º 3
0
 /// <summary>
 /// Checks that analyzing the string "pokeplerksts" produces no tags at all.
 /// </summary>
 public void pokeplerksts()
 {
     var analyzer = new LuMiiMorphology();
     LuMiiTag[] readings = analyzer.Analyze("pokeplerksts").ToArray();

     // An empty result is the expected outcome for this input.
     Assert.AreEqual(0, readings.Length);
 }
Exemplo n.º 4
0
        /// <summary>
        /// Loads a corpus from an embedded resource and rebuilds every sentence so that each
        /// token carries the tags proposed by a freshly created <c>LuMiiMorphology</c>
        /// alongside the token's original correct tag.
        /// </summary>
        /// <param name="resourceName">
        /// Manifest resource name, looked up in the assembly that declares this instance's type.
        /// </param>
        /// <param name="ignoreIncorrect">
        /// When <c>true</c>, a sentence is left out of the result if any of its tokens has a
        /// correct tag that is not among the analyzer's proposed tags. When <c>false</c>
        /// (the default), every sentence is returned.
        /// </param>
        /// <returns>The re-analyzed sentences that were kept, in the order they were loaded.</returns>
        /// <exception cref="System.ArgumentException">
        /// No embedded resource named <paramref name="resourceName"/> exists.
        /// </exception>
        private Sentence[] LoadUnanalyzedCorpus(string resourceName, bool ignoreIncorrect = false)
        {
            LuMiiCorpus corpus = new LuMiiCorpus();
            LuMiiMorphology morphology = new LuMiiMorphology();

            Sentence[] sentences;
            using (Stream stream = this.GetType().Assembly.GetManifestResourceStream(resourceName))
            {
                // GetManifestResourceStream returns null when the resource is missing;
                // fail here with the resource name instead of passing null to the corpus loader.
                if (stream == null)
                    throw new System.ArgumentException("Embedded resource '" + resourceName + "' was not found.", "resourceName");

                sentences = corpus.Load(stream).ToArray();
            }

            List<Sentence> goodSentences = new List<Sentence>();

            // Time only the analysis loop; the elapsed time is written to the debug output.
            Stopwatch watch = Stopwatch.StartNew();
            foreach (Sentence sentence in sentences)
            {
                // Set when at least one token's correct tag is missing from the analyzer's output.
                bool ignore = false;
                Sentence analyzedSentence = new Sentence();

                foreach (Token token in sentence)
                {
                    Tag[] possibleTags = morphology.Analyze(token.TextTrueCase).ToArray();

                    if (!possibleTags.Any(t => t.Equals(token.CorrectTag)))
                        ignore = true;

                    Token analyzedToken = new Token(token.TextTrueCase, possibleTags, token.CorrectTag, analyzedSentence);
                    analyzedSentence.Add(analyzedToken);
                }

                // Flagged sentences are dropped only when the caller asked for it.
                if (!ignoreIncorrect || !ignore)
                {
                    goodSentences.Add(analyzedSentence);
                }
            }
            watch.Stop();
            Debug.WriteLine(watch.Elapsed);

            return goodSentences.ToArray();
        }