public void roku()
{
    // Analyze the surface form "roku" and materialize every proposed tag.
    LuMiiMorphology analyzer = new LuMiiMorphology();
    LuMiiTag[] results = analyzer.Analyze("roku").ToArray();

    // Exactly three analyses are expected.
    Assert.AreEqual(3, results.Length);

    // Each expected lemma/MSD pair must appear among the results.
    bool hasRokaNcfsa4 = results.Any(tag => tag.Lemma == "roka" && tag.Msd == "ncfsa4");
    Assert.IsTrue(hasRokaNcfsa4);

    bool hasRokaNcfpg4 = results.Any(tag => tag.Lemma == "roka" && tag.Msd == "ncfpg4");
    Assert.IsTrue(hasRokaNcfpg4);

    bool hasRaktVerb = results.Any(tag => tag.Lemma == "rakt" && tag.Msd == "vmnipt11san");
    Assert.IsTrue(hasRaktVerb);
}
/// <summary>
/// Creates a tokenizer that holds on to the supplied morphology instance.
/// </summary>
/// <param name="morphology">
/// Morphology object stored in the <c>morphology</c> field exactly as given;
/// no validation is performed here.
/// </param>
public LuMiiTokenizer(LuMiiMorphology morphology)
{
    this.morphology = morphology;
}
public void pokeplerksts()
{
    // The analyzer is expected to return no tags at all for "pokeplerksts".
    LuMiiMorphology analyzer = new LuMiiMorphology();
    LuMiiTag[] results = analyzer.Analyze("pokeplerksts").ToArray();

    Assert.AreEqual(0, results.Length);
}
/// <summary>
/// Loads a corpus from an embedded resource of this object's assembly and rebuilds
/// every sentence so that each token carries the tags proposed by
/// <c>LuMiiMorphology.Analyze</c> alongside its original correct tag.
/// </summary>
/// <param name="resourceName">
/// Manifest resource name passed to <c>Assembly.GetManifestResourceStream</c>.
/// </param>
/// <param name="ignoreIncorrect">
/// When <c>true</c>, a sentence is left out of the result if at least one of its tokens
/// has a correct tag that is not among the analyzer's proposals. When <c>false</c>
/// (the default), every sentence is returned.
/// </param>
/// <returns>The re-analyzed sentences, in corpus order.</returns>
/// <exception cref="System.ArgumentException">
/// No embedded resource named <paramref name="resourceName"/> exists in the assembly.
/// </exception>
private Sentence[] LoadUnanalyzedCorpus(string resourceName, bool ignoreIncorrect = false)
{
    LuMiiCorpus corpus = new LuMiiCorpus();
    LuMiiMorphology morphology = new LuMiiMorphology();

    Sentence[] sentences;
    using (Stream stream = this.GetType().Assembly.GetManifestResourceStream(resourceName))
    {
        // GetManifestResourceStream signals a missing resource by returning null;
        // fail here with a clear message instead of handing null to the corpus loader.
        if (stream == null)
        {
            throw new System.ArgumentException(
                "Embedded resource '" + resourceName + "' was not found.",
                "resourceName");
        }

        sentences = corpus.Load(stream).ToArray();
    }

    List<Sentence> goodSentences = new List<Sentence>();

    // Time the analysis pass only; the elapsed time goes to the debug output.
    Stopwatch watch = Stopwatch.StartNew();

    foreach (Sentence sentence in sentences)
    {
        bool hasMissingCorrectTag = false;
        Sentence analyzedSentence = new Sentence();

        foreach (Token token in sentence)
        {
            Tag[] possibleTags = morphology.Analyze(token.TextTrueCase).ToArray();

            // The sentence is flagged as soon as one token's correct tag is not
            // among the analyzer's proposals.
            if (!possibleTags.Any(t => t.Equals(token.CorrectTag)))
            {
                hasMissingCorrectTag = true;
            }

            Token analyzedToken = new Token(token.TextTrueCase, possibleTags, token.CorrectTag, analyzedSentence);
            analyzedSentence.Add(analyzedToken);
        }

        // Flagged sentences are dropped only when the caller asked for it.
        if (!ignoreIncorrect || !hasMissingCorrectTag)
        {
            goodSentences.Add(analyzedSentence);
        }
    }

    watch.Stop();
    Debug.WriteLine(watch.Elapsed);

    return goodSentences.ToArray();
}