// Checks the values the stem helper returns for a few inflected Polish forms,
// and that a word missing from the dictionary produces no stem.
public void TestLexemes()
{
    PolishStemmer stemmer = new PolishStemmer();

    // Expected full results for the two nouns checked below.
    String[] expectedBiography = { "żywotopisarstwo", "subst:sg:loc:n2" };
    String[] expectedCatkin = { "bazia", "subst:pl:inst:f" };

    // First element of the helper's result only.
    assertEquals("żywotopisarstwo", stem(stemmer, "żywotopisarstwie")[0]);
    assertEquals("abradować", stem(stemmer, "abradowałoby")[0]);

    // Whole result arrays.
    assertArrayEquals(expectedBiography, stem(stemmer, "żywotopisarstwie"));
    assertArrayEquals(expectedCatkin, stem(stemmer, "baziami"));

    // This word is not in the dictionary.
    assertNoStemFor(stemmer, "martygalski");
}
// Search command: looks the entered text up in the Polish stemmer and searches for
// the stem of the first result (or for the text itself when the lookup finds nothing).
//
// obj is the command parameter; it is cast to string, so a non-string value still
// raises InvalidCastException. A null or empty parameter is ignored.
private void ExecuteSearchCommand(object obj)
{
    var text = (string)obj;

    // A null parameter used to slip past the `text != ""` check and fail further down
    // (lookup / Trim on null); treat it the same as an empty query.
    if (string.IsNullOrEmpty(text))
    {
        return;
    }

    var speller = new PolishStemmer();

    // Only the first lookup result is considered; when there is none the raw text is kept.
    var textWordData = speller.lookup(text).toArray().FirstOrDefault() as WordData;
    if (textWordData != null)
    {
        text = textWordData.getStem().toString();
    }

    Search(text.Trim().ToLower());
}
// Splits a sample Polish sentence into lower-cased tokens and prints, for every token,
// each stem/tag pair returned by the stemmer lookup.
public void Test_Gh27()
{
    PolishStemmer stemmer = new PolishStemmer();

    string input = "Nie zabrakło oczywiście wpadek. Największym zaskoczeniem okazał się dla nas strój Katarzyny Zielińskiej, której ewidentnie o coś chodziło, ale wciąż nie wiemy o co.";

    // Lower-case with the Polish culture, then split on runs of whitespace, dots and commas.
    string lowered = input.ToLower(new CultureInfo("pl"));
    string[] tokens = Regex.Split(lowered, "[\\s\\.\\,]+");

    foreach (string token in tokens)
    {
        Console.Out.WriteLine("> '" + token + "'");

        foreach (WordData entry in stemmer.Lookup(token))
        {
            // A missing stem is printed as a placeholder instead of being dereferenced.
            var stemValue = entry.GetStem();
            string stemText = stemValue == null ? "<null>" : stemValue.ToString();

            Console.Out.WriteLine(" - " + stemText + ", " + entry.GetTag());
        }

        Console.Out.WriteLine();
    }
}
// Looks up the word "liga" and checks the returned WordData entries: the expected stems
// and tags are present, repeated reads give values already seen, and the byte-buffer
// accessors agree with the string accessors.
public void TestWordDataFields()
{
    IStemmer s = new PolishStemmer();

    String word = "liga";
    IList<WordData> response = s.Lookup(word);
    assertEquals(2, response.Count);

    HashSet<String> stems = new HashSet<String>();
    HashSet<String> tags = new HashSet<String>();
    foreach (WordData wd in response)
    {
        stems.Add(wd.GetStem().ToString());
        tags.Add(wd.GetTag().ToString());
        assertSame(word, wd.Word.ToString());
    }

    assertTrue(stems.Contains("ligać"));
    assertTrue(stems.Contains("liga"));
    assertTrue(tags.Contains("subst:sg:nom:f"));
    assertTrue(tags.Contains("verb:fin:sg:ter:imperf:nonrefl+verb:fin:sg:ter:imperf:refl.nonrefl"));

    // Repeat to make sure we get the same values consistently. The membership checks are
    // asserted: a bare Contains call discards its result and would verify nothing.
    foreach (WordData wd in response)
    {
        assertTrue(stems.Contains(wd.GetStem().ToString()));
        assertTrue(tags.Contains(wd.GetTag().ToString()));
    }

    Encoding encoding = Encoding.UTF8;

    // Run the same consistency check for the returned buffers.
    ByteBuffer temp = ByteBuffer.Allocate(100);
    foreach (WordData wd in response)
    {
        // Buffer should be copied.
        ByteBuffer copy = wd.GetStemBytes(null);
        String stem = encoding.GetString(copy.Array, copy.ArrayOffset + copy.Position, copy.Remaining);

        // The buffer should be present in stems set.
        assertTrue(stem, stems.Contains(stem));

        // Buffer large enough to hold the contents.
        assertSame(temp, wd.GetStemBytes(temp));

        // The copy and the clone should be identical.
        assertEquals(0, copy.CompareTo(temp));
    }

    foreach (WordData wd in response)
    {
        // Buffer should be copied.
        ByteBuffer copy = wd.GetTagBytes(null);
        String tag = encoding.GetString(copy.Array, copy.ArrayOffset + copy.Position, copy.Remaining);

        // The buffer should be present in tags set.
        assertTrue(tag, tags.Contains(tag));

        // Buffer large enough to hold the contents.
        temp.Clear();
        assertSame(temp, wd.GetTagBytes(temp));

        // The copy and the clone should be identical.
        assertEquals(0, copy.CompareTo(temp));
    }

    foreach (WordData wd in response)
    {
        // Buffer should be copied.
        ByteBuffer copy = wd.GetWordBytes(null);
        assertNotNull(copy);
        assertEquals(0, copy.CompareTo(ByteBuffer.Wrap(encoding.GetBytes(word))));
    }
}