Beispiel #1
0
        // Checks the base forms (and, for two words, the full result arrays) that the
        // stem() helper returns for known inflected forms, and that a word missing
        // from the dictionary produces no stem.
        public void TestLexemes()
        {
            var stemmer = new PolishStemmer();

            // Element 0 of the helper's result is compared against the expected base form.
            assertEquals("żywotopisarstwo", stem(stemmer, "żywotopisarstwie")[0]);
            assertEquals("abradować", stem(stemmer, "abradowałoby")[0]);

            // Whole result: base form followed by what appears to be its morphological tag.
            string[] expectedForNoun = { "żywotopisarstwo", "subst:sg:loc:n2" };
            assertArrayEquals(expectedForNoun, stem(stemmer, "żywotopisarstwie"));

            string[] expectedForCatkin = { "bazia", "subst:pl:inst:f" };
            assertArrayEquals(expectedForCatkin, stem(stemmer, "baziami"));

            // This word is not in the dictionary.
            assertNoStemFor(stemmer, "martygalski");
        }
        // Search command: replaces the entered text with its base form when the
        // stemmer recognizes it, then runs Search on the trimmed, lower-cased result.
        //
        // obj: the command parameter, cast to string. A null or empty value is
        //      ignored and no search is performed.
        private void ExecuteSearchCommand(object obj)
        {
            var text = (string)obj;

            // Null must be rejected explicitly: it is not equal to "" and would
            // otherwise reach lookup() and text.Trim() below.
            if (string.IsNullOrEmpty(text))
            {
                return;
            }

            var speller      = new PolishStemmer();
            var textWordData = speller.lookup(text).toArray().FirstOrDefault() as WordData;

            // Only the first lookup result is used; when there is none, the text
            // is searched exactly as typed.
            if (textWordData != null)
            {
                text = textWordData.getStem().toString();
            }

            Search(text.Trim().ToLower());
        }
Beispiel #3
0
        // Prints, for every token of a sample Polish sentence, each stem/tag pair the
        // stemmer returns for it. Output goes to the console; nothing is asserted.
        public void Test_Gh27()
        {
            var polishStemmer = new PolishStemmer();

            string input = "Nie zabrakło oczywiście wpadek. Największym zaskoczeniem okazał się dla nas strój Katarzyny Zielińskiej, której ewidentnie o coś chodziło, ale wciąż nie wiemy o co.";

            // Lower-case with the "pl" culture, then split on runs of whitespace,
            // dots and commas.
            var polishCulture = new CultureInfo("pl");
            string[] tokens = Regex.Split(input.ToLower(polishCulture), "[\\s\\.\\,]+");

            foreach (string token in tokens)
            {
                Console.Out.WriteLine("> '" + token + "'");

                foreach (WordData entry in polishStemmer.Lookup(token))
                {
                    // A missing stem is printed as a literal "<null>" marker.
                    var stem = entry.GetStem();
                    string stemText = stem == null ? "<null>" : stem.ToString();

                    Console.Out.WriteLine("  - " + stemText + ", " + entry.GetTag());
                }

                Console.Out.WriteLine();
            }
        }
Beispiel #4
0
        // Looks up the word "liga" and verifies the WordData entries that come back:
        // the expected stems and tags are present, repeated reads of the same entries
        // give the same values, and the byte-buffer accessors (stem, tag, word) agree
        // with the character-based ones.
        public void TestWordDataFields()
        {
            IStemmer s = new PolishStemmer();

            String           word     = "liga";
            IList <WordData> response = s.Lookup(word);

            // Two analyses are expected for this word (see the stems/tags asserted below).
            assertEquals(2, response.Count);

            HashSet <String> stems = new HashSet <String>();
            HashSet <String> tags  = new HashSet <String>();

            foreach (WordData wd in response)
            {
                stems.Add(wd.GetStem().ToString());
                tags.Add(wd.GetTag().ToString());
                assertSame(word, wd.Word.ToString());
            }
            assertTrue(stems.Contains("ligać"));
            assertTrue(stems.Contains("liga"));
            assertTrue(tags.Contains("subst:sg:nom:f"));
            assertTrue(tags.Contains("verb:fin:sg:ter:imperf:nonrefl+verb:fin:sg:ter:imperf:refl.nonrefl"));

            // Repeat to make sure we get the same values consistently. The results of
            // Contains must be asserted; calling it alone verifies nothing.
            foreach (WordData wd in response)
            {
                assertTrue(stems.Contains(wd.GetStem().ToString()));
                assertTrue(tags.Contains(wd.GetTag().ToString()));
            }

            Encoding encoding = Encoding.UTF8;

            // Run the same consistency check for the returned buffers.
            ByteBuffer temp = ByteBuffer.Allocate(100);

            foreach (WordData wd in response)
            {
                // Buffer should be copied.
                ByteBuffer copy = wd.GetStemBytes(null);
                String     stem = encoding.GetString(copy.Array, copy.ArrayOffset + copy.Position, copy.Remaining);
                // The buffer should be present in stems set.
                assertTrue(stem, stems.Contains(stem));
                // Buffer large enough to hold the contents.
                assertSame(temp, wd.GetStemBytes(temp));
                // The copy and the clone should be identical.
                assertEquals(0, copy.CompareTo(temp));
            }

            foreach (WordData wd in response)
            {
                // Buffer should be copied.
                ByteBuffer copy = wd.GetTagBytes(null);
                String     tag  = encoding.GetString(copy.Array, copy.ArrayOffset + copy.Position, copy.Remaining);
                // The buffer should be present in tags set.
                assertTrue(tag, tags.Contains(tag));
                // Buffer large enough to hold the contents.
                temp.Clear();
                assertSame(temp, wd.GetTagBytes(temp));
                // The copy and the clone should be identical.
                assertEquals(0, copy.CompareTo(temp));
            }

            foreach (WordData wd in response)
            {
                // Buffer should be copied.
                ByteBuffer copy = wd.GetWordBytes(null);
                assertNotNull(copy);
                // The word bytes must equal the UTF-8 encoding of the looked-up word.
                assertEquals(0, copy.CompareTo(ByteBuffer.Wrap(encoding.GetBytes(word))));
            }
        }