public void TestInputWithSeparators()
        {
            //URL url = this.GetType().getResource("test-separators.dict");
            string           dict = "test-separators.dict";
            DictionaryLookup s    = new DictionaryLookup(ReadDictionary(dict));

            /*
             * Attemp to reconstruct input sequences using WordData iterator.
             */
            List <String> sequences = new List <String>();

            foreach (WordData wd in s)
            {
                var stemSequence = wd.GetStem();
                var tagSequence  = wd.GetTag();
                var stem         = stemSequence == null ? "null" : stemSequence.ToString();
                var tag          = tagSequence == null ? "null" : tagSequence.ToString();

                sequences.Add($"{wd.Word} {stem} {tag}");
            }
            sequences.Sort(StringComparer.Ordinal);

            assertEquals("token1 null null", sequences[0]);
            assertEquals("token2 null null", sequences[1]);
            assertEquals("token3 null +", sequences[2]);
            assertEquals("token4 token2 null", sequences[3]);
            assertEquals("token5 token2 null", sequences[4]);
            assertEquals("token6 token2 +", sequences[5]);
            assertEquals("token7 token2 token3+", sequences[6]);
            assertEquals("token8 token2 token3++", sequences[7]);
        }
        public void TestInfixDictionaries()
        {
            //URL url = this.GetType().getResource("test-infix.dict");

            string   dict = "test-infix.dict";
            IStemmer s    = new DictionaryLookup(ReadDictionary(dict));

            //Assertions.assertThat(stem(s, "Rzeczypospolitej"))
            //      .containsExactly("Rzeczpospolita", "subst:irreg");

            //Assertions.assertThat(stem(s, "Rzeczyccy"))
            //      .containsExactly("Rzeczycki", "adj:pl:nom:m");

            //Assertions.assertThat(stem(s, "Rzecząpospolitą"))
            //      .containsExactly("Rzeczpospolita", "subst:irreg");

            assertEquals(new string[] { "Rzeczpospolita", "subst:irreg" }, stem(s, "Rzeczypospolitej"));
            assertEquals(new string[] { "Rzeczycki", "adj:pl:nom:m" }, stem(s, "Rzeczyccy"));
            assertEquals(new string[] { "Rzeczpospolita", "subst:irreg" }, stem(s, "Rzecząpospolitą"));

            // This word is not in the dictionary.
            assertNoStemFor(s, "martygalski");

            // This word uses characters that are outside of the encoding range of the dictionary.
            assertNoStemFor(s, "Rzeczyckiõh");
        }
        public void TestWordDataIterator()
        {
            //URL url = this.GetType().getResource("test-infix.dict");

            string           dict = "test-infix.dict";
            DictionaryLookup s    = new DictionaryLookup(ReadDictionary(dict));

            HashSet <String> entries = new HashSet <String>();

            foreach (WordData wd in s)
            {
                entries.Add(wd.Word + " " + wd.GetStem() + " " + wd.GetTag());
            }

            // Make sure a sample of the entries is present.
            //Assertions.assertThat(entries)
            //  .contains(
            //      "Rzekunia Rzekuń subst:sg:gen:m",
            //      "Rzeczkowskie Rzeczkowski adj:sg:nom.acc.voc:n+adj:pl:acc.nom.voc:f.n",
            //      "Rzecząpospolitą Rzeczpospolita subst:irreg",
            //      "Rzeczypospolita Rzeczpospolita subst:irreg",
            //      "Rzeczypospolitych Rzeczpospolita subst:irreg",
            //      "Rzeczyckiej Rzeczycki adj:sg:gen.dat.loc:f");

            assertTrue(entries.IsSupersetOf(new string[]
            {
                "Rzekunia Rzekuń subst:sg:gen:m",
                "Rzeczkowskie Rzeczkowski adj:sg:nom.acc.voc:n+adj:pl:acc.nom.voc:f.n",
                "Rzecząpospolitą Rzeczpospolita subst:irreg",
                "Rzeczypospolita Rzeczpospolita subst:irreg",
                "Rzeczypospolitych Rzeczpospolita subst:irreg",
                "Rzeczyckiej Rzeczycki adj:sg:gen.dat.loc:f"
            }));
        }
        public void TestGetSeparator()
        {
            //URL url = this.GetType().getResource("test-separators.dict");
            string           dict = "test-separators.dict";
            DictionaryLookup s    = new DictionaryLookup(ReadDictionary(dict));

            assertEquals('+', s.SeparatorChar);
        }
        public void TestMultibyteEncodingUTF8()
        {
            //URL url = this.GetType().getResource("test-diacritics-utf8.dict");
            string     dict = "test-diacritics-utf8.dict";
            Dictionary read = ReadDictionary(dict);
            IStemmer   s    = new DictionaryLookup(read);

            assertArrayEquals(new String[] { "merge", "001" }, stem(s, "mergeam"));
            assertArrayEquals(new String[] { "merge", "002" }, stem(s, "merseserăm"));
        }
        public void TestInputConversion()
        {
            //URL url = this.GetType().getResource("test-prefix.dict");

            string   dict = "test-prefix.dict";
            IStemmer s    = new DictionaryLookup(ReadDictionary(dict));

            assertArrayEquals(new String[] { "Rzeczpospolita", "subst:irreg" },
                              stem(s, "Rzecz\\apospolit\\a"));

            assertArrayEquals(new String[] { "Rzeczpospolita", "subst:irreg" },
                              stem(s, "krowa\\apospolit\\a"));
        }
        public void TestPrefixDictionaries()
        {
            //URL url = this.GetType().getResource("test-prefix.dict");
            string   dict = "test-prefix.dict";
            IStemmer s    = new DictionaryLookup(ReadDictionary(dict));

            assertArrayEquals(new String[] { "Rzeczpospolita", "subst:irreg" },
                              stem(s, "Rzeczypospolitej"));
            assertArrayEquals(new String[] { "Rzeczpospolita", "subst:irreg" },
                              stem(s, "Rzecząpospolitą"));

            // This word is not in the dictionary.
            assertNoStemFor(s, "martygalski");
        }
        public void TestSeparatorInLookupTerm()
        {
            FSA fsa = FSA.Read(GetType().getResourceAsStream("test-separator-in-lookup.fsa"));

            DictionaryMetadata metadata = new DictionaryMetadataBuilder()
                                          .Separator('+')
                                          .Encoding("iso8859-1")
                                          .Encoder(EncoderType.Infix)
                                          .Build();

            DictionaryLookup s = new DictionaryLookup(new Dictionary(fsa, metadata));

            assertEquals(0, s.Lookup("l+A").Count);
        }
        public void TestSynthesis()
        {
            //URL url = this.GetType().getResource("test-synth.dict");
            string   dict = "test-synth.dict";
            IStemmer s    = new DictionaryLookup(ReadDictionary(dict));

            assertArrayEquals(new String[] { "miała", null }, stem(s,
                                                                   "mieć|verb:praet:sg:ter:f:?perf"));
            assertArrayEquals(new String[] { "a", null }, stem(s, "a|conj"));
            assertArrayEquals(new String[] { }, stem(s, "dziecko|subst:sg:dat:n"));

            // This word is not in the dictionary.
            assertNoStemFor(s, "martygalski");
        }
        public void TestApplyReplacements()
        {
            // .NET: As long as we don't delete anything, Dictionary will retain insertion order.
            IDictionary <string, string> conversion = new Dictionary <string, string>
            {
                ["'"]      = "`",
                ["fi"]     = "fi",
                ["\\a"]    = "ą",
                ["Barack"] = "George",
                ["_"]      = "xx"
            };

            assertEquals("filut", DictionaryLookup.ApplyReplacements("filut", conversion));
            assertEquals("fizdrygałką", DictionaryLookup.ApplyReplacements("fizdrygałk\\a", conversion));
            assertEquals("George Bush", DictionaryLookup.ApplyReplacements("Barack Bush", conversion));
            assertEquals("xxxxxxxx", DictionaryLookup.ApplyReplacements("____", conversion));
        }
        public void TestWordDataCloning()
        {
            //URL url = this.GetType().getResource("test-infix.dict");

            string dict = "test-infix.dict";


            DictionaryLookup s = new DictionaryLookup(ReadDictionary(dict));

            List <WordData> words = new List <WordData>();

            foreach (WordData wd in s)
            {
                WordData clone = (WordData)wd.Clone();
                words.Add(clone);
            }

            // Reiterate and verify that we have the same entries.
            DictionaryLookup s2 = new DictionaryLookup(ReadDictionary(dict));
            int i = 0;

            foreach (WordData wd in s2)
            {
                WordData clone = words[i++];
                assertEquals(clone.GetStem(), wd.GetStem());
                assertEquals(clone.GetTag(), wd.GetTag());
                assertEquals(clone.Word, wd.Word);
            }

            // Check collections contract.
            HashSet <WordData> entries = new HashSet <WordData>();

            try
            {
                entries.Add(words[0]);
                fail();
            }
            catch (Exception e)
            {
                // Expected.
            }
        }