Exemple #1
0
        /// <summary>
        /// Runs a string containing kanji and kana iteration marks through a
        /// char filter created by <c>JapaneseIterationMarkCharFilterFactory</c>
        /// (constructed with no arguments) and a keyword-mode
        /// <c>MockTokenizer</c>, then asserts that the single resulting token
        /// has every iteration mark replaced by the character it repeats.
        /// </summary>
        public void TestIterationMarksWithKeywordTokenizer()
        {
            const string input = "時々馬鹿々々しいところゞゝゝミスヾ";
            string[] expectedTokens = { "時時馬鹿馬鹿しいところどころミスズ" };

            // Wrap the raw input in the iteration-mark char filter.
            TextReader normalizedReader = new JapaneseIterationMarkCharFilterFactory(new Dictionary<string, string>())
                .Create(new StringReader(input));

            // KEYWORD mode: the expected output is one single token.
            TokenStream stream = new MockTokenizer(normalizedReader, MockTokenizer.KEYWORD, false);

            AssertTokenStreamContents(stream, expectedTokens);
        }
Exemple #2
0
        /// <summary>
        /// Feeds text containing kanji and kana iteration marks through a char
        /// filter created by <c>JapaneseIterationMarkCharFilterFactory</c>
        /// (constructed with no arguments) into a tokenizer created by
        /// <c>JapaneseTokenizerFactory</c>, and asserts that the emitted tokens
        /// contain the expanded characters instead of the iteration marks.
        /// </summary>
        public void TestIterationMarksWithJapaneseTokenizer()
        {
            const string input = "時々馬鹿々々しいところゞゝゝミスヾ";
            string[] expectedTokens = { "時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ" };

            // The tokenizer factory is informed with an empty mock resource before use.
            JapaneseTokenizerFactory japaneseTokenizers = new JapaneseTokenizerFactory(new Dictionary<string, string>());
            japaneseTokenizers.Inform(new StringMockResourceLoader(""));

            JapaneseIterationMarkCharFilterFactory markFilters =
                new JapaneseIterationMarkCharFilterFactory(new Dictionary<string, string>());
            TextReader normalizedReader = markFilters.Create(new StringReader(input));

            TokenStream stream = japaneseTokenizers.Create(normalizedReader);

            AssertTokenStreamContents(stream, expectedTokens);
        }
Exemple #3
0
        /// <summary>
        /// Configures the iteration-mark char filter with
        /// <c>normalizeKanji=false</c> and <c>normalizeKana=true</c>, tokenizes
        /// the filtered text with a tokenizer created by
        /// <c>JapaneseTokenizerFactory</c>, and asserts that the kanji
        /// iteration marks are left in the output while the kana iteration
        /// marks are expanded.
        /// </summary>
        public void TestKanaOnlyIterationMarksWithJapaneseTokenizer()
        {
            const string input = "時々馬鹿々々しいところゞゝゝミスヾ";
            string[] expectedTokens = { "時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ" };

            // The tokenizer factory is informed with an empty mock resource before use.
            JapaneseTokenizerFactory japaneseTokenizers = new JapaneseTokenizerFactory(new Dictionary<string, string>());
            japaneseTokenizers.Inform(new StringMockResourceLoader(""));

            // Kana-only normalization: kanji marks off, kana marks on.
            IDictionary<string, string> kanaOnlyArgs = new Dictionary<string, string>
            {
                { "normalizeKanji", "false" },
                { "normalizeKana", "true" }
            };
            JapaneseIterationMarkCharFilterFactory markFilters = new JapaneseIterationMarkCharFilterFactory(kanaOnlyArgs);

            TextReader normalizedReader = markFilters.Create(new StringReader(input));
            TokenStream stream = japaneseTokenizers.Create(normalizedReader);

            AssertTokenStreamContents(stream, expectedTokens);
        }