// Feeds the sample text through a char filter built by
// JapaneseIterationMarkCharFilterFactory (constructed with an empty argument map),
// tokenizes the filtered reader with a MockTokenizer in KEYWORD mode, and expects
// the whole normalized text to come back as a single token.
public void TestIterationMarksWithKeywordTokenizer()
{
    String input = "時々馬鹿々々しいところゞゝゝミスヾ";
    String[] expectedTokens = new String[] { "時時馬鹿馬鹿しいところどころミスズ" };

    // Empty map: no explicit options are passed to the factory.
    Dictionary <String, String> noArgs = new Dictionary <String, String>();
    JapaneseIterationMarkCharFilterFactory charFilterFactory =
        new JapaneseIterationMarkCharFilterFactory(noArgs);

    TextReader filteredReader = charFilterFactory.Create(new StringReader(input));
    TokenStream stream = new MockTokenizer(filteredReader, MockTokenizer.KEYWORD, false);

    AssertTokenStreamContents(stream, expectedTokens);
}
// Runs the sample text through the iteration-mark char filter (factory built with
// an empty argument map) and then through a tokenizer produced by
// JapaneseTokenizerFactory, and checks the resulting token sequence.
public void TestIterationMarksWithJapaneseTokenizer()
{
    // Tokenizer factory: empty argument map, informed with an empty-string resource loader.
    Dictionary <String, String> tokenizerArgs = new Dictionary <String, String>();
    JapaneseTokenizerFactory japaneseTokenizerFactory = new JapaneseTokenizerFactory(tokenizerArgs);
    japaneseTokenizerFactory.Inform(new StringMockResourceLoader(""));

    // Char filter factory: empty argument map as well.
    Dictionary <String, String> charFilterArgs = new Dictionary <String, String>();
    JapaneseIterationMarkCharFilterFactory charFilterFactory =
        new JapaneseIterationMarkCharFilterFactory(charFilterArgs);

    StringReader rawReader = new StringReader("時々馬鹿々々しいところゞゝゝミスヾ");
    TextReader filteredReader = charFilterFactory.Create(rawReader);
    TokenStream stream = japaneseTokenizerFactory.Create(filteredReader);

    String[] expectedTokens = new String[]
    {
        "時時",
        "馬鹿馬鹿しい",
        "ところどころ",
        "ミ",
        "スズ"
    };
    AssertTokenStreamContents(stream, expectedTokens);
}
// Same pipeline as the Japanese-tokenizer test, but the char filter factory is
// configured with normalizeKanji=false and normalizeKana=true. The expected tokens
// below keep the "々" marks as they are while the kana marks appear expanded.
public void TestKanaOnlyIterationMarksWithJapaneseTokenizer()
{
    // Tokenizer factory: empty argument map, informed with an empty-string resource loader.
    Dictionary <String, String> tokenizerArgs = new Dictionary <String, String>();
    JapaneseTokenizerFactory japaneseTokenizerFactory = new JapaneseTokenizerFactory(tokenizerArgs);
    japaneseTokenizerFactory.Inform(new StringMockResourceLoader(""));

    // Char filter options for this test.
    IDictionary <String, String> charFilterArgs = new Dictionary <String, String>();
    charFilterArgs.Put("normalizeKanji", "false");
    charFilterArgs.Put("normalizeKana", "true");
    JapaneseIterationMarkCharFilterFactory charFilterFactory =
        new JapaneseIterationMarkCharFilterFactory(charFilterArgs);

    StringReader rawReader = new StringReader("時々馬鹿々々しいところゞゝゝミスヾ");
    TextReader filteredReader = charFilterFactory.Create(rawReader);
    TokenStream stream = japaneseTokenizerFactory.Create(filteredReader);

    String[] expectedTokens = new String[]
    {
        "時々",
        "馬鹿",
        "々",
        "々",
        "しい",
        "ところどころ",
        "ミ",
        "スズ"
    };
    AssertTokenStreamContents(stream, expectedTokens);
}