/// <summary>
/// Builds the analysis chain for a field: a <see cref="JapaneseTokenizer"/> reading
/// from <paramref name="reader"/>, wrapped by a fixed sequence of token filters.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not consulted by this method.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>Components pairing the tokenizer with the outermost filter of the chain.</returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new JapaneseTokenizer(reader, userDict, true, mode);

    // Each stage wraps the one before it, so tokens flow through the filters in this order.
    TokenStream baseForms = new JapaneseBaseFormFilter(source);
    TokenStream tagFiltered = new JapanesePartOfSpeechStopFilter(m_matchVersion, baseForms, stoptags);
    TokenStream widthFolded = new CJKWidthFilter(tagFiltered);
    TokenStream stopFiltered = new StopFilter(m_matchVersion, widthFolded, m_stopwords);
    TokenStream stemmed = new JapaneseKatakanaStemFilter(stopFiltered);
    TokenStream lowered = new LowerCaseFilter(m_matchVersion, stemmed);

    return new TokenStreamComponents(source, lowered);
}
/// <summary>
/// Checks that romaji readings are produced when the input contains halfwidth
/// katakana and a <see cref="CJKWidthFilter"/> sits between the tokenizer and the
/// <see cref="JapaneseReadingFormFilter"/>.
/// </summary>
public void TestRomajiReadingsHalfWidth()
{
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
        TokenStream widthFolded = new CJKWidthFilter(tokenizer);
        // Second argument 'true' selects romaji output in this test (see expected tokens below).
        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(widthFolded, true));
    });

    // The katakana run is written with \u escapes for the HALFWIDTH forms
    // (U+FF9B U+FF8A U+FF9E U+FF70 U+FF84) so the literal cannot be silently
    // normalized to fullwidth by an editor or text pipeline. With fullwidth input
    // the width-folding stage would have nothing to do and this test would not
    // exercise the halfwidth case its name refers to.
    const string halfWidthInput = "今夜は\uFF9B\uFF8A\uFF9E\uFF70\uFF84先生と話した";

    AssertAnalyzesTo(a, halfWidthInput,
        new string[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" });
}
/// <summary>
/// Checks that katakana readings are produced when the input contains halfwidth
/// katakana and a <see cref="CJKWidthFilter"/> sits between the tokenizer and the
/// <see cref="JapaneseReadingFormFilter"/>.
/// </summary>
public void TestKatakanaReadingsHalfWidth()
{
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
        TokenStream widthFolded = new CJKWidthFilter(tokenizer);
        // Second argument 'false' selects katakana output in this test (see expected tokens below).
        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(widthFolded, false));
    });

    // The katakana run is written with \u escapes for the HALFWIDTH forms
    // (U+FF9B U+FF8A U+FF9E U+FF70 U+FF84) so the literal cannot be silently
    // normalized to fullwidth by an editor or text pipeline. With fullwidth input
    // the width-folding stage would have nothing to do and this test would not
    // exercise the halfwidth case its name refers to.
    const string halfWidthInput = "今夜は\uFF9B\uFF8A\uFF9E\uFF70\uFF84先生と話した";

    AssertAnalyzesTo(a, halfWidthInput,
        new string[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" });
}