Beispiel #1
0
        /// <summary>
        /// Assembles the tokenizer and filter chain used by this analyzer.
        /// The chain is, in order: <see cref="JapaneseTokenizer"/>,
        /// <see cref="JapaneseBaseFormFilter"/>, <see cref="JapanesePartOfSpeechStopFilter"/>,
        /// <see cref="CJKWidthFilter"/>, <see cref="StopFilter"/>,
        /// <see cref="JapaneseKatakanaStemFilter"/> and <see cref="LowerCaseFilter"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Text source handed to the tokenizer.</param>
        /// <returns>The tokenizer paired with the last filter of the chain.</returns>
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            // NOTE(review): the literal `true` is passed positionally; presumably the
            // discard-punctuation flag - confirm against the JapaneseTokenizer constructor.
            Tokenizer source = new JapaneseTokenizer(reader, userDict, true, mode);

            // Each stage wraps the previous one; the order below is the order tokens flow through.
            TokenStream baseForms        = new JapaneseBaseFormFilter(source);
            TokenStream withoutStopTags  = new JapanesePartOfSpeechStopFilter(m_matchVersion, baseForms, stoptags);
            TokenStream widthNormalized  = new CJKWidthFilter(withoutStopTags);
            TokenStream withoutStopwords = new StopFilter(m_matchVersion, widthNormalized, m_stopwords);
            TokenStream katakanaStemmed  = new JapaneseKatakanaStemFilter(withoutStopwords);
            TokenStream lowerCased       = new LowerCaseFilter(m_matchVersion, katakanaStemmed);

            return new TokenStreamComponents(source, lowerCased);
        }
Beispiel #2
0
        /// <summary>
        /// Runs a sentence through <see cref="JapaneseTokenizer"/> (SEARCH mode),
        /// <see cref="CJKWidthFilter"/> and <see cref="JapaneseReadingFormFilter"/> constructed with
        /// <c>true</c> as its second argument, and asserts the emitted tokens are the expected
        /// Latin-script (romaji) readings.
        /// </summary>
        public void TestRomajiReadingsHalfWidth()
        {
            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                Tokenizer source            = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
                TokenStream widthNormalized = new CJKWidthFilter(source);
                TokenStream readings        = new JapaneseReadingFormFilter(widthNormalized, true);
                return new TokenStreamComponents(source, readings);
            });

            // NOTE(review): the test name says "HalfWidth", but the input literal below contains
            // only full-width katakana as written here - confirm the literal was not
            // width-normalized somewhere along the way.
            string[] expectedTokens = { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" };

            AssertAnalyzesTo(analyzer, "今夜はロバート先生と話した", expectedTokens);
        }
Beispiel #3
0
        /// <summary>
        /// Runs a sentence through <see cref="JapaneseTokenizer"/> (SEARCH mode),
        /// <see cref="CJKWidthFilter"/> and <see cref="JapaneseReadingFormFilter"/> constructed with
        /// <c>false</c> as its second argument, and asserts the emitted tokens are the expected
        /// katakana readings.
        /// </summary>
        public void TestKatakanaReadingsHalfWidth()
        {
            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                Tokenizer source            = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
                TokenStream widthNormalized = new CJKWidthFilter(source);
                TokenStream readings        = new JapaneseReadingFormFilter(widthNormalized, false);
                return new TokenStreamComponents(source, readings);
            });

            // NOTE(review): the test name says "HalfWidth", but the input literal below contains
            // only full-width katakana as written here - confirm the literal was not
            // width-normalized somewhere along the way.
            string[] expectedTokens = { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" };

            AssertAnalyzesTo(analyzer, "今夜はロバート先生と話した", expectedTokens);
        }