Example #1
0
        /// <summary>
        /// Verifies that a default-constructed <see cref="JiebaTokenizer"/> loads a
        /// non-empty built-in stop-word dictionary containing common Chinese stop words.
        /// </summary>
        public void TestJIEbaTokenizer()
        {
            // Arrange: tokenizer over an empty reader in the default segmentation mode.
            var jiebaTokenizer = new JiebaTokenizer(TextReader.Null, TokenizerMode.Default);

            // The bundled stop-word dictionary should be populated on construction.
            Assert.NotEmpty(jiebaTokenizer.StopWords);

            // Spot-check two very common Chinese stop words ("is", "what").
            Assert.True(jiebaTokenizer.StopWords.ContainsKey("是"));
            Assert.True(jiebaTokenizer.StopWords.ContainsKey("什么"));
        }
Example #2
0
 /// <summary>
 /// Builds the analysis chain for a field: Jieba tokenization, then lower-casing,
 /// then stop-word removal.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed (not used by this chain).</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The fully composed <see cref="TokenStream"/>.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     var segmenter = new JiebaSegmenter();
     TokenStream stream = new JiebaTokenizer(segmenter, reader);

     // The query parser lower-cases queries, so the index-side stream must match.
     stream = new LowerCaseFilter(stream);

     // NOTE(review): first argument presumably enables position increments for
     // removed stop words — confirm against the StopFilter overload in use.
     stream = new StopFilter(true, stream, StopWords);

     return stream;
 }
Example #3
0
        /// <summary>
        /// Tokenizes <paramref name="keywords"/> and joins the resulting non-blank
        /// words with single spaces.
        /// </summary>
        /// <param name="keywords">Raw keyword text to segment.</param>
        /// <param name="tokenizer">Tokenizer used to split the text into word tokens.</param>
        /// <returns>Space-separated token words; an empty string when no words remain.</returns>
        private static string GetKeyWordsSplitBySpace(string keywords, JiebaTokenizer tokenizer)
        {
            var result = new StringBuilder();

            var words = tokenizer.Tokenize(keywords);

            foreach (var word in words)
            {
                // Skip empty/whitespace-only tokens so they don't add stray spaces.
                if (string.IsNullOrWhiteSpace(word.Word))
                {
                    continue;
                }

                result.AppendFormat("{0} ", word.Word);
            }

            // Trim drops the trailing separator appended after the last word.
            // Plain `return expr;` for consistency with the sibling implementation
            // of this helper (the parenthesized `return(...)` form is non-idiomatic).
            return result.ToString().Trim();
        }
        /// <summary>
        /// Segments <paramref name="keywords"/> with the given tokenizer and returns
        /// the non-blank words joined by single spaces.
        /// </summary>
        /// <param name="keywords">Raw keyword text to segment.</param>
        /// <param name="tokenizer">Tokenizer that splits the text into word tokens.</param>
        /// <returns>Space-separated token words; an empty string when no words remain.</returns>
        private static string GetKeyWordsSplitBySpace(string keywords, JiebaTokenizer tokenizer)
        {
            var buffer = new StringBuilder();

            foreach (var token in tokenizer.Tokenize(keywords))
            {
                // Only keep tokens with visible content; blanks would add stray spaces.
                if (!string.IsNullOrWhiteSpace(token.Word))
                {
                    buffer.Append(token.Word).Append(' ');
                }
            }

            // Trim removes the separator appended after the final word.
            return buffer.ToString().Trim();
        }