Exemple #1
0
        /// <summary>
        /// Segments <paramref name="keywords"/> and renders every non-null segment as a
        /// boosted term of the form <c>word^N.0</c>, where N is 3 raised to the segment's rank.
        /// </summary>
        /// <param name="keywords">Text to segment.</param>
        /// <param name="ktTokenizer">Tokenizer used to produce the word infos.</param>
        /// <returns>The terms separated by single spaces, with surrounding whitespace trimmed.</returns>
        public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
        {
            ICollection<WordInfo> segments = ktTokenizer.SegmentToWordInfos(keywords);
            StringBuilder         query    = new StringBuilder();

            foreach (WordInfo segment in segments)
            {
                // Null entries in the segmentation result are skipped.
                if (segment != null)
                {
                    int boost = (int)Math.Pow(3, segment.Rank);
                    query.AppendFormat("{0}^{1}.0 ", segment.Word, boost);
                }
            }

            // Every term is followed by a space; Trim drops the trailing one.
            string joined = query.ToString();
            return joined.Trim();
        }
Exemple #2
0
        /// <summary>
        /// Segments the given text with the PanGu tokenizer and builds a boosted term string.
        /// </summary>
        /// <param name="keywords">Keywords to segment.</param>
        /// <returns>
        /// The non-null segments rendered as <c>word^N.0</c> (N = 3 raised to the word's rank),
        /// separated by single spaces and trimmed.
        /// </returns>
        /// <remarks>
        /// Created 2013-3-8 by Yang Hao.
        /// Side effect: stores the word of the maximum segment (by WordInfo's own ordering)
        /// in <c>_keyword</c> when at least one non-null segment exists.
        /// </remarks>
        public static string Participle(string keywords)
        {
            StringBuilder  result              = new StringBuilder();
            PanGuTokenizer ktTokenizer         = new PanGuTokenizer();
            ICollection <PanGu.WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);

            if (words.Count != 0)
            {
                // Max() ignores null entries and returns null when every entry is null,
                // so the result must be checked before it is dereferenced.
                PanGu.WordInfo top = words.Max();
                if (top != null)
                {
                    _keyword = top.Word; // remember the user's query keyword
                }
            }
            foreach (var word in words.Where(word => word != null))
            {
                result.AppendFormat("{0}^{1}.0 ", word.Word, (int)Math.Pow(3, word.Rank));
            }
            return(result.ToString().Trim());
        }
        /// <summary>
        /// Segments <paramref name="keywords"/> and concatenates the non-null segments,
        /// each followed by a '/' character.
        /// </summary>
        /// <param name="keywords">Text to segment.</param>
        /// <param name="ktTokenizer">Tokenizer used to produce the word infos.</param>
        /// <returns>
        /// The segments in "a/b/c/" form. Note that, despite the method name, the separator
        /// is '/', and the trailing '/' is kept because Trim only strips whitespace.
        /// </returns>
        private static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
        {
            // Run the segmentation: one keyword may be split into several words and single characters.
            ICollection<WordInfo> segments = ktTokenizer.SegmentToWordInfos(keywords);
            StringBuilder         joined   = new StringBuilder();

            foreach (WordInfo segment in segments)
            {
                // Null entries in the segmentation result are skipped.
                if (segment != null)
                {
                    joined.AppendFormat("{0}/", segment.Word);
                }
            }

            string text = joined.ToString();
            return text.Trim();
        }
        /// <summary>
        /// Segments a string and returns its distinct multi-character words.
        /// </summary>
        /// <param name="keywords">String to segment.</param>
        /// <param name="ktTokenizer">Tokenizer used to produce the word infos.</param>
        /// <returns>
        /// The words longer than one character, in first-seen order without duplicates,
        /// joined by single spaces.
        /// </returns>
        public string GetKeyWordsSplitFilter(string keywords, PanGuTokenizer ktTokenizer)
        {
            // Split the string into segments.
            ICollection<WordInfo> segments      = ktTokenizer.SegmentToWordInfos(keywords);
            List<string>          distinctWords = new List<string>();

            foreach (WordInfo segment in segments)
            {
                // Skip null entries, single-character (or empty) words, and words already collected.
                if (segment == null || segment.Word.Length <= 1 || distinctWords.Contains(segment.Word))
                {
                    continue;
                }

                distinctWords.Add(segment.Word);
            }

            string[] parts = distinctWords.ToArray();
            return string.Join(" ", parts);
        }
Exemple #5
0
        /// <summary>
        /// Smoke test: initialises the Lucene engine, tokenizes a sample string with
        /// <c>PanGuTokenizer</c>, and accumulates the terms as "term/ term/ ..." for inspection.
        /// Makes no assertions; it only exercises the tokenizer.
        /// </summary>
        public void ContainAnalyzerTest()
        {
            LuceneEngine engine = new LuceneEngine();

            engine.Init();
            // Alternative sample inputs:
            //string text = "三国演义";
            //string text = "a|f|g|i|m";
            string         text = "NBA常规赛-快船vs凯尔特人";
            TextReader     tr   = new StringReader(text);
            PanGuTokenizer ct   = new PanGuTokenizer(tr);
            int            end  = 0;
            string         ss   = string.Empty;

            while (end < text.Length)
            {
                Lucene.Net.Analysis.Token t = ct.Next();

                // The stream can run out before a token's end offset reaches text.Length
                // (for example if trailing characters are not emitted as tokens); Next()
                // is expected to return null at end of stream, so stop instead of
                // dereferencing a null token.
                if (t == null)
                {
                    break;
                }

                end = t.EndOffset();
                ss  = ss + t.TermText() + "/ ";
            }
        }
Exemple #6
0
        /// <summary>
        /// Smoke test: initialises the Lucene engine, then exercises the spelling helpers,
        /// the raw segmenter, the searcher's keyword segmentation, and <c>PanGuTokenizer</c>
        /// on sample inputs. Makes no assertions; results are kept in locals for inspection.
        /// </summary>
        public void SpellTest()
        {
            LuceneEngine engine = new LuceneEngine();

            engine.Init();
            var firsts     = SpellUtils.GetCnSegment("NBA常规赛-快船vs凯尔特人");
            var spells     = SpellUtils.GetSpellSegment("战重警和");
            var segment    = new Segment();
            var collection = segment.DoSegment("国际足球100509K联赛釜山-大田");
            var list       = new List <string>();

            // Collect the words of every non-null segment.
            foreach (WordInfo word in collection)
            {
                if (word == null)
                {
                    continue;
                }
                list.Add(word.Word);
            }
            //var list = SpellUtils.GetSpellSegment("战警");
            string         sss  = "san国yan义";
            string         ssss = Synacast.LuceneNetSearcher.Searcher.Searcher.SegmentKeyWord(sss);
            string         text = "重庆";
            TextReader     tr   = new StringReader(text);
            PanGuTokenizer ct   = new PanGuTokenizer(tr);
            int            end  = 0;
            string         ss   = string.Empty;

            while (end < text.Length)
            {
                Lucene.Net.Analysis.Token t = ct.Next();

                // The stream can run out before a token's end offset reaches text.Length;
                // Next() is expected to return null at end of stream, so stop instead of
                // dereferencing a null token.
                if (t == null)
                {
                    break;
                }

                end = t.EndOffset();
                ss  = ss + t.TermText() + "/ ";
            }
        }