示例#1
0
        /// <summary>
        /// Splits a string into words using the PanGu analyzer.
        /// </summary>
        /// <param name="msg">Text to segment.</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static List <string> PanGuSplitWord(string msg)
        {
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(msg));

            List <string> list = new List <string>();

            // BUG FIX: advance with IncrementToken() BEFORE reading the term
            // attribute. The original read the attribute first, which added a
            // bogus leading entry taken from the not-yet-advanced stream.
            while (tokenStream.IncrementToken())
            {
                ITermAttribute ita = tokenStream.GetAttribute <ITermAttribute>();
                list.Add(ita.Term);
            }

            analyzer.Close();
            return(list);
        }
示例#2
0
        /// <summary>
        /// Tokenizes the search term with the PanGu analyzer and returns the
        /// distinct escaped terms, always including the original search term.
        /// </summary>
        /// <param name="searchTerm">Raw search text entered by the user.</param>
        /// <returns>Escaped, case-insensitively distinct search terms.</returns>
        private static IEnumerable <string> GetSearchTerms(string searchTerm)
        {
            List <string> result   = new List <string>();
            var           analyzer = new PanGuAnalyzer();
            StringReader  sr       = new StringReader(searchTerm);
            TokenStream   stream   = analyzer.TokenStream(null, sr);

            // Advance and read the term attribute until the stream is exhausted.
            // (Removed: an unused DateTime stopwatch and a CloneAttributes()
            // call whose result was discarded.)
            while (stream.IncrementToken())
            {
                ITermAttribute ita = stream.GetAttribute <ITermAttribute>();
                result.Add(ita.Term);
            }
            sr.Close();
            analyzer.Dispose();

            // Joining and re-splitting drops any empty tokens before the
            // original search term is appended and duplicates are removed.
            var resultString = string.Join(" ", result);

            return(resultString.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                   .Concat(new[] { searchTerm })
                   .Distinct(StringComparer.OrdinalIgnoreCase)
                   .Select(Escape));
        }
示例#3
0
        /// <summary>
        /// Tokenizes <paramref name="keyword"/> with the PanGu analyzer and
        /// returns the resulting terms.
        /// </summary>
        /// <param name="keyword">Text to segment (also passed as the field name).</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static IEnumerable <string> SplitWords(string keyword)
        {
            IList <string> list     = new List <string>();
            Analyzer       analyzer = new PanGuAnalyzer();
            TokenStream    stream   = analyzer.TokenStream(keyword, new StringReader(keyword));

            // Advance and read the term attribute until the stream is exhausted
            // (simplified from a primed hasNext loop; dead commented-out code
            // using the obsolete Token API removed).
            while (stream.IncrementToken())
            {
                ITermAttribute ita = stream.GetAttribute <ITermAttribute>();
                list.Add(ita.Term);
            }
            return(list);
        }
示例#4
0
        /// <summary>
        /// Searches the Lucene book index for <paramref name="searchWords"/> and
        /// logs the search into the keyword-history store.
        /// </summary>
        /// <param name="searchWords">Raw search text entered by the user.</param>
        /// <returns>Matching books with highlighted content snippets.</returns>
        private List <BookSearchModel> SearchBookContent(string searchWords)
        {
            List <BookSearchModel> bookSearchModelList = new List <BookSearchModel>();
            //1. Tokenize the search condition with the PanGu analyzer.
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(searchWords));

            Lucene.Net.Analysis.Token token = null;
            string indexPath = @"D:\lucenedir";
            //string kw = "面向对象"; // sample input: split the user's search condition.
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            // Search condition.
            PhraseQuery query = new PhraseQuery();

            //foreach (string word in kw.Split(' ')) // alternative: let the user pre-split words with spaces, e.g. "计算机   专业"
            //{
            //    query.Add(new Term("body", word));
            //}
            //query.Add(new Term("body","语言")); // conditions combine (AND relationship); order does not matter.
            // query.Add(new Term("body", "大学生"));
            // Add each PanGu token as a phrase term on the "body" field.
            while ((token = tokenStream.Next()) != null)
            {
                query.Add(new Term("body", token.TermText()));
            }
            // query.Add(new Term("body", kw)); // articles whose body contains kw
            query.SetSlop(100);//Maximum distance between query terms: terms too far apart in a document are meaningless as a phrase (e.g. "大学生" and "简历" separated by too many words).
            // TopScoreDocCollector is the container that receives the query results.
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query; hits are collected into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // all hits; GetTotalHits() is the total count. TopDocs(300, 20) would return documents 300..320.
            // This can be used to implement paging.
            for (int i = 0; i < docs.Length; i++)
            {
                //
                // ScoreDoc[] only carries document ids, so the full Documents are not all loaded into memory at once; fetch details lazily via searcher.Doc(id) to keep memory pressure low.
                int             docId       = docs[i].doc;         // Lucene-internal id of the hit
                Document        doc         = searcher.Doc(docId); // load the full document for that id
                BookSearchModel searchModel = new BookSearchModel();
                searchModel.Id                = int.Parse(doc.Get("ID"));
                searchModel.Title             = doc.Get("title");
                searchModel.ContenDescription = SearchWordHighlight.CreateHightLight(searchWords, doc.Get("body"));
                //this.listBox1.Items.Add(doc.Get("number") + "\n");// show stored field values
                //this.listBox1.Items.Add(doc.Get("body") + "\n");
                //this.listBox1.Items.Add("-----------------------\n");
                bookSearchModelList.Add(searchModel);
            }
            // Record this search term in the keyword store.
            SearchDetails entity = new SearchDetails()
            {
                Id = Guid.NewGuid(), KeyWords = searchWords, SearchDateTime = DateTime.Now
            };

            SearchDetailsService.AddEntity(entity);
            return(bookSearchModelList);
        }
示例#5
0
        /// <summary>
        /// Runs a search against the Lucene index and renders the result view.
        /// </summary>
        /// <returns></returns>
        public ActionResult Search()
        {
            string kw        = Request["kw"];                 // search text entered by the user
            string indexPath = Server.MapPath("~/lucenedir"); // where to search

            // Tokenize the user's input.
            List <string> kws         = new List <string>(); // holds the tokens after segmentation
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(kw.ToString()));

            Lucene.Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                kws.Add(token.TermText());
            }

            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            // Search condition.

            // NOTE: PhraseQuery only searches a single field; use a different query type for multi-field search.
            PhraseQuery query = new PhraseQuery();

            foreach (var word in kws)
            {
                query.Add(new Term("content", word)); // search against the "content" field
            }

            query.SetSlop(100);//Maximum distance between query terms: terms too far apart in a document are meaningless as a phrase (e.g. "大学生" and "简历" separated by too many words).
            // TopScoreDocCollector is the container that receives the query results.
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query; hits are collected into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // all hits; GetTotalHits() is the total count. TopDocs(300, 20) would return documents 300..320.

            // Collect view models for the hits.
            List <BookVieModel> bookList = new List <BookVieModel>();

            for (int i = 0; i < docs.Length; i++)
            {
                // ScoreDoc[] only carries document ids; fetch full Documents lazily via searcher.Doc(id) to keep memory pressure low.
                int      docId = docs[i].doc;         // Lucene-internal id of the hit
                Document doc   = searcher.Doc(docId); // load the full document for that id

                BookVieModel model = new BookVieModel();
                model.Id    = Convert.ToInt32(doc.Get("Id"));             // NOTE: field names must match those used when building the index
                model.Title = CreateHightLight(kw, doc.Get("title"));     // NOTE: field names must match those used when building the index
                // Highlight the search words inside the matched content.
                model.Content = CreateHightLight(kw, doc.Get("content")); // NOTE: field names must match those used when building the index

                bookList.Add(model);
            }
            ViewBag.books = bookList;
            ViewBag.kw    = kw;
            return(View("Index"));
        }
示例#6
0
        /// <summary>
        /// Searches the Lucene index for the text in textBox1 and lists each
        /// hit's "number" and "body" fields in listBox1.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            string indexPath = @"C:\Users\杨ShineLon\Desktop\lucenedir"; // where to search
            string kw        = textBox1.Text;                           // e.g. "面向对象": the user's search condition to be tokenized

            // Tokenize the user's input.
            List <string> kws         = new List <string>(); // holds the tokens after segmentation
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(kw.ToString()));

            Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                kws.Add(token.TermText());
                //Console.WriteLine(token.TermText());
            }

            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            // Search condition.
            PhraseQuery query = new PhraseQuery();

            //foreach (string word in kw.Split(' ')) // alternative: let the user pre-split words with spaces, e.g. "计算机   专业"
            //{
            //    query.Add(new Term("body", word));
            //}
            //query.Add(new Term("body","语言")); // conditions combine (AND relationship); order does not matter.
            // query.Add(new Term("body", "大学生"));

            //query.Add(new Term("body", kw)); // articles whose body contains kw
            foreach (var word in kws)
            {
                query.Add(new Term("body", word));
            }

            query.SetSlop(100);//Maximum distance between query terms: terms too far apart in a document are meaningless as a phrase (e.g. "大学生" and "简历" separated by too many words).
            // TopScoreDocCollector is the container that receives the query results.
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query; hits are collected into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // all hits; GetTotalHits() is the total count. TopDocs(300, 20) would return documents 300..320.
            // This can be used to implement paging.
            this.listBox1.Items.Clear();
            for (int i = 0; i < docs.Length; i++)
            {
                //
                // ScoreDoc[] only carries document ids; fetch full Documents lazily via searcher.Doc(id) to keep memory pressure low.
                int      docId = docs[i].doc;                      // Lucene-internal id of the hit
                Document doc   = searcher.Doc(docId);              // load the full document for that id
                this.listBox1.Items.Add(doc.Get("number") + "\n"); // show stored field values
                this.listBox1.Items.Add(doc.Get("body") + "\n");
                this.listBox1.Items.Add("-----------------------\n");
            }
        }
示例#7
0
        /// <summary>
        /// Demo: tokenizes the contents of textBox2 with the PanGu analyzer and
        /// shows one message box per token.
        /// </summary>
        private void button6_Click(object sender, EventArgs e)
        {
            Analyzer    segmenter = new PanGuAnalyzer();
            TokenStream stream    = segmenter.TokenStream("", new StringReader(textBox2.Text));

            for (Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                MessageBox.Show(tok.TermText());
            }
        }
示例#8
0
        /// <summary>
        /// Demo: tokenizes a fixed sentence with the PanGu analyzer and prints
        /// each token to the console.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            Analyzer    segmenter = new PanGuAnalyzer();
            TokenStream stream    = segmenter.TokenStream("", new StringReader("面向世界,面向现代化"));

            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                Console.WriteLine(tok.TermText());
            }
        }
示例#9
0
        /// <summary>
        /// Demo: tokenizes a fixed sentence using the attribute-based API
        /// (IncrementToken + ITermAttribute) and prints every term.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            Analyzer segmenter = new PanGuAnalyzer();

            TokenStream stream = segmenter.TokenStream("", new StringReader("面向对象编程,没有对象哈哈哈"));

            while (stream.IncrementToken())
            {
                Console.WriteLine(stream.GetAttribute <ITermAttribute>().Term);
            }
        }
示例#10
0
        /// <summary>
        /// PanGu word segmentation.
        /// </summary>
        /// <param name="msg">The string to split.</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static List <string> PanguSplitWords(string msg)
        {
            var         words     = new List <string>();
            Analyzer    segmenter = new PanGuAnalyzer();
            TokenStream stream    = segmenter.TokenStream("", new StringReader(msg));

            // Next() yields one token at a time; null marks the end of input.
            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                words.Add(tok.TermText());
            }
            return(words);
        }
示例#11
0
        /// <summary>
        /// Segments <paramref name="content"/> with the PanGu analyzer.
        /// </summary>
        /// <param name="content">Text to segment.</param>
        /// <returns>The tokens as an array.</returns>
        private string[] SplitWords(string content)
        {
            var         tokens    = new List <string>();
            Analyzer    segmenter = new PanGuAnalyzer(); // PanGu segmentation algorithm
            TokenStream stream    = segmenter.TokenStream("", new StringReader(content));

            // Next() keeps producing tokens until it returns null.
            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                tokens.Add(tok.TermText());
            }
            return(tokens.ToArray());
        }
示例#12
0
        /// <summary>
        /// Tokenizes the user's search condition.
        /// </summary>
        /// <param name="str">Raw search text.</param>
        /// <returns>The tokens as an array.</returns>
        private static string[] SplitWord(string str)
        {
            var         tokens    = new List <string>();
            Analyzer    segmenter = new PanGuAnalyzer(); // use PanGu segmentation
            TokenStream stream    = segmenter.TokenStream("", new StringReader(str));

            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                tokens.Add(tok.TermText());
            }
            return(tokens.ToArray());
        }
示例#13
0
        /// <summary>
        /// Tokenizes <paramref name="str"/>; each token becomes a keyword.
        /// </summary>
        /// <param name="str">Text to segment.</param>
        /// <returns>The keywords as an array.</returns>
        public static string[] GetKeyWords(string str)
        {
            var         keywords  = new List <string>();
            Analyzer    segmenter = new PanGuAnalyzer();
            TokenStream stream    = segmenter.TokenStream("", new StringReader(str));

            for (Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                keywords.Add(tok.TermText());
            }
            return(keywords.ToArray());
        }
示例#14
0
        /// <summary>
        /// Tokenizes a string with the PanGu analyzer.
        /// </summary>
        /// <param name="str">Text to segment.</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static List <string> GetPanGuWord(string str)
        {
            Analyzer    segmenter = new PanGuAnalyzer();
            TokenStream stream    = segmenter.TokenStream("", new StringReader(str));

            var words = new List <string>();

            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                words.Add(tok.TermText());
            }
            return(words);
        }
示例#15
0
        /// <summary>
        /// Segments the keyword with the PanGu analyzer using the
        /// attribute-based token API.
        /// </summary>
        /// <param name="keyWord">Text to segment.</param>
        /// <returns>The tokens as an array.</returns>
        public static string[] SplitWords(string keyWord)
        {
            var         terms     = new List <string>();
            Analyzer    segmenter = new PanGuAnalyzer(); // PanGu segmentation algorithm
            TokenStream stream    = segmenter.TokenStream("", new StringReader(keyWord));

            while (stream.IncrementToken())
            {
                terms.Add(stream.GetAttribute <ITermAttribute>().Term);
            }
            return(terms.ToArray());
        }
示例#16
0
        /// <summary>
        /// Demo: tokenizes a fixed mixed Chinese/English sentence and prints
        /// each term; the using block disposes the token stream.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            Analyzer segmenter = new PanGuAnalyzer();

            using (TokenStream stream = segmenter.TokenStream("", new StringReader("北京,Hi欢迎你们大家")))
            {
                while (stream.IncrementToken())
                {
                    Console.WriteLine(stream.GetAttribute <ITermAttribute>().Term);
                }
            }
        }
示例#17
0
        /// <summary>
        /// Segments <paramref name="content"/> with the PanGu analyzer.
        /// </summary>
        /// <param name="content">Text to segment.</param>
        /// <returns>The tokens as an array.</returns>
        private string[] SplitWords(string content)
        {
            var      tokens    = new List <string>();
            Analyzer segmenter = new PanGuAnalyzer(); // PanGu segmentation algorithm

            TokenStream stream = segmenter.TokenStream("", new StringReader(content));

            // IncrementToken() advances the stream until it is exhausted.
            while (stream.IncrementToken())
            {
                tokens.Add(stream.GetAttribute <ITermAttribute>().Term);
            }
            return(tokens.ToArray());
        }
示例#18
0
        /// <summary>
        /// Tokenizes a keyword using the PanGu analyzer.
        /// </summary>
        /// <param name="keyword">Text to segment.</param>
        /// <returns>The tokens as an array.</returns>
        public static string[] WordSegmentation(string keyword)
        {
            var      words     = new List <string>();
            Analyzer segmenter = new PanGuAnalyzer();
            //Analyzer segmenter = new StandardAnalyzer();
            TokenStream stream = segmenter.TokenStream("", new StringReader(keyword));

            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                words.Add(tok.TermText());
            }
            return(words.ToArray());
        }
示例#19
0
        /// <summary>
        /// Searches the "name" field of the index for the tokenized contents of
        /// textBox1 and shows each hit's code/name in a message box.
        /// </summary>
        private void button1_Click_1(object sender, EventArgs e)
        {
            string        indexPath = "c:/index";
            string        kw        = textBox1.Text;
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery   query     = new PhraseQuery();

            //todo: split the user's keyword input into terms
            //char[] str = textBox1.Text.ToCharArray();
            //for (int i = 0; i < str.Length; i++)
            //{
            //    query.Add(new Term("name", str[i].ToString()));
            //}

            // Tokenize the search text with the PanGu analyzer.
            List <String> list        = new List <string>();
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(textBox1.Text));
            Token         token       = null;

            while ((token = tokenStream.Next()) != null)
            {
                list.Add(token.TermText());
            }

            // Each token becomes a phrase term on the "name" field.
            for (int i = 0; i < list.Count; i++)
            {
                query.Add(new Term("name", list[i].ToString()));
            }

            query.SetSlop(100);
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);

            searcher.Search(query, null, collector);
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

            for (int i = 0; i < docs.Length; i++)
            {
                int docId = docs[i].doc;// document number (primary key assigned by Lucene.Net)
                // Hits only carry document ids; if the Document is needed, fetch it via Doc(id).
                // This reduces memory usage.
                Document doc  = searcher.Doc(docId);// look up the Document by id
                string   code = doc.Get("code");
                string   name = doc.Get("name");

                MessageBox.Show("code:" + code + "name:" + name);
            }
        }
示例#20
0
        /// <summary>
        /// PanGu segmentation: joins the tokens of <paramref name="words"/>
        /// with a "   |  " separator.
        /// </summary>
        /// <param name="words">Text to segment.</param>
        /// <returns>The separator-joined token string.</returns>
        public static object PanGu(string words)
        {
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(words));

            Lucene.Net.Analysis.Token token = null;
            // StringBuilder avoids the O(n^2) allocation cost of repeated
            // string concatenation in the loop; the output is unchanged.
            var str = new System.Text.StringBuilder();

            while ((token = tokenStream.Next()) != null)
            {
                string word = token.TermText(); // TermText() is the current token's text
                str.Append(word).Append("   |  ");
            }
            return(str.ToString());
        }
示例#21
0
        /// <summary>
        /// Splits a string into words using the PanGu analyzer.
        /// </summary>
        /// <param name="msg">Text to segment.</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static List <string> PanGuSplitWord(string msg)
        {
            Analyzer     segmenter = new PanGuAnalyzer();
            StringReader reader    = new StringReader(msg);
            TokenStream  stream    = segmenter.TokenStream("", reader);
            // The term attribute instance is fetched once and refreshed in
            // place by every IncrementToken() call.
            ITermAttribute termAttr = stream.GetAttribute <ITermAttribute>();
            var            words    = new List <string>();

            while (stream.IncrementToken())
            {
                words.Add(termAttr.Term);
            }
            return(words);
        }
示例#22
0
        /// <summary>
        /// Demo: segments a fixed sentence and prints each term in brackets.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            Analyzer     segmenter = new PanGuAnalyzer();
            StringReader reader    = new StringReader("南通大学,欢迎您");
            TokenStream  stream    = segmenter.TokenStream("", reader);

            Console.WriteLine("=====PanGu analyzer=======");
            // The attribute instance is refreshed in place by IncrementToken().
            ITermAttribute termAttr = stream.GetAttribute <ITermAttribute>();

            while (stream.IncrementToken())
            {
                Console.WriteLine("[" + termAttr.Term + "]");
            }
        }
示例#23
0
        /// <summary>
        /// Segments <paramref name="key"/> with the PanGu analyzer.
        /// </summary>
        /// <param name="key">Text to segment.</param>
        /// <returns>The tokens as an array.</returns>
        public static string[] PanGuSplit(string key)
        {
            Analyzer    segmenter = new PanGuAnalyzer();
            TokenStream stream    = segmenter.TokenStream("", new StringReader(key));

            var tokens = new List <string>();

            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                tokens.Add(tok.TermText());
            }
            return(tokens.ToArray());
        }
示例#24
0
        /// <summary>
        /// PanGu word segmentation.
        /// </summary>
        /// <param name="msg">Text to segment.</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static List <string> PanGuSplitWord(string msg)
        {
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(msg));
            List <string> list        = new List <string>();

            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita;
            // BUG FIX: the original called list.Append(ita.Term) — that is LINQ's
            // Enumerable.Append, which returns a new sequence and discards it,
            // so the method always returned an empty list. List<T>.Add mutates
            // the list in place. (The redundant hasnext flag was also removed.)
            while (tokenStream.IncrementToken())
            {
                ita = tokenStream.GetAttribute <Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                list.Add(ita.Term);
            }
            return(list);
        }
示例#25
0
        /// <summary>
        /// Segments the keyword and returns the resulting tokens.
        /// </summary>
        public static IEnumerable <string> SplitWords(string keyword)
        {
            IList <string> words     = new List <string>();
            Analyzer       segmenter = new PanGuAnalyzer();
            TokenStream    stream    = segmenter.TokenStream("", new StringReader(keyword));

            // Next() returns the current token; null marks the end of input.
            for (Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                // TermText() is the text of the current token.
                words.Add(tok.TermText());
            }

            return(words);
        }
示例#26
0
        /// <summary>
        /// Tokenizes the index/search condition.
        /// </summary>
        /// <param name="str">Raw search text.</param>
        /// <returns>The tokens as an array.</returns>
        public static string[] SqlitIndexWord(string str)
        {
            // PanGu segmentation of the incoming search condition.
            var         tokens    = new List <string>();
            Analyzer    segmenter = new PanGuAnalyzer();
            TokenStream stream    = segmenter.TokenStream("", new StringReader(str));

            Lucene.Net.Analysis.Token tok;
            while ((tok = stream.Next()) != null)
            {
                string term = tok.TermText();
                Console.WriteLine(term); // debug trace of each token
                tokens.Add(term);
            }

            return(tokens.ToArray());
        }
示例#27
0
        /// <summary>
        /// Tokenizes the user's search input with the PanGu analyzer.
        /// </summary>
        /// <param name="str">Raw search text.</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static List <string> GetPanGuWord(string str)
        {
            Analyzer segmenter = new PanGuAnalyzer();

            TokenStream stream = segmenter.TokenStream("", new StringReader(str));

            var words = new List <string>();

            while (stream.IncrementToken())
            {
                words.Add(stream.GetAttribute <ITermAttribute>().Term);
            }

            return(words);
        }
示例#28
0
        /// <summary>
        /// Splits a string into words using the PanGu analyzer; the token
        /// stream is disposed by the using block.
        /// </summary>
        /// <param name="kw">Text to segment.</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static List <string> PanGuSplitWord(string kw)
        {
            var      words     = new List <string>();
            Analyzer segmenter = new PanGuAnalyzer();

            using (TokenStream stream = segmenter.TokenStream("", new StringReader(kw)))
            {
                while (stream.IncrementToken())
                {
                    words.Add(stream.GetAttribute <ITermAttribute>().Term);
                }
            }
            return(words);
        }
示例#29
0
        /// <summary>
        /// Segments <paramref name="keywords"/> and joins the terms, each
        /// followed by a single space.
        /// </summary>
        /// <param name="keywords">Text to segment (also passed as the field name).</param>
        /// <returns>The space-separated token string.</returns>
        public static string GetKeyWordSplid(string keywords)
        {
            StringBuilder sb        = new StringBuilder();
            Analyzer      segmenter = new PanGuAnalyzer();
            TokenStream   stream    = segmenter.TokenStream(keywords, new StringReader(keywords));

            while (stream.IncrementToken())
            {
                ITermAttribute attr = stream.GetAttribute <ITermAttribute>();
                sb.Append(attr.Term + " ");
            }
            return(sb.ToString());
        }
示例#30
0
        /// <summary>
        /// Segments the given message into words.
        /// </summary>
        /// <param name="msg">Text to segment.</param>
        /// <returns>The tokens in order of appearance.</returns>
        public static IEnumerable <string> SplitWords(string msg)
        {
            var words = new List <string>();

            Analyzer    segmenter = new PanGuAnalyzer();
            TokenStream stream    = segmenter.TokenStream("",
                                                          new StringReader(msg));

            // Next() produces one token per call; null signals the end.
            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                words.Add(tok.TermText()); // the segmented word
            }
            return(words);
        }