Example #1
        private TopDocs PhraseQuery(string keyword, string field, int slop)
        {
            string[] words = keyword.Trim().Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            TopDocs  docs  = null;
            int      n     = 10;// maximum number of results to return

            SetOutput(string.Format("正在检索关键字:{0}", keyword));
            try
            {
                PhraseQuery query = new PhraseQuery();
                query.SetSlop(slop);
                foreach (string word in words)
                {
                    Term t = new Term(field, word);
                    query.Add(t);
                }
                Stopwatch watch = new Stopwatch();
                watch.Start();
                docs = searcher.Search(query, (Filter)null, n);
                watch.Stop();
                string sb = "PhraseQuery搜索完成,共用时:" + watch.Elapsed.Hours + "时 " + watch.Elapsed.Minutes + "分 " + watch.Elapsed.Seconds + "秒 " + watch.Elapsed.Milliseconds + "毫秒";
                SetOutput(sb);
            }
            catch (Exception ex)
            {
                SetOutput(ex.Message);
                docs = null;
            }
            return(docs);
        }
Example #2
        /// <summary>
        /// Adds a standard type clause to this instance
        /// </summary>
        /// <param name="term">Term to add to this query.</param>
        /// <param name="occurrence">Defines how the term is added to this query.</param>
        /// <param name="slop">The amount of allowed slop in a phrase query.</param>
        /// <remarks>
        /// Slop is the amount of movement each word is allowed in a non-exact phrase query.
        /// For instance, if you search for "Adobe Systems Incorporated" and the slop is set to 0, then
        /// only results containing that exact phrase are returned. If you set the slop to 2, then at most
        /// two movements can be made for each word. In the same example, with slop set to 2, results would
        /// be returned for "Adobe Systems Incorporated", "Adobe Incorporated Systems", "Systems Adobe Incorporated",
        /// and "Systems Incorporated Adobe".
        /// </remarks>
        public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop)
        {
            if (term == null)
            {
                throw new ArgumentNullException("term", "term cannot be null");
            }
            IncrementTotalClauses(1);

            if (term.IsPhrase)
            {
                PhraseQuery phraseQuery = new PhraseQuery();
                phraseQuery.Add(term.GetLuceneTerm());
                phraseQuery.SetSlop(slop);
                phraseQuery.SetBoost(term.Boost);
                this.luceneQuery.Add(phraseQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
                phraseQuery = null;
            }
            else
            {
                TermQuery termQuery = new TermQuery(term.GetLuceneTerm());
                termQuery.SetBoost(term.Boost);
                this.luceneQuery.Add(termQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
                termQuery = null;
            }
        }
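A minimal sketch (not part of the original examples) illustrating the slop behavior described in the remarks above; the `searcher` instance and the analyzed "title" field are assumptions:

        // Hypothetical illustration of slop, assuming an existing IndexSearcher named searcher
        // and documents indexed with an analyzed "title" field (terms lowercased by the analyzer).
        PhraseQuery exact = new PhraseQuery();
        exact.Add(new Term("title", "adobe"));
        exact.Add(new Term("title", "systems"));
        exact.Add(new Term("title", "incorporated"));
        exact.SetSlop(0);                                        // only the exact phrase "adobe systems incorporated" matches

        PhraseQuery sloppy = new PhraseQuery();
        sloppy.Add(new Term("title", "adobe"));
        sloppy.Add(new Term("title", "systems"));
        sloppy.Add(new Term("title", "incorporated"));
        sloppy.SetSlop(2);                                       // up to two moves per term, so "systems adobe incorporated" can also match

        TopDocs exactHits  = searcher.Search(exact, (Filter)null, 10);
        TopDocs sloppyHits = searcher.Search(sloppy, (Filter)null, 10);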
Example #3
 /*
  * Check if src and dest have overlapped part and if it is, create PhraseQueries and add expandQueries.
  * 
  * ex1) src="a b", dest="c d"       => no overlap
  * ex2) src="a b", dest="a b c"     => no overlap
  * ex3) src="a b", dest="b c"       => overlap; expandQueries={"a b c"}
  * ex4) src="a b c", dest="b c d"   => overlap; expandQueries={"a b c d"}
  * ex5) src="a b c", dest="b c"     => no overlap
  * ex6) src="a b c", dest="b"       => no overlap
  * ex7) src="a a a a", dest="a a a" => overlap;
  *                                     expandQueries={"a a a a a","a a a a a a"}
  * ex8) src="a b c d", dest="b c"   => no overlap
  */
 private void CheckOverlap(Dictionary<Query,Query> expandQueries, Term[] src, Term[] dest, int slop, float boost)
 {
     // Starting from 1 (not 0) is safe because the flatten() method guarantees that the
     // PhraseQuery has multiple terms (if a PhraseQuery has only one term, flatten()
     // converts it to a TermQuery).
     for (int i = 1; i < src.Length; i++)
     {
         bool overlap = true;
         for (int j = i; j < src.Length; j++)
         {
             if ((j - i) < dest.Length && !src[j].Text().Equals(dest[j - i].Text()))
             {
                 overlap = false;
                 break;
             }
         }
         if (overlap && src.Length - i < dest.Length)
         {
             PhraseQuery pq = new PhraseQuery();
             foreach (Term srcTerm in src)
                 pq.Add(srcTerm);
             for (int k = src.Length - i; k < dest.Length; k++)
             {
                 pq.Add(new Term(src[0].Field(), dest[k].Text()));
             }
             pq.SetSlop(slop);
             pq.SetBoost(boost);
             if (!expandQueries.ContainsKey(pq))
                 expandQueries.Add(pq,pq);
         }
     }
 }
Example #4
        private List <BookSearchModel> SearchBookContent(string searchWords)
        {
            List <BookSearchModel> bookSearchModelList = new List <BookSearchModel>();
            //1. Tokenize the search input
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(searchWords));

            Lucene.Net.Analysis.Token token = null;
            string indexPath = @"D:\lucenedir";
            //string kw = "面向对象";// split the user's search input into terms.
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            //foreach (string word in kw.Split(' '))// split on spaces first, letting the user do the tokenizing; each space-separated token is a word, e.g. "计算机   专业"
            //{
            //    query.Add(new Term("body", word));
            //}
            //query.Add(new Term("body","语言"));// more conditions can be added; they are combined and order does not matter.
            // query.Add(new Term("body", "大学生"));
            while ((token = tokenStream.Next()) != null)
            {
                query.Add(new Term("body", token.TermText()));
            }
            // query.Add(new Term("body", kw));//body中含有kw的文章
            query.SetSlop(100);//多个查询条件的词之间的最大距离.在文章中相隔太远 也就无意义.(例如 “大学生”这个查询条件和"简历"这个查询条件之间如果间隔的词太多也就没有意义了。)
            //TopScoreDocCollector是盛放查询结果的容器
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query and put the results into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // get all matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320,
            // which can be used for paging
            for (int i = 0; i < docs.Length; i++)
            {
                //
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                int             docId       = docs[i].doc;         // id of the matching document (assigned internally by Lucene)
                Document        doc         = searcher.Doc(docId); // fetch the full document for this id
                BookSearchModel searchModel = new BookSearchModel();
                searchModel.Id                = int.Parse(doc.Get("ID"));
                searchModel.Title             = doc.Get("title");
                searchModel.ContenDescription = SearchWordHighlight.CreateHightLight(searchWords, doc.Get("body"));
                //this.listBox1.Items.Add(doc.Get("number") + "\n");// 取出放进字段的值
                //this.listBox1.Items.Add(doc.Get("body") + "\n");
                //this.listBox1.Items.Add("-----------------------\n");
                bookSearchModelList.Add(searchModel);
            }
            //insert the searched keyword into the keyword store
            SearchDetails entity = new SearchDetails()
            {
                Id = Guid.NewGuid(), KeyWords = searchWords, SearchDateTime = DateTime.Now
            };

            SearchDetailsService.AddEntity(entity);
            return(bookSearchModelList);
        }
Example #5
        /// <summary>
        /// Search data against an existing index
        /// </summary>
        /// <param name="FieldTitle">检索的字段,注意,这个一定要求是索引中已经存在的</param>
        /// <param name="keyword">关键字、词、句</param>
        /// <param name="directoryinfo">索引所在的位置</param>
        /// <param name="costTime">检索花费的时间</param>
        /// <param name="CountNum">检索到的条数</param>
        /// <param name="CountNum">检索是否成功</param>
        /// <returns></returns>
        public static IEnumerable <Document> Query(string FieldTitle, string keyword, DirectoryInfo directoryinfo, out TimeSpan costTime, out int CountNum, out bool isSuccess)
        {
            isSuccess = false;
            costTime  = TimeSpan.Zero;
            CountNum  = 0;
            string indexPath = string.Empty;

            if (directoryinfo.Exists)
            {
                indexPath = directoryinfo.FullName;
            }
            else
            {
                ShowMessageBox(new Page(), "索引路径不正确");
                return(null);
            }
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader reader    = IndexReader.Open(directory, true);

            if (reader.GetFieldNames(IndexReader.FieldOption.ALL).Contains(FieldTitle))// check whether the index contains this field
            {
                IndexSearcher searcher = new IndexSearcher(reader);
                //BooleanQuery query = new BooleanQuery();
                PhraseQuery query    = new PhraseQuery();
                string[]    KeyWords = WordSegmentation(keyword);
                foreach (string word in KeyWords)// split on spaces first, letting the user do the tokenizing; each space-separated token is a word, e.g. "计算机 专业"
                {
                    //query.Add(new TermQuery(new Term(FieldTitle, word)), BooleanClause.Occur.SHOULD);// match documents that contain any of the words (an "OR" search)
                    query.Add(new Term(FieldTitle, word));
                }
                query.SetSlop(100);
                TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                Stopwatch            stopwatch = Stopwatch.StartNew();
                stopwatch.Start();
                searcher.Search(query, null, collector);
                stopwatch.Stop();
                costTime = stopwatch.Elapsed;
                CountNum = collector.GetTotalHits();
                ScoreDoc[]      docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
                List <Document> list = new List <Document>();
                for (int i = 0; i < docs.Length; i++)
                {
                    int      docId = docs[i].doc;         // document id (a key assigned internally by Lucene.Net); the results only contain ids, so fetch the Document with Doc when needed, which keeps memory usage low
                    Document doc   = searcher.Doc(docId); // fetch the Document by id
                    list.Add(doc);
                    //string url = highLight(KeyWords[0], doc.Get("url"));
                    //string body = highLight(KeyWords[0], doc.Get("body"));
                    //Response.Write(@"<a href='" + url + "'>" + url + "</a> <br/>" + body + "<br /><br />");
                    //Response.Write("<hr/><br />");
                }
                isSuccess = true;
                return(list.ToArray());
            }
            else
            {
                return(null);
            }
        }
Example #6
        /// <summary>
        /// Adds a PhraseQuery built from multiple keywords
        /// </summary>
        /// <param name="fieldName">Name of the field to search</param>
        /// <param name="phrases">Phrases to search for</param>
        /// <param name="boostLevel">Boost level</param>
        /// <param name="asFilter">Whether to apply the query as a filter</param>
        /// <returns>LuceneSearchBuilder</returns>
        public LuceneSearchBuilder WithPhrases(string fieldName, IEnumerable <string> phrases, BoostLevel?boostLevel = null, bool asFilter = false)
        {
            BooleanQuery query = new BooleanQuery();

            foreach (string phrase in phrases)
            {
                string filteredPhrase = ClauseScrubber.LuceneKeywordsScrub(phrase);
                if (string.IsNullOrEmpty(filteredPhrase))
                {
                    continue;
                }

                if (filteredPhrase.Length == 1)
                {
                    Term  term = new Term(fieldName, filteredPhrase);
                    Query q    = new PrefixQuery(term);

                    if (boostLevel.HasValue)
                    {
                        SetBoost(q, boostLevel.Value);
                    }

                    query.Add(q, BooleanClause.Occur.SHOULD);

                    continue;
                }

                string[] nameSegments = ClauseScrubber.SegmentForPhraseQuery(filteredPhrase);

                PhraseQuery phraseQuery = new PhraseQuery();
                foreach (var nameSegment in nameSegments)
                {
                    phraseQuery.Add(new Term(fieldName, nameSegment));
                }

                phraseQuery.SetSlop(PhraseQuerySlop);

                if (boostLevel.HasValue)
                {
                    SetBoost(phraseQuery, boostLevel.Value);
                }

                query.Add(phraseQuery, BooleanClause.Occur.SHOULD);
            }


            if (asFilter)
            {
                filters.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
            }
            else
            {
                clauses.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
            }

            return(this);
        }
        private List <ViewModelContent> SearchWants()
        {
            string        indexPath = @"G:\lucenedir";
            List <string> list      = Common.WebCommon.PanGuSplitWord(Request["txtSearch"]);//对用户输入的搜索条件进行拆分。
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery queryTitle = new PhraseQuery();

            foreach (string word in list)// split on spaces first, letting the user do the tokenizing; each space-separated token is a word, e.g. "计算机   专业"
            {
                queryTitle.Add(new Term("Title", word));
                T_SearchLogsService.AddEntity(new T_SearchLogs {
                    Id         = Guid.NewGuid(),
                    Word       = word,
                    SearchDate = DateTime.Now
                });// insert each keyword the user searched for into the detail table
            }
            //query.Add(new Term("body","语言"));--可以添加查询条件,两者是add关系.顺序没有关系.
            // query.Add(new Term("body", "大学生"));
            // query.Add(new Term("body", kw));//body中含有kw的文章
            queryTitle.SetSlop(100); //多个查询条件的词之间的最大距离.在文章中相隔太远 也就无意义.(例如 “大学生”这个查询条件和"简历"这个查询条件之间如果间隔的词太多也就没有意义了。)
                                     //TopScoreDocCollector是盛放查询结果的容器
            PhraseQuery queryContent = new PhraseQuery();

            foreach (string word in list)// split on spaces first, letting the user do the tokenizing; each space-separated token is a word, e.g. "计算机   专业"
            {
                queryContent.Add(new Term("Content", word));
            }
            queryContent.SetSlop(100);
            BooleanQuery query = new BooleanQuery();// combined query

            query.Add(queryTitle, BooleanClause.Occur.SHOULD);
            query.Add(queryContent, BooleanClause.Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query and put the results into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // get all matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320,
            // which can be used for paging
            List <ViewModelContent> viewModelList = new List <ViewModelContent>();

            for (int i = 0; i < docs.Length; i++)
            {
                //
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                ViewModelContent viewModel = new ViewModelContent();
                int      docId             = docs[i].doc;           // id of the matching document (assigned internally by Lucene)
                Document doc = searcher.Doc(docId);                 // fetch the full document for this id
                viewModel.Id      = Convert.ToInt32(doc.Get("Id")); // read back the stored field value
                viewModel.Content = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Content"));
                viewModel.Title   = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Title"));

                viewModelList.Add(viewModel);
            }
            return(viewModelList);
        }
Example #8
        /// <summary>
        /// Perform the search
        /// </summary>
        /// <returns></returns>
        public ActionResult Search()
        {
            string kw        = Request["kw"];                 // 获取用户输入的搜索内容
            string indexPath = Server.MapPath("~/lucenedir"); // 从哪里搜索

            // Tokenize the user's input
            List <string> kws         = new List <string>(); // collection to hold the tokens after splitting
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(kw.ToString()));

            Lucene.Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                kws.Add(token.TermText());
            }

            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria

            // Note: this class only supports searching a single field; to combine conditions on several fields, use another class (such as BooleanQuery)
            PhraseQuery query = new PhraseQuery();

            foreach (var word in kws)
            {
                query.Add(new Term("content", word)); // 向content这个列进行搜索
            }

            query.SetSlop(100);// maximum distance allowed between the query terms; terms that are too far apart in the text are meaningless (e.g. if "大学生" and "简历" are separated by too many words, the match is no longer useful).
            //TopScoreDocCollector is the container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query and put the results into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // get all matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320, which can be used for paging.

            // list to hold the search results
            List <BookVieModel> bookList = new List <BookVieModel>();

            for (int i = 0; i < docs.Length; i++)
            {
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                int      docId = docs[i].doc;         // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId); // fetch the full document for this id

                BookVieModel model = new BookVieModel();
                model.Id    = Convert.ToInt32(doc.Get("Id"));             // 注意:这些字段要和在添加搜索词库的时候保持一致
                model.Title = CreateHightLight(kw, doc.Get("title"));     // 注意:这些字段要和在添加搜索词库的时候保持一致
                // 对搜索到结果中的搜索词进行高亮显示
                model.Content = CreateHightLight(kw, doc.Get("content")); // 注意:这些字段要和在添加搜索词库的时候保持一致

                bookList.Add(model);
            }
            ViewBag.books = bookList;
            ViewBag.kw    = kw;
            return(View("Index"));
        }
Example #9
        public List <T> SearchFromIndexData <T>(string searchKey, int pageIndex, int pageSize, out int totalCount, string orderByFiled, bool isDesc, string needSearchField)
        {
            List <T>      list      = new List <T>();
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            //tokenize the user's keyword
            foreach (string word in SplitWords(searchKey))
            {
                query.Add(new Term(needSearchField, word));
            }
            //query.Add(new Term("content", "C#"));//多个查询条件时 为且的关系
            query.SetSlop(100); //指定关键词相隔最大距离

            //TopScoreDocCollector is a container for search results (not used below, because Search(query, sort) returns the hits directly)
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
            Sort sort      = new Sort(new SortField(orderByFiled, SortField.DOC, isDesc));
            var  docResult = searcher.Search(query, sort);// run the query with the given sort

            //TopDocs(0, GetTotalHits()) covers all matching documents; TopDocs(20, 10) would return documents 20 through 30, which gives a paging effect

            //  ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
            totalCount = docResult.Length();
            int startIndex = Math.Max((pageIndex - 1) * pageSize, 0);
            // clamp the end index so it never runs past the total number of hits
            int endIndex   = Math.Min(startIndex + pageSize, totalCount);

            //collection of entity objects to display
            for (int i = startIndex; i < endIndex; i++)
            {
                int      docId = docResult.Id(i);                // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId);            // fetch the Document object by id

                T    t         = Activator.CreateInstance <T>(); // create an instance of T
                Type type      = typeof(T);
                var  fieldList = type.GetProperties();
                for (int j = 0; j < fieldList.Length; j++)
                {
                    var f = type.GetProperty(fieldList[j].Name);
                    f.SetValue(t, Utilities.ConvertToT(f.PropertyType.Name, doc.Get(fieldList[j].Name)));
                }
                list.Add(t);
            }
            return(list);
        }
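A hedged usage sketch for the generic method above, assuming the sketch lives in the same class; the `Book` model and the field/property names are illustrative assumptions, and the property names must match the fields stored in the index so the reflection loop can populate them:

        // Hypothetical model whose property names match the stored index fields.
        public class Book
        {
            public int Id { get; set; }
            public string Title { get; set; }
            public string Content { get; set; }
        }

        private void ShowFirstPage()
        {
            // Hypothetical call: first page, 10 hits per page, searching the "Title" field.
            int totalCount;
            List <Book> page = SearchFromIndexData <Book>("面向对象", 1, 10, out totalCount, "Id", false, "Title");
        }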
Example #10
        private void button3_Click(object sender, EventArgs e)
        {
            string indexPath = @"C:\Users\杨ShineLon\Desktop\lucenedir"; // 从哪里搜索
            string kw        = textBox1.Text;                           //"面向对象";//对用户输入的搜索条件进行拆分。

            // Tokenize the user's input
            List <string> kws         = new List <string>(); // collection to hold the tokens after splitting
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(kw.ToString()));

            Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                kws.Add(token.TermText());
                //Console.WriteLine(token.TermText());
            }

            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            //foreach (string word in kw.Split(' '))// split on spaces first, letting the user do the tokenizing; each space-separated token is a word, e.g. "计算机   专业"
            //{
            //    query.Add(new Term("body", word));
            //}
            //query.Add(new Term("body","语言"));// more conditions can be added; they are combined and order does not matter.
            // query.Add(new Term("body", "大学生"));

            //query.Add(new Term("body", kw));// documents whose body contains kw
            foreach (var word in kws)
            {
                query.Add(new Term("body", word));
            }

            query.SetSlop(100);// maximum distance allowed between the query terms; terms that are too far apart in the text are meaningless (e.g. if "大学生" and "简历" are separated by too many words, the match is no longer useful).
            //TopScoreDocCollector is the container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query and put the results into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // get all matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320,
            // which can be used for paging
            this.listBox1.Items.Clear();
            for (int i = 0; i < docs.Length; i++)
            {
                //
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                int      docId = docs[i].doc;                      // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId);              // fetch the full document for this id
                this.listBox1.Items.Add(doc.Get("number") + "\n"); // read back the stored field value
                this.listBox1.Items.Add(doc.Get("body") + "\n");
                this.listBox1.Items.Add("-----------------------\n");
            }
        }
        protected Query Pq(float boost, int slop, String field, params String[] texts)
        {
            PhraseQuery query = new PhraseQuery();

            foreach (String text in texts)
            {
                query.Add(new Term(field, text));
            }
            query.SetBoost(boost);
            query.SetSlop(slop);
            return(query);
        }
        /// <summary>
        /// Build the index library
        /// </summary>
        //private void CreateSearchIndex()
        //{
        //    string indexPath = @"C:\lucenedir";//注意和磁盘上文件夹的大小写一致,否则会报错。将创建的分词内容放在该目录下。
        //    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());//指定索引文件(打开索引目录) FS指的是就是FileSystem
        //    bool isUpdate = IndexReader.IndexExists(directory);//IndexReader:对索引进行读取的类。该语句的作用:判断索引库文件夹是否存在以及索引特征文件是否存在。
        //    if (isUpdate)
        //    {
        //        //同时只能有一段代码对索引库进行写操作。当使用IndexWriter打开directory时会自动对索引库文件上锁。
        //        //如果索引目录被锁定(比如索引过程中程序异常退出),则首先解锁(提示一下:如果我现在正在写着已经加锁了,但是还没有写完,这时候又来一个请求,那么不就解锁了吗?这个问题后面会解决)
        //        if (IndexWriter.IsLocked(directory))
        //        {
        //            IndexWriter.Unlock(directory);
        //        }
        //    }
        //    IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);//向索引库中写索引。这时在这里加锁。
        //    List<SearchContent> articelList = ArticelService.LoadEntities(a => a.DelFlag == 0).Select(a => new SearchContent() { Id = a.ID, Content = a.ArticleContent, Title = a.Title }).ToList();

        //    List<SearchContent> photoList = PhotoInfoService.LoadEntities(a => a.DelFlag == 0).Select(a => new SearchContent() { Id = a.ID, Content = a.PictureContent, Title = a.Title }).ToList();
        //    List<SearchContent> videoList = VideoFileInfoService.LoadEntities(a => a.DelFlag == 0).Select(a => new SearchContent() { Id = a.ID, Content = a.VideoContent, Title = a.Title }).ToList();

        //    articelList.AddRange(photoList);

        //    articelList.AddRange(videoList);
        //    foreach (var model in articelList)
        //    {
        //        writer.DeleteDocuments(new Term("Id", model.Id.ToString()));// delete any existing document with this Id
        //        Document document = new Document();// represents one document.
        //        //Field.Store.YES: store the original value; only then can doc.Get("number") read it back later. Field.Index.NOT_ANALYZED: store without tokenizing
        //        document.Add(new Field("Id", model.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        //        //Field.Index.ANALYZED: tokenize before storing; fields that need full-text (fuzzy) search must be stored tokenized

        //        //Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS: store not only the tokens but also their positions.
        //        document.Add(new Field("Title", model.Title, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
        //        document.Add(new Field("Content", model.Content, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
        //        writer.AddDocument(document);

        //    }

        //    writer.Close();// closing automatically releases the lock.
        //    directory.Close();// don't forget to Close, otherwise the indexed content cannot be found by searches
        //}
        /// <summary>
        /// Implementation of the search
        /// </summary>
        /// <returns></returns>
        private List <SearchContent> SearchIndexContent()
        {
            string indexPath = @"C:\lucenedir";
            string k         = Request["txtSearch"];

            string[]      kw        = Common.WebCommon.GetPanGuWord(k).ToArray();
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            foreach (string word in kw)// split on spaces first, letting the user do the tokenizing; each space-separated token is a word, e.g. "计算机   专业"
            {
                query.Add(new Term("Content", word));
            }
            //query.Add(new Term("body","语言"));--可以添加查询条件,两者是add关系.顺序没有关系.
            // query.Add(new Term("body", "大学生"));
            // query.Add(new Term("body", kw));//body中含有kw的文章
            query.SetSlop(100);//多个查询条件的词之间的最大距离.在文章中相隔太远 也就无意义.(例如 “大学生”这个查询条件和"简历"这个查询条件之间如果间隔的词太多也就没有意义了。)
            //TopScoreDocCollector是盛放查询结果的容器
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query and put the results into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // get all matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320,
            // which can be used for paging
            List <SearchContent> list = new List <SearchContent>();

            for (int i = 0; i < docs.Length; i++)
            {
                SearchContent searchContent = new SearchContent();
                //
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                int      docId = docs[i].doc;                           // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId);                   // fetch the full document for this id
                searchContent.Id      = Convert.ToInt32(doc.Get("Id")); // read back the stored field value
                searchContent.Flag    = Convert.ToInt32(doc.Get("Flag"));
                searchContent.AddDate = Convert.ToDateTime(doc.Get("AddDate"));
                searchContent.Title   = doc.Get("Title");
                searchContent.Content = Common.WebCommon.CreateHightLight(k, doc.Get("Content"));
                list.Add(searchContent);
            }
            //insert one record into the detail table for each searched keyword
            SearchDetails searchDetail = new SearchDetails();

            searchDetail.Id             = Guid.NewGuid();
            searchDetail.KeyWords       = k;
            searchDetail.SearchDateTime = DateTime.Now;
            SearchDetailsService.AddEntity(searchDetail);

            return(list);
        }
Example #13
        /// <summary>
        /// Get the search results
        /// </summary>
        /// <param name="msg">Search text</param>
        /// <returns>Search results</returns>
        private List <ViewModelContent> GetSearchResult(string msg)
        {
            List <ViewModelContent> viewModelList = new List <ViewModelContent>();
            string indexPath = @"D:\stu\DotNet\练习\BlangenOA\lucenedir";
            //split the user's search input into terms.
            List <string> keywordList = Common.WebCommon.PanguSplitWords(msg);
            FSDirectory   directory   = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader      = IndexReader.Open(directory, true);
            IndexSearcher searcher    = new IndexSearcher(reader);
            //Add the search criteria
            PhraseQuery query = new PhraseQuery();

            foreach (string keyword in keywordList)
            {
                //documents whose Abstract contains the keyword
                query.Add(new Term("Abstract", keyword));
            }
            // maximum distance allowed between the query terms; terms that are too far apart in the text are meaningless (e.g. if "大学生" and "简历" are separated by too many words, the match is no longer useful).
            query.SetSlop(100);
            //TopScoreDocCollector is the container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            // run the query and put the results into the collector
            searcher.Search(query, null, collector);
            // get all matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320, which can be used for paging.
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
            for (int i = 0; i < docs.Length; i++)
            {
                //
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                int      docId = docs[i].doc;         // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId); // fetch the full document for this id
                viewModelList.Add(new ViewModelContent()
                {
                    // read back the stored field values
                    ID    = Convert.ToInt32(doc.Get("ID")),
                    Title = doc.Get("Title"),
                    //highlight the keyword
                    Abstract = Common.WebCommon.CreateHightLight(msg, doc.Get("Abstract"))
                });
            }
            //store the user's search text in the detail table
            SearchDetailsBll.AddEntity(new SearchDetails()
            {
                Id             = Guid.NewGuid(),
                KeyWords       = msg,
                SearchDateTime = DateTime.Now
            });
            return(viewModelList);
        }
        public static List <SearchResult> SeartchContact(string keyword, int startIndex, int pageSize, out int totalCount)
        {
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(ContactIndexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);

            IEnumerable <string> keyList = SplitHelper.SplitWords(keyword);

            PhraseQuery queryContact = new PhraseQuery();

            foreach (var key in keyList)
            {
                queryContact.Add(new Term("contactInfo", key));
            }
            queryContact.SetSlop(100);

            BooleanQuery query = new BooleanQuery();

            query.Add(queryContact, BooleanClause.Occur.SHOULD); // SHOULD => OR

            // TopScoreDocCollector: container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            // run the search with this query; the results go into collector
            searcher.Search(query, null, collector);
            // first get the total number of hits
            totalCount = collector.GetTotalHits();
            // take hits m through n from the results
            ScoreDoc[] docs = collector.TopDocs(startIndex, pageSize).scoreDocs;
            // iterate over the results
            List <SearchResult> resultList = new List <SearchResult>();

            for (int i = 0; i < docs.Length; i++)
            {
                // get the document id
                int docId = docs[i].doc;
                // the results contain only ids, so the details require a second lookup
                // fetch the content by id: a Document went in, and a Document comes back out
                Lucene.Net.Documents.Document doc = searcher.Doc(docId);
                SearchResult result = new SearchResult();
                result.UserId      = doc.Get("id");
                result.Name        = doc.Get("name");
                result.Email       = doc.Get("email");
                result.PhoneNumber = doc.Get("phone");
                result.Position    = doc.Get("position");
                resultList.Add(result);
            }

            return(resultList);
        }
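A hedged usage sketch (the keyword, page number, and page size are illustrative assumptions) showing how a 1-based page number maps to the startIndex/pageSize arguments consumed by collector.TopDocs above:

        private void ShowContactPage()
        {
            // Hypothetical paging call: page 3 with 20 hits per page.
            int pageNumber = 3, pageSize = 20, totalCount;
            int startIndex = (pageNumber - 1) * pageSize;              // number of hits to skip
            List <SearchResult> hits = SeartchContact("张三", startIndex, pageSize, out totalCount);
            int totalPages = (totalCount + pageSize - 1) / pageSize;   // ceiling division for the pager
        }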
Example #15
        public List <ViewModelContent> ShowSearchContent()
        {
            string        indexPath = @"E:\GitProgram\HLX.OA.WenApp\lucenedir";
            List <string> kw        = HLX.OA.Common.WebCommon.PanGuSplitWord(Request["txtSearch"]);//对用户输入的搜索条件进行拆分。


            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            for (int i = 0; i < kw.Count; i++)
            {
                query.Add(new Term("Content", kw[i]));//body中含有kw的文章
            }

            query.SetSlop(100);// maximum distance allowed between the query terms; terms that are too far apart in the text are meaningless (e.g. if "大学生" and "简历" are separated by too many words, the match is no longer useful).
            //TopScoreDocCollector is the container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query and put the results into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // get all matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320,
                                                                                        // which can be used for paging
                                                                                        //  this.listBox1.Items.Clear();

            List <ViewModelContent> vieModelList = new List <ViewModelContent>();

            for (int i = 0; i < docs.Length; i++)
            {
                ViewModelContent viewModel = new ViewModelContent();
                //
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                int      docId = docs[i].doc;         // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId); // fetch the full document for this id
                viewModel.Id      = Convert.ToInt32(doc.Get("Id"));
                viewModel.Title   = doc.Get("Title");
                viewModel.Content = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Content"));// highlight the search text
                vieModelList.Add(viewModel);
            }
            Model.SearchDetials searchDetail = new SearchDetials();
            searchDetail.Id             = Guid.NewGuid();
            searchDetail.KeyWords       = Request["txtSearch"];
            searchDetail.SearchDateTime = DateTime.Now;
            SearchDetialsService.AddEntity(searchDetail);



            return(vieModelList);
        }
Example #16
        private void button1_Click_1(object sender, EventArgs e)
        {
            string        indexPath = "c:/index";
            string        kw        = textBox1.Text;
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery   query     = new PhraseQuery();

            //todo: tokenize the keyword entered by the user
            //char[] str = textBox1.Text.ToCharArray();
            //for (int i = 0; i < str.Length; i++)
            //{
            //    query.Add(new Term("name", str[i].ToString()));
            //}

            List <String> list        = new List <string>();
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(textBox1.Text));
            Token         token       = null;

            while ((token = tokenStream.Next()) != null)
            {
                list.Add(token.TermText());
            }

            for (int i = 0; i < list.Count; i++)
            {
                query.Add(new Term("name", list[i].ToString()));
            }

            query.SetSlop(100);
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);

            searcher.Search(query, null, collector);
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

            for (int i = 0; i < docs.Length; i++)
            {
                int docId = docs[i].doc;// document id (a key assigned internally by Lucene.Net)
                //the results contain only ids; to get the Document, fetch it with Doc
                //this keeps memory usage low
                Document doc  = searcher.Doc(docId);// fetch the Document by id
                string   code = doc.Get("code");
                string   name = doc.Get("name");

                MessageBox.Show("code:" + code + "name:" + name);
            }
        }
Example #17
        public static void PhraseQueryTest(Analyzer analyzer, string field, string keyword, int slop)
        {
            Console.WriteLine("====PhraseQuery====");
            string[]    arr   = keyword.Trim().Split(new char[] { ' ', ',', ',', '、' }, StringSplitOptions.RemoveEmptyEntries);
            PhraseQuery query = new PhraseQuery();

            foreach (string item in arr)
            {
                query.Add(new Term(field, item));
            }
            query.SetSlop(slop);
            ShowQueryExpression(analyzer, query, keyword);
            SearchToShow(query);
            Console.WriteLine();
        }
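A hedged invocation sketch for the test helper above, assuming it is called from the same class; the PanGuAnalyzer, field name, and keyword are illustrative assumptions:

        private static void RunPhraseQueryDemo()
        {
            // Hypothetical call: split "面向 对象" on the separators above and require the
            // two terms to appear within 0 positions of each other in the "body" field.
            PhraseQueryTest(new PanGuAnalyzer(), "body", "面向 对象", 0);
        }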
        public static List <int> SeartchUser(string keyword)
        {
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(UserInfoIndexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);

            IEnumerable <string> keyList = SplitHelper.SplitWords(keyword);

            PhraseQuery queryUserInfo = new PhraseQuery();

            foreach (var key in keyList)
            {
                queryUserInfo.Add(new Term("userInfo", key));
            }
            queryUserInfo.SetSlop(100);

            BooleanQuery query = new BooleanQuery();

            query.Add(queryUserInfo, BooleanClause.Occur.SHOULD);


            // TopScoreDocCollector: container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            // run the search with this query; the results go into collector
            searcher.Search(query, null, collector);
            // first get the total number of hits
            int totalCount = collector.GetTotalHits();

            //take all hits here to make the subsequent lookups easier.
            ScoreDoc[] docs = collector.TopDocs(0, totalCount).scoreDocs;
            // iterate over the results
            List <int> resultList = new List <int>();

            for (int i = 0; i < docs.Length; i++)
            {
                // get the document id; Documents can be very memory-heavy (think DataSet vs. DataReader)
                int docId = docs[i].doc;
                // the results contain only ids, so the details require a second lookup
                // fetch the content by id: a Document went in, and a Document comes back out
                Lucene.Net.Documents.Document doc = searcher.Doc(docId);
                int uid = Convert.ToInt32(doc.Get("id"));
                resultList.Add(uid);
            }

            return(resultList);
        }
        /// <summary>
        /// Match a multi-word phrase exactly. (This is like how QueryParser handles quoted phrases)
        /// </summary>
        /// <param name="field"></param>
        /// <param name="phrase"></param>
        /// <param name="slop"></param>
        /// <returns></returns>
        public QueryBuilder MatchPhrase(string field, string phrase, int slop = 0)
        {
            if (string.IsNullOrWhiteSpace(phrase))
            {
                return(this);
            }

            var query = new PhraseQuery();

            foreach (var token in _analyzer.TokenListFromString(phrase))
            {
                query.Add(new Term(field, token));
            }

            query.SetSlop(slop);

            return(AddSubQuery(query));
        }
Example #20
        /// <summary>
        /// Search the index library for the keyword
        /// </summary>
        private void SearchFromIndexData()
        {
            string        indexPath = Context.Server.MapPath("~/IndexData");
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            //tokenize the user's keyword
            foreach (string word in Common.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
            {
                query.Add(new Term("content", word));
            }
            //query.Add(new Term("content", "C#"));//多个查询条件时 为且的关系
            query.SetSlop(100);  //指定关键词相隔最大距离

            //TopScoreDocCollector is the container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);// run the query and put the results into the collector
            //TopDocs(0, GetTotalHits()) covers all matching documents; TopDocs(20, 10) would return documents 20 through 30, which gives a paging effect
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

            //collection of entity objects to display
            List <PZYM.Shop.Model.Books> bookResult = new List <PZYM.Shop.Model.Books>();

            for (int i = 0; i < docs.Length; i++)
            {
                int      docId = docs[i].doc;         // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId); // fetch the Document object by id


                PZYM.Shop.Model.Books book = new PZYM.Shop.Model.Books();
                book.Title = doc.Get("title");
                //book.ContentDescription = doc.Get("content");//未使用高亮
                //搜索关键字高亮显示 使用盘古提供高亮插件
                book.ContentDescription = Common.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
                book.Id = Convert.ToInt32(doc.Get("id"));
                bookResult.Add(book);
            }
            Repeater1.DataSource = bookResult;
            Repeater1.DataBind();
        }
        /// <summary>
        /// Get the search results
        /// </summary>
        protected void btnGetSearchResult_Click(object sender, EventArgs e)
        {
            string keyword = txtKeyWords.Text;

            string        indexPath = Context.Server.MapPath("~/Index"); // 索引文档保存位置
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            // Query criteria
            PhraseQuery query = new PhraseQuery();

            // equivalent to: where contains("msg", keyword)
            query.Add(new Term("msg", keyword));
            // if two terms are more than 100 positions apart (a rule of thumb), the document is excluded, because terms that far apart are no longer relevant
            query.SetSlop(100);
            // TopScoreDocCollector: container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            // run the search with this query; the results go into collector
            searcher.Search(query, null, collector);
            // take hits m through n from the results
            // collector.GetTotalHits() is the total number of hits
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
            // iterate over the results
            IList <SearchResult> resultList = new List <SearchResult>();

            for (int i = 0; i < docs.Length; i++)
            {
                // get the document id; Documents can be very memory-heavy (think DataSet vs. DataReader)
                int docId = docs[i].doc;
                // the results contain only ids, so the details require a second lookup
                // fetch the content by id: a Document went in, and a Document comes back out
                Document     doc    = searcher.Doc(docId);
                SearchResult result = new SearchResult();
                result.Id  = Convert.ToInt32(doc.Get("id"));
                result.Msg = HighlightHelper.HighLight(keyword, doc.Get("msg"));

                resultList.Add(result);
            }

            // Bind to the Repeater
            rptSearchResult.DataSource = resultList;
            rptSearchResult.DataBind();
        }
Example #22
        public void SearchFromIndexData()
        {
            string        indexPath = System.Web.HttpContext.Current.Server.MapPath("~/IndexData");
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            //tokenize the user's keyword
            foreach (string word in WitKeyDu.Site.Web.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
            {
                query.Add(new Term("ForumContent", word));
            }
            //query.Add(new Term("content", "C#"));//多个查询条件时 为且的关系
            query.SetSlop(100);  //指定关键词相隔最大距离

            //TopScoreDocCollector is the container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);// run the query and put the results into the collector
            //TopDocs(0, GetTotalHits()) covers all matching documents; TopDocs(20, 10) would return documents 20 through 30, which gives a paging effect
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

            //collection of entity objects to display
            List <Forum> ForumResult = new List <Forum>();

            for (int i = 0; i < docs.Length; i++)
            {
                int      docId = docs[i].doc;         // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId); // fetch the Document object by id


                Forum forum = new Forum();
                forum.ForumName = doc.Get("ForumName");
                //book.Title = doc.Get("title");
                ////book.ContentDescription = doc.Get("content");//未使用高亮
                ////搜索关键字高亮显示 使用盘古提供高亮插件
                forum.ForumContent = WitKeyDu.Site.Web.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("ForumContent"));
                forum.ForumTypeID  = Convert.ToInt32(doc.Get("ID"));
                ForumResult.Add(forum);
            }
        }
Example #23
        public static List <ViewModelContent> ShowSearchContent(HttpRequestBase Request, string msg)
        {
            string        indexPath = ConfigurationManager.AppSettings["lucenedirPath"];
            List <string> list      = Common.WebCommon.PanGuSplitWord(msg);//对用户输入的搜索条件进行拆分。
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            foreach (string word in list)// split on spaces first, letting the user do the tokenizing; each space-separated token is a word, e.g. "计算机   专业"
            {
                query.Add(new Term("Title", word));
            }
            //query.Add(new Term("body","语言"));--可以添加查询条件,两者是add关系.顺序没有关系.
            // query.Add(new Term("body", "大学生"));
            // query.Add(new Term("body", kw));//body中含有kw的文章
            query.SetSlop(100);//多个查询条件的词之间的最大距离.在文章中相隔太远 也就无意义.(例如 “大学生”这个查询条件和"简历"这个查询条件之间如果间隔的词太多也就没有意义了。)
            //TopScoreDocCollector是盛放查询结果的容器
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query and put the results into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // get all matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320,
            // which can be used for paging
            List <Models.ViewModelContent> viewModelList = new List <Models.ViewModelContent>();

            for (int i = 0; i < docs.Length; i++)
            {
                //
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                Models.ViewModelContent viewModel = new Models.ViewModelContent();
                int      docId = docs[i].doc;                     // id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId);             // fetch the full document for this id
                viewModel.Id    = Convert.ToInt32(doc.Get("Id")); // read back the stored field value
                viewModel.Title = doc.Get("Title");
                // viewModel.Content = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Content"));// highlight the search keywords.
                viewModelList.Add(viewModel);
            }
            //delete all rows from the summary table
            //then insert the searched keywords into the detail table

            return(viewModelList);
        }
Example #24
        private void SearchFromIndexData(string searchkey)
        {
            string        indexPath = Context.Server.MapPath("~/IndexData");
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            //tokenize the user's keyword
            foreach (string word in SplitContent.SplitWords(searchkey))
            {
                query.Add(new Term("TITLE", word));
            }
            //query.Add(new Term("content", "C#"));//多个查询条件时 为且的关系
            query.SetSlop(100);  //指定关键词相隔最大距离

            //TopScoreDocCollector is the container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query and put the results into the collector
            //TopDocs(0, GetTotalHits()) covers all matching documents; TopDocs(20, 10) would return documents 20 through 30, which gives a paging effect
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; //collector.GetTotalHits()

            //collection of entity objects to display
            for (int i = 0; i < docs.Length; i++)
            {
                int      docID = docs[i].doc;         // ID of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docID); // fetch the Document object by ID
                SUC_NEWS mod   = new SUC_NEWS();
                mod.TITLE = SplitContent.HightLight(searchkey, doc.Get("TITLE"));
                mod.TITLE = string.IsNullOrEmpty(mod.TITLE) ? doc.Get("TITLE") : mod.TITLE;
                //book.ContentDESCRPTION = doc.Get("content");//未使用高亮
                //搜索关键字高亮显示 使用盘古提供高亮插件
                mod.CONTENT     = SplitContent.HightLight(searchkey, doc.Get("CONTENT"));
                mod.CONTENT     = string.IsNullOrEmpty(mod.CONTENT) ? doc.Get("CONTENT") : mod.CONTENT;
                mod.CONTENT     = mod.CONTENT.Replace("<b>", "");
                mod.ID          = Convert.ToInt32(doc.Get("ID"));
                mod.pandaWebUrl = doc.Get("URL");
                modResult.Add(mod);
            }
        }
Example #25
        //Search for items containing the keywords
        public static List <JobSerach> SearchContent(string kw, int index, int skipCount)
        {
            //string indexPath = lucenePath;// better to keep this in a configuration file.
            kw = kw.ToLower();
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(lucenePath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //Search criteria
            PhraseQuery query = new PhraseQuery();

            foreach (string word in SplitWord(kw))// the user's search text has been tokenized with Pan Gu
            {
                query.Add(new Term("Title", word));
                //query.Add(new Term("Content", word));
                //query.Add(new Term("MaiDian", word));
            }
            query.SetSlop(100);// maximum distance allowed between the query terms; terms that are too far apart in the text are meaningless (e.g. if "大学生" and "简历" are separated by too many words, the match is no longer useful).
            //TopScoreDocCollector is the container that holds the search results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                             // run the query and put the results into the collector
            ScoreDoc[] docs = collector.TopDocs(index - 1, skipCount).scoreDocs; // get the matching documents; GetTotalHits() is the total hit count. TopDocs(300, 20) would return documents 300 through 320, which can be used for paging.

            List <JobSerach> list = new List <JobSerach>();

            for (int i = 0; i < docs.Length; i++)
            {
                //
                // ScoreDoc[] only gives the document ids, so the matching Documents are not all loaded into memory at once, which reduces memory pressure; when the full content is needed, searcher.Doc fetches the Document object by its id.
                int       docId  = docs[i].doc;         // id of the matching document (assigned internally by Lucene)
                Document  doc    = searcher.Doc(docId); // fetch the full document for this id
                JobSerach result = new JobSerach();
                result.Title        = Highlight(kw, doc.Get("Title"));
                result.Id           = Convert.ToInt32(doc.Get("Id"));
                result.ImageAddress = doc.Get("ImageAddress");
                result.MaiDian      = doc.Get("MaiDian");
                result.Price        = double.Parse(doc.Get("Price"));
                result.Content      = doc.Get("Content");
                list.Add(result);
            }
            return(list);
        }
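
A hedged usage sketch of SearchContent above: given the TopDocs(index - 1, skipCount) call, index behaves as a 1-based start position and skipCount as the number of hits to return (the page values and keyword below are purely illustrative):

            // Illustrative: second page of 20 hits for the keyword "resume".
            int pageNumber = 2, pageSize = 20;
            int start = (pageNumber - 1) * pageSize + 1; // convert to the 1-based start position expected by SearchContent

            List<JobSerach> hits = SearchContent("resume", start, pageSize);
            foreach (JobSerach job in hits)
            {
                Console.WriteLine("{0}: {1}", job.Id, job.Title);
            }
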
Example #26
0
        /// <summary>
        /// Adds PhraseQuery clauses in batch
        /// </summary>
        /// <param name="fieldNameAndBoosts">Field names and their boost levels</param>
        /// <param name="phrase">The phrase to search for</param>
        /// <param name="occur">How the resulting clauses are combined</param>
        /// <param name="asFilter">Whether to apply the clause as a filter</param>
        /// <returns></returns>
        public LuceneSearchBuilder WithPhrases(Dictionary <string, BoostLevel> fieldNameAndBoosts, string phrase, BooleanClause.Occur occur, bool asFilter = false)
        {
            string filteredPhrase = ClauseScrubber.LuceneKeywordsScrub(phrase);

            if (string.IsNullOrEmpty(filteredPhrase))
            {
                return(this);
            }

            string[] nameSegments = ClauseScrubber.SegmentForPhraseQuery(filteredPhrase);
            if (nameSegments.Length == 1)
            {
                return(WithFields(fieldNameAndBoosts, nameSegments[0], false, occur, asFilter));
            }
            else
            {
                BooleanQuery query = new BooleanQuery();
                foreach (var fieldNameAndBoost in fieldNameAndBoosts)
                {
                    PhraseQuery phraseQuery = new PhraseQuery();
                    foreach (var nameSegment in nameSegments)
                    {
                        phraseQuery.Add(new Term(fieldNameAndBoost.Key, nameSegment));
                    }

                    phraseQuery.SetSlop(PhraseQuerySlop);
                    SetBoost(phraseQuery, fieldNameAndBoost.Value);
                    query.Add(phraseQuery, occur);
                }

                if (asFilter)
                {
                    filters.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
                }
                else
                {
                    clauses.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
                }

                return(this);
            }
        }
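
A hedged usage sketch of WithPhrases: searching the same phrase across two fields with different weights. The BoostLevel member names and the parameterless LuceneSearchBuilder constructor are assumptions, not confirmed API:

            // Assumed enum members; the real BoostLevel values may differ.
            var fieldBoosts = new Dictionary<string, BoostLevel>
            {
                { "Title",   BoostLevel.High   },
                { "Content", BoostLevel.Medium }
            };

            LuceneSearchBuilder builder = new LuceneSearchBuilder();
            // Each field gets its own PhraseQuery; SHOULD means a match in either field is enough.
            builder.WithPhrases(fieldBoosts, "distributed search engine", BooleanClause.Occur.SHOULD);
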
Example #27
0
        /// <summary>
        ///     Full-text search
        /// </summary>
        /// <param name="keyword">Search keyword</param>
        /// <param name="startRowIndex">Zero-based offset into the hit list</param>
        /// <param name="pageSize">Number of hits to return</param>
        /// <param name="totalCount">Total number of hits</param>
        /// <returns></returns>
        public static List <SearchResult> DoSearch(string keyword, int startRowIndex, int pageSize, out int totalCount)
        {
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_indexPath), new NoLockFactory());
            IndexReader reader    = IndexReader.Open(directory, true);
            //IndexSearcher is the class that performs the search
            var searcher = new IndexSearcher(reader);
            var query    = new PhraseQuery();

            foreach (string word in GetKeyWords(keyword))
            {
                query.Add(new Term("body", word));
            }
            query.SetSlop(100);                                                       //terms only count as a match when they are within 100 positions of each other
            TopScoreDocCollector collector = TopScoreDocCollector.create(1024, true); //collect at most 1024 hits

            searcher.Search(query, null, collector);
            totalCount = collector.GetTotalHits();                                  //total number of hits
            ScoreDoc[] docs = collector.TopDocs(startRowIndex, pageSize).scoreDocs; //paging; the start index is zero-based (0 is the first hit)
            var        list = new List <SearchResult>();

            for (int i = 0; i < docs.Length; i++)
            {
                int docId = docs[i].doc; //document number of this hit (internal key assigned by Lucene.Net)
                //the hit list only contains document ids; the full Document has to be fetched via searcher.Doc,
                //which keeps memory usage low
                Document doc      = searcher.Doc(docId);
                string   number   = doc.Get("number");
                string   title    = doc.Get("title");
                string   fullPath = doc.Get("fullPath");
                string   body     = doc.Get("body");

                var searchResult = new SearchResult
                {
                    Number      = number,
                    Title       = title,
                    FullPath    = fullPath,
                    BodyPreview = Preview(body, keyword)
                };
                list.Add(searchResult);
            }
            return(list);
        }
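
A hedged usage sketch of the paging contract above: startRowIndex is the zero-based offset into the hit list and totalCount comes back through the out parameter, so the caller can derive a page count (the page size of 10 is illustrative):

            int pageSize  = 10;
            int pageIndex = 0; // zero-based page

            int totalCount;
            List<SearchResult> page = DoSearch("lucene", pageIndex * pageSize, pageSize, out totalCount);

            // Round up to get the total number of pages.
            int totalPages = (totalCount + pageSize - 1) / pageSize;
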
Example #28
0
        /// <summary>
        /// Adds a PhraseQuery
        /// </summary>
        /// <param name="fieldName">Name of the field to search</param>
        /// <param name="phrase">The phrase to search for</param>
        /// <param name="boostLevel">Boost level</param>
        /// <param name="asFilter">Whether to apply the clause as a filter</param>
        /// <returns>LuceneSearchBuilder</returns>
        public LuceneSearchBuilder WithPhrase(string fieldName, string phrase, BoostLevel?boostLevel = null, bool asFilter = false)
        {
            string filteredPhrase = ClauseScrubber.LuceneKeywordsScrub(phrase);

            if (string.IsNullOrEmpty(filteredPhrase))
            {
                return(this);
            }

            if (filteredPhrase.Length == 1)
            {
                return(WithField(fieldName, filteredPhrase, false, boostLevel, asFilter));
            }

            string[] nameSegments = ClauseScrubber.SegmentForPhraseQuery(filteredPhrase);

            PhraseQuery phraseQuery = new PhraseQuery();

            foreach (var nameSegment in nameSegments)
            {
                phraseQuery.Add(new Term(fieldName, nameSegment));
            }

            phraseQuery.SetSlop(PhraseQuerySlop);

            if (boostLevel.HasValue)
            {
                SetBoost(phraseQuery, boostLevel.Value);
            }

            if (asFilter)
            {
                filters.Add(new BooleanClause(phraseQuery, BooleanClause.Occur.MUST));
            }
            else
            {
                clauses.Add(new BooleanClause(phraseQuery, BooleanClause.Occur.MUST));
            }

            return(this);
        }
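
Because the method returns the builder itself, phrase clauses can be chained fluently. A hedged sketch (the field names, the parameterless constructor, and the filter usage are illustrative assumptions):

            LuceneSearchBuilder builder = new LuceneSearchBuilder();

            // One phrase contributes to scoring, the other is applied only as a filter.
            builder.WithPhrase("title", "distributed search engine")
                   .WithPhrase("category", "technology", asFilter: true);
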
Example #29
0
        /// <summary>
        /// Searches the index for the given keyword
        /// </summary>
        public List <T> SearchFromIndexData <T>(string kw, Action <T, Document> AddResult) where T : new()
        {
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);

            PhraseQuery query = new PhraseQuery();//search criteria

            //segment the user's keyword into words
            foreach (string word in SplitContent.SplitWords(kw))
            {
                query.Add(new Term("content", word)); //with multiple terms the relationship is AND: all of them must appear
            }
            query.SetSlop(100);                       //maximum distance allowed between the keywords


            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);   //TopScoreDocCollector is the container that holds the search results

            searcher.Search(query, null, collector);                                    //run the query and put the hits into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; //TopDocs(0, GetTotalHits()) returns every matching document; TopDocs(20, 10) would return documents 20 through 30, which is how paging can be achieved

            //build the list of result entities to display
            List <T> bookResult = new List <T>();

            for (int i = 0; i < docs.Length; i++)
            {
                int      docId  = docs[i].doc;         //document id of this hit (assigned internally by Lucene)
                Document doc    = searcher.Doc(docId); //fetch the Document object by its id
                T        entity = new T();
                AddResult(entity, doc);
                bookResult.Add(entity);
                //Bid book = new Bid();
                //book.Title = doc.Get("title");
                //book.BidContent = SplitContent.HightLight(kw, doc.Get("content")); //highlight the search keyword using the highlighter plug-in shipped with PanGu
                //book.ID = Convert.ToInt32(doc.Get("id"));
                //bookResult.Add(book);
            }
            return(bookResult);
        }
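
A hedged usage sketch of the generic method above, following the commented-out Bid mapping as a guide (the Bid property names come from that commented code; the indexSearchHelper instance name is illustrative):

            // indexSearchHelper stands in for an instance of the class that defines SearchFromIndexData.
            List<Bid> bids = indexSearchHelper.SearchFromIndexData<Bid>("tender notice", (bid, doc) =>
            {
                bid.Title      = doc.Get("title");
                bid.BidContent = SplitContent.HightLight("tender notice", doc.Get("content")); // highlight via the PanGu plug-in
                bid.ID         = Convert.ToInt32(doc.Get("id"));
            });
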
        private List <SearchResult> DoSearch(int startRowIndex, int pageSize, out int totalCount)
        {
            string      indexPath = "E:/Index";
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader reader    = IndexReader.Open(directory, true);
            //IndexSearcher is the class that performs the search
            IndexSearcher searcher = new IndexSearcher(reader);
            PhraseQuery   query    = new PhraseQuery();

            foreach (string word in CommonHelper.SplitWord(kw))
            {
                query.Add(new Term("body", word));
            }
            query.SetSlop(100);                                                       //terms only count as a match when they are within 100 positions of each other
            TopScoreDocCollector collector = TopScoreDocCollector.create(1024, true); //collect at most 1024 hits

            searcher.Search(query, null, collector);
            totalCount = collector.GetTotalHits();                                           //total number of hits
            ScoreDoc[]          docs = collector.TopDocs(startRowIndex, pageSize).scoreDocs; //paging; the start index is zero-based (0 is the first hit)
            List <SearchResult> list = new List <SearchResult>();

            for (int i = 0; i < docs.Length; i++)
            {
                int docID = docs[i].doc;//document number of this hit (internal key assigned by Lucene.Net)
                //the hit list only contains document ids; the full Document has to be fetched via searcher.Doc,
                //which keeps memory usage low
                Document doc    = searcher.Doc(docID);
                string   number = doc.Get("number");
                string   title  = doc.Get("title");
                string   body   = doc.Get("body");

                SearchResult searchResult = new SearchResult()
                {
                    Number = number, Score = title, Uri = Preview(body, kw)
                };
                list.Add(searchResult);
            }
            return(list);
        }