/// <summary>
/// Builds a SpanNearQuery over two random terms (constructed with the arguments 0 and true)
/// and a PhraseQuery, then passes both to AssertSameSet.
/// </summary>
/// <remarks>
/// NOTE(review): the braces and any statements adding t1/t2 to q2 are not visible here;
/// presumably they were lost when the snippet was extracted - confirm against the original.
/// </remarks>
public virtual void TestSpanNearVersusPhrase()
     // Two random terms shared by both queries.
     Term t1 = RandomTerm();
     Term t2 = RandomTerm();
     SpanQuery[] subquery = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
     SpanNearQuery q1 = new SpanNearQuery(subquery, 0, true);
     // As visible, q2 is an empty PhraseQuery (no terms added).
     PhraseQuery q2 = new PhraseQuery();
     AssertSameSet(q1, q2);
Exemple #2
        /// <summary>
        /// Runs <paramref name="text"/> through <paramref name="analyzer"/> for <paramref name="field"/>
        /// and builds a query from the emitted terms: a TermQuery when there is exactly one term,
        /// otherwise a PhraseQuery or a MultiPhraseQuery.
        /// </summary>
        /// <remarks>
        /// NOTE(review): braces, the else branch, the loop bodies and the statement that adds
        /// <c>current</c> to <c>terms</c> are not visible in this snippet - presumably lost in extraction.
        /// </remarks>
        static Query ExecuteAnalyzer(Analyzer analyzer, string field, string text)
            TokenStream tokenStream = analyzer.TokenStream(field, new StringReader(text));

            ITermAttribute termAttribute = tokenStream.AddAttribute <ITermAttribute>();
            IPositionIncrementAttribute positionIncrementAttribute = tokenStream.AddAttribute <IPositionIncrementAttribute>();

            // One inner list per token position; "current" is the list for the position being filled.
            List <List <Term> > terms   = new List <List <Term> >();
            List <Term>         current = null;

            while (tokenStream.IncrementToken())
                // A positive position increment starts a new term list.
                if (positionIncrementAttribute.PositionIncrement > 0)
                    current = new List <Term>();
                if (current != null)
                    current.Add(new Term(field, termAttribute.Term));

            // Exactly one list holding exactly one term: a plain TermQuery.
            if (terms.Count == 1 && terms[0].Count == 1)
                return(new TermQuery(terms[0][0]));
            // Total term count equals the number of lists, i.e. every list holds one term.
            else if (terms.Select(l => l.Count).Sum() == terms.Count)
                PhraseQuery phraseQuery = new PhraseQuery();
                foreach (var positionList in terms)
                // Presumably the fallback (else) branch - TODO confirm.
                MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
                foreach (var positionList in terms)
Exemple #3
        /// <summary>
        /// Visits a PhraseQuery: collects the text of its terms into per-position pieces,
        /// iterates the pieces with a space appended to <c>_text</c> between them, reads the slop,
        /// and then delegates to the base visitor.
        /// </summary>
        /// <param name="phraseq">The phrase query being visited.</param>
        /// <returns>The result of <c>base.VisitPhraseQuery(phraseq)</c>.</returns>
        /// <remarks>
        /// NOTE(review): braces, else branches and the statements that write the pieces, field and
        /// slop are not visible in this snippet - presumably lost in extraction.
        /// </remarks>
        public override Query VisitPhraseQuery(PhraseQuery phraseq)
            var terms = phraseq.GetTerms();
            var field = terms[0].Field();


            // Positions are simply 0..n-1, one per term.
            var positions = new int[terms.Length];
            for (var i = 0; i < positions.Length; i++)
                positions[i] = i;

            // Group term texts by position; "|" joins texts that share a position.
            var pieces = new string[terms.Length];
            for (var i = 0; i < terms.Length; i++)
                var pos = positions[i];
                var s = pieces[pos];
                if (s == null)
                    s = (terms[i]).Text();
                    // Presumably the else branch (append to an existing piece) - TODO confirm.
                    s += "|" + (terms[i]).Text();
                pieces[pos] = s;
            // A space is appended to _text before every piece except the first.
            for (var i = 0; i < pieces.Length; i++)
                if (i > 0)
                    _text.Append(' ');
                var s = pieces[i];
                if (s == null)

            var slop = phraseq.GetSlop();
            if (slop != 0)


            return base.VisitPhraseQuery(phraseq);
        /// <summary>
        /// Asserts that parsing <c>"foo bar"|"star wars"</c> equals a BooleanQuery holding the two
        /// phrase queries ("foo bar" and "star wars" on "field") as SHOULD clauses.
        /// </summary>
        public virtual void TestORPhrase()
            // Expected first phrase: foo bar
            PhraseQuery phrase1 = new PhraseQuery();

            phrase1.Add(new Term("field", "foo"));
            phrase1.Add(new Term("field", "bar"));
            // Expected second phrase: star wars
            PhraseQuery phrase2 = new PhraseQuery();

            phrase2.Add(new Term("field", "star"));
            phrase2.Add(new Term("field", "wars"));
            BooleanQuery expected = new BooleanQuery();

            expected.Add(phrase1, BooleanClause.Occur.SHOULD);
            expected.Add(phrase2, BooleanClause.Occur.SHOULD);

            assertEquals(expected, Parse("\"foo bar\"|\"star wars\""));
Exemple #5
        /// <summary>
        /// Adds a single-term PhraseQuery to <c>_current</c> for the field named by
        /// <paramref name="selector"/> (via <c>selector.GetName()</c>) with the given <paramref name="occur"/>.
        /// </summary>
        /// <param name="value">The phrase text; it is passed through TrimTerm and checked with IsNull first.</param>
        /// <param name="selector">Expression whose name is used as the field name.</param>
        /// <param name="occur">The occurrence flag used when adding the query to <c>_current</c>.</param>
        /// <remarks>
        /// NOTE(review): braces, the body of the IsNull check and the return statement are not
        /// visible in this snippet - presumably lost in extraction.
        /// </remarks>
        private QueryProvider <TLuceneEntity> _Phrase <TResult>(string value, Expression <Func <TLuceneEntity, TResult> > selector, Occur occur)
            TrimTerm(ref value);

            if (IsNull(ref value))

            var q = new PhraseQuery();

            // The whole value is added as one term; it is not tokenized here.
            q.Add(new Term(selector.GetName(), value));

            _current.Add(q, occur);

        /// <summary>
        /// Builds a PhraseQuery with a single term made of the lower-cased (invariant culture)
        /// field name and field value, after the value has been passed through IdHelper.ProcessGUIDs.
        /// </summary>
        /// <param name="index">The index; only checked for null in the visible code.</param>
        /// <param name="fieldName">Name of the field to match.</param>
        /// <param name="fieldValue">Value the field must match.</param>
        /// <remarks>
        /// NOTE(review): braces, the body of the null-or-empty guard and the return statement are
        /// not visible in this snippet - presumably lost in extraction.
        /// </remarks>
        public static Query BuildExactFieldValueClause(Index index, string fieldName, string fieldValue)
            Assert.ArgumentNotNull(index, "Index");

            // Guard for a missing field name or value; what it does is not visible here.
            if (string.IsNullOrEmpty(fieldName) || string.IsNullOrEmpty(fieldValue))

            fieldValue = IdHelper.ProcessGUIDs(fieldValue);

            var phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term(fieldName.ToLowerInvariant(), fieldValue.ToLowerInvariant()));

Exemple #7
        /// <summary>
        /// Asserts that parsing <c>"foo bar"+"star wars"</c> equals a BooleanQuery holding the two
        /// phrase queries ("foo bar" and "star wars" on "field") as MUST clauses.
        /// </summary>
        public virtual void TestANDPhrase()
            // Expected first phrase: foo bar
            PhraseQuery phrase1 = new PhraseQuery();

            phrase1.Add(new Term("field", "foo"));
            phrase1.Add(new Term("field", "bar"));
            // Expected second phrase: star wars
            PhraseQuery phrase2 = new PhraseQuery();

            phrase2.Add(new Term("field", "star"));
            phrase2.Add(new Term("field", "wars"));
            BooleanQuery expected = new BooleanQuery();

            expected.Add(phrase1, Occur.MUST);
            expected.Add(phrase2, Occur.MUST);

            assertEquals(expected, Parse("\"foo bar\"+\"star wars\""));
Exemple #8
        /// <summary>
        /// Searches the "body" field of the Lucene index for the words obtained from
        /// <c>Request["txtContent"]</c>, maps the hits to view models and records the search
        /// via SearchDetailsBll.AddEntity.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the index path is a hard-coded absolute path, and the reader/searcher are
        /// not disposed in the visible code. Braces, closing initializers and the return statement
        /// are not visible in this snippet - presumably lost in extraction.
        /// </remarks>
        private List <Models.ViewSearchContentModel> SearchBookContent()
            var           indexPath = @"C:\Users\Victor\Desktop\LuceneNetDir";
            var           kw        = Common.WebCommon.GetPanGuWord(Request["txtContent"]);
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery query = new PhraseQuery();

            foreach (var word in kw)// Let the user segment with spaces first; the space-separated pieces are the words, e.g. "computer   major"
                query.Add(new Term("body", word));
            // WITH_POSITIONS_OFFSETS -> the PanGu segmenter already recorded the gaps left by noise words at indexing time, so lucene.net only needs to match
            query.Slop = 100;// Maximum distance between the query terms; terms too far apart in an article are meaningless (e.g. too many words between "college student" and "resume")
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);

            searcher.Search(query, null, collector);                               // Run the search with the query; results are placed into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.TotalHits).ScoreDocs; // Get all matching documents; collector.TotalHits is the total count; TopDocs(300,20) would return documents 300 through 320
            var        list = new List <Models.ViewSearchContentModel>();

            for (int i = 0; i < docs.Length; i++)
                int      docId = docs[i].Doc;         // Id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId); // Load the full document for that id
                list.Add(new Models.ViewSearchContentModel
                    Id      = doc.Get("Id"),
                    Title   = doc.Get("Title"),
                    Content = Common.WebCommon.CreateHighLight(Request["txtContent"], doc.Get("Content"))// Highlight the search keywords in the content

            // Record this search (keywords and time).
            SearchDetailsBll.AddEntity(new SearchDetails
                Id             = Guid.NewGuid(),
                KeyWords       = Request["txtContent"],
                SearchDateTime = DateTime.Now,

Exemple #9
        /// <summary>
        /// Builds a Lucene <see cref="PhraseQuery"/> from a <c>match_phrase</c> query definition.
        /// </summary>
        /// <param name="builder">The query service; not used by this provider.</param>
        /// <param name="context">Query context; its <c>DefaultAnalyzer</c> is used to tokenize the phrase.</param>
        /// <param name="type">The query type name; only <c>"match_phrase"</c> is handled.</param>
        /// <param name="query">
        /// The query body. Its first property names the field; the property value is either the
        /// phrase string itself or an object with a required <c>value</c> and an optional <c>slop</c>.
        /// </param>
        /// <returns>
        /// The phrase query, or <c>null</c> when <paramref name="type"/> is not <c>"match_phrase"</c>.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// The object form has no <c>value</c> property, or the property value is neither a string
        /// nor an object.
        /// </exception>
        public Query CreateQuery(ILuceneQueryService builder, LuceneQueryContext context, string type, JObject query)
        {
            if (type != "match_phrase")
            {
                // Not ours: signal "not handled" to the caller.
                return null;
            }

            var first = query.Properties().First();

            var    phraseQuery = new PhraseQuery();
            JToken value;

            switch (first.Value.Type)
            {
                case JTokenType.String:
                    // Short form: { "field": "some phrase" }
                    value = first.Value;
                    break;

                case JTokenType.Object:
                    // Long form: { "field": { "value": "some phrase", "slop": 2 } }
                    var obj = (JObject)first.Value;

                    if (!obj.TryGetValue("value", out value))
                    {
                        throw new ArgumentException("Missing value in match phrase query");
                    }

                    // TODO: read "analyzer" property

                    if (obj.TryGetValue("slop", out var slop))
                    {
                        phraseQuery.Slop = slop.Value <int>();
                    }

                    break;

                // The message used to say "wildcard", a copy-paste leftover from another provider.
                default: throw new ArgumentException("Invalid match_phrase query");
            }

            // Each token produced by the analyzer becomes one term of the phrase, in order.
            foreach (var term in LuceneQueryService.Tokenize(first.Name, value.Value <string>(), context.DefaultAnalyzer))
            {
                phraseQuery.Add(new Term(first.Name, term));
            }

            return phraseQuery;
        }

        /// <summary>
        /// Creates the phrase query for a quoted search text: one term per word of
        /// <paramref name="text"/> (trimmed and lower-cased), with the slop set to 2.
        /// </summary>
        /// <param name="text">The quoted text.</param>
        /// <param name="searchFieldName">The name of the field to search.</param>
        /// <returns>The query result.</returns>
        /// <remarks>
        /// NOTE(review): braces and the return statement are not visible in this snippet -
        /// presumably lost in extraction. A slop of 2 is set although the comment below speaks
        /// of an exact phrase - confirm which is intended.
        /// </remarks>
        private PhraseQuery CreateQuotedQuery(string text, string searchFieldName)
            // Create the query.
            PhraseQuery query = new PhraseQuery();

            query.Slop = 2;

            // Quoted search exact phrase.
            string[] words = text.Words();
            for (int i = 0; i < words.Length; i++)
                // Add the query.
                query.Add(new Term(searchFieldName, words[i].Trim().ToLower()));

            // Return the query.
        /// <summary>
        /// Match a multi-word phrase exactly. (This is like how QueryParser handles quoted phrases)
        /// The phrase is split into tokens by <c>_analyzer.TokenListFromString</c> and each token is
        /// added as a term of a PhraseQuery on <paramref name="field"/>.
        /// </summary>
        /// <param name="field">Name of the field the phrase terms are created for.</param>
        /// <param name="phrase">The phrase text; checked with string.IsNullOrWhiteSpace first.</param>
        /// <param name="slop">Defaults to 0. NOTE(review): not used in the visible code - confirm.</param>
        /// <returns>NOTE(review): the return statement is not visible in this snippet.</returns>
        public QueryBuilder MatchPhrase(string field, string phrase, int slop = 0)
            // Guard for a blank phrase; what it does is not visible here.
            if (string.IsNullOrWhiteSpace(phrase))

            var query = new PhraseQuery();

            foreach (var token in _analyzer.TokenListFromString(phrase))
                query.Add(new Term(field, token));


        /// <summary>
        /// Builds a field frag list for the two-term phrase "abcdefgh jklmnopqrs" using
        /// <c>sflb.minFragCharSize</c> as the fragment size, and asserts that exactly one fragment
        /// with the expected string form is produced.
        /// </summary>
        public void TestSmallerFragSizeThanPhraseQuery()
            SimpleFragListBuilder sflb = new SimpleFragListBuilder();

            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term(F, "abcdefgh"));
            phraseQuery.Add(new Term(F, "jklmnopqrs"));

            FieldFragList ffl = sflb.CreateFieldFragList(fpl(phraseQuery, "abcdefgh   jklmnopqrs"), sflb.minFragCharSize);

            assertEquals(1, ffl.FragInfos.size());
            // NOTE(review): the body of this VERBOSE branch is not visible in this snippet.
            if (VERBOSE)
            assertEquals("subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.FragInfos[0].toString());
Exemple #13
        /// <summary>
        /// Builds a field frag list for the two-term phrase "abcdefgh jklmnopqrs" using
        /// <c>sflb.minFragCharSize</c> as the fragment size, and asserts that exactly one fragment
        /// with the expected string form is produced.
        /// </summary>
        public void TestSmallerFragSizeThanPhraseQuery()
            SimpleFragListBuilder sflb = new SimpleFragListBuilder();

            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term(F, "abcdefgh"));
            phraseQuery.Add(new Term(F, "jklmnopqrs"));

            FieldFragList ffl = sflb.CreateFieldFragList(fpl(phraseQuery, "abcdefgh   jklmnopqrs"), sflb.minFragCharSize);

            assertEquals(1, ffl.FragInfos.size());
            // Print the fragment info when verbose output is enabled.
            if (Verbose)
                Console.WriteLine(ffl.FragInfos[0].ToString(CultureInfo.InvariantCulture));                                           // LUCENENET specific: use invariant culture, since we are culture-aware
            assertEquals("subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.FragInfos[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
Exemple #14
        /// <summary>
        /// Demo test: builds a document with one text field, then searches for the long term,
        /// for the term "text", and for the phrase "to be", expecting one hit each time.
        /// </summary>
        /// <remarks>
        /// NOTE(review): braces and the statement that adds the document to the writer are not
        /// visible in this snippet - presumably lost in extraction.
        /// </remarks>
        public virtual void TestDemo()
            Analyzer analyzer = new MockAnalyzer(Random());

            // Store the index in memory:
            using (Directory directory = NewDirectory())
                string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
                string text = "this is the text to be indexed. " + longTerm;

                // To store an index on disk, use this instead:
                // Directory directory = File("/tmp/testindex"));
                using (RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, analyzer)))
                    Documents.Document doc = new Documents.Document();
                    doc.Add(NewTextField("fieldname", text, Field.Store.YES));

                // Now search the index:
                using (IndexReader ireader = DirectoryReader.Open(directory)) // read-only=true
                    IndexSearcher isearcher = NewSearcher(ireader);

                    // The very long term must be findable as a single term.
                    Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
                    Query query = new TermQuery(new Term("fieldname", "text"));
                    TopDocs hits = isearcher.Search(query, null, 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    // Iterate through the results:
                    for (int i = 0; i < hits.ScoreDocs.Length; i++)
                        Documents.Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
                        Assert.AreEqual(text, hitDoc.Get("fieldname"));

                    // Test simple phrase query
                    PhraseQuery phraseQuery = new PhraseQuery();
                    phraseQuery.Add(new Term("fieldname", "to"));
                    phraseQuery.Add(new Term("fieldname", "be"));
                    Assert.AreEqual(1, isearcher.Search(phraseQuery, null, 1).TotalHits);

        /// <summary>
        /// Gets the search results: searches the "msg" field of the index under "~/Index" for the
        /// text of txtKeyWords and assigns the result list to rptSearchResult.DataSource.
        /// </summary>
        /// <remarks>
        /// NOTE(review): braces and the statement adding each result to resultList are not visible
        /// in this snippet - presumably lost in extraction. The reader/searcher are not disposed
        /// in the visible code.
        /// </remarks>
        protected void btnGetSearchResult_Click(object sender, EventArgs e)
            string keyword = txtKeyWords.Text;

            string        indexPath = Context.Server.MapPath("~/Index"); // Location where the index is stored
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            // Query condition
            PhraseQuery query = new PhraseQuery();

            // Equivalent to: where contains("msg",kw)
            query.Add(new Term("msg", keyword));
            // Terms more than 100 apart (an empirical value) are left out of the results, because relevance is low at that distance
            // NOTE(review): no slop is set on the query in the visible code - confirm.
            // TopScoreDocCollector: the container that holds the query results
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            // Search with this query; the results are placed into collector
            searcher.Search(query, null, collector);
            // Take results m through n from the query results
            // collector.GetTotalHits() is the total number of results
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
            // Iterate over the query results
            IList <SearchResult> resultList = new List <SearchResult>();

            for (int i = 0; i < docs.Length; i++)
                // Get the document id, because a Document can use a lot of memory (compare DataSet vs. DataReader)
                int docId = docs[i].doc;
                // So the query results only hold ids; the actual content needs a second lookup
                // Look up the content by id: what was put in is a Document, and what comes out is a Document
                Document     doc    = searcher.Doc(docId);
                SearchResult result = new SearchResult();
                result.Id  = Convert.ToInt32(doc.Get("id"));
                result.Msg = HighlightHelper.HighLight(keyword, doc.Get("msg"));


            // Bind to the Repeater
            rptSearchResult.DataSource = resultList;
Exemple #16
        /// <summary>
        /// Searches the index under "~/IndexData" for the words of <paramref name="searchKey"/>,
        /// matching either the "title" phrase or the "content" phrase (both added as SHOULD clauses),
        /// and maps each hit to a Record.
        /// </summary>
        /// <param name="searchKey">The search text; split into words by SplitWords.</param>
        /// <remarks>
        /// NOTE(review): braces, the statement adding each record to the list and the return
        /// statement are not visible in this snippet - presumably lost in extraction. The
        /// reader/searcher are not disposed in the visible code.
        /// </remarks>
        public static List <Record> SearchFromIndex(string searchKey)
            string        indexPath = HttpContext.Current.Server.MapPath("~/IndexData");
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            BooleanQuery bQuery = new BooleanQuery();
            PhraseQuery  tQuery = new PhraseQuery();
            PhraseQuery  cQuery = new PhraseQuery();

            // Every word is added to both the title phrase and the content phrase.
            foreach (string word in SplitWords(searchKey))
                tQuery.Add(new Term("title", word));
                cQuery.Add(new Term("content", word));
            cQuery.Slop = 100;             // Maximum distance allowed between the keywords
            bQuery.Add(tQuery, Occur.SHOULD);
            bQuery.Add(cQuery, Occur.SHOULD);

            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);

            searcher.Search(bQuery, null, collector);            // Run the search with the query; results are placed into the collector
            // TopDocs from 0 to GetTotalHits() means all matching documents; TopDocs(20,10) would fetch the documents between 20 and 30, which gives paging
            ScoreDoc[] docs = collector.TopDocs(0, collector.TotalHits).ScoreDocs;

            List <Record> records = new List <Record>();

            for (int i = 0; i < docs.Length; i++)
                int      docId  = docs[i].Doc;          // Id of the matching document (assigned internally by Lucene)
                Document doc    = searcher.Doc(docId);  // Get the Document object for that id
                var      record = new Record();
                record.Title   = doc.Get("title");
                record.Summary = doc.Get("summary");
                record.ID      = Guid.Parse(doc.Get("id"));

Exemple #17
        /// <summary>
        /// Builds a set of boolean and phrase queries over the "contents" field using the
        /// single-letter terms a, b, c and e.
        /// </summary>
        /// <remarks>
        /// NOTE(review): braces, the statements adding the queries to <c>queries</c> and the return
        /// statement are not visible in this snippet - presumably lost in extraction.
        /// </remarks>
        private IList <Query> BuildQueries()
            IList <Query> queries = new List <Query>();

            // a OR b
            BooleanQuery booleanAB = new BooleanQuery();

            booleanAB.Add(new TermQuery(new Term("contents", "a")), Occur.SHOULD);
            booleanAB.Add(new TermQuery(new Term("contents", "b")), Occur.SHOULD);

            // phrase "a b"
            PhraseQuery phraseAB = new PhraseQuery();

            phraseAB.Add(new Term("contents", "a"));
            phraseAB.Add(new Term("contents", "b"));

            // phrase "a b c"
            PhraseQuery phraseABC = new PhraseQuery();

            phraseABC.Add(new Term("contents", "a"));
            phraseABC.Add(new Term("contents", "b"));
            phraseABC.Add(new Term("contents", "c"));

            // a OR c
            BooleanQuery booleanAC = new BooleanQuery();

            booleanAC.Add(new TermQuery(new Term("contents", "a")), Occur.SHOULD);
            booleanAC.Add(new TermQuery(new Term("contents", "c")), Occur.SHOULD);

            // phrase "a c"
            PhraseQuery phraseAC = new PhraseQuery();

            phraseAC.Add(new Term("contents", "a"));
            phraseAC.Add(new Term("contents", "c"));

            // phrase "a c e"
            PhraseQuery phraseACE = new PhraseQuery();

            phraseACE.Add(new Term("contents", "a"));
            phraseACE.Add(new Term("contents", "c"));
            phraseACE.Add(new Term("contents", "e"));

Exemple #18
        // This is a simplified query builder which works for single Terms and single Phrases
        // Returns null, TermQuery, or PhraseQuery
        // NOTE(review): for zero tokens the visible code returns an empty BooleanQuery rather than
        // null. Braces, the "do" line of the do/while loop and the final return are not visible
        // in this snippet - presumably lost in extraction.
        public static Lucene.Net.Search.Query GetFieldQuery(Analyzer analyzer, string field, string queryText)
            TokenStream stream = analyzer.TokenStream(field, new StringReader(queryText));
            TokenFilter filter = new CachingTokenFilter(stream);


            // This attribute way of getting token properties sucks, but it's the non-obsolete one.
            var           attr1   = (TermAttribute)filter.GetAttribute(typeof(TermAttribute));
            var           attr2   = (PositionIncrementAttribute)filter.GetAttribute(typeof(PositionIncrementAttribute));
            // Text of the current token, or null when the term attribute is missing.
            Func <string> getText = () => attr1 != null?attr1.Term() : null;

            // Position increment of the current token, defaulting to 1 when the attribute is missing.
            Func <int> getPositionIncrement = () => attr2 != null?attr2.GetPositionIncrement() : 1;

            // 0 tokens
            if (!filter.IncrementToken())
                return(new BooleanQuery());

            // 1 token?
            string token1   = getText();
            int    position = 0;

            if (!filter.IncrementToken())
                return(new TermQuery(new Term(field, token1)));

            // many tokens - handle first token
            PhraseQuery ret = new PhraseQuery();

            ret.Add(new Term(field, token1));

                // handle rest of tokens
                // Each further token is added at the running position (sum of position increments).
                string tokenNext = getText();
                position += getPositionIncrement();
                ret.Add(new Term(field, tokenNext), position);
            }while (filter.IncrementToken());

Exemple #19
        /// <summary>
        /// Searches the index for the keywords: looks up the words of the "SearchKey" query-string
        /// value in the "content" field of the index under "~/IndexData" and assigns the result
        /// list to Repeater1.DataSource.
        /// </summary>
        /// <remarks>
        /// NOTE(review): braces and the statement adding each book to bookResult are not visible
        /// in this snippet - presumably lost in extraction. The reader/searcher are not disposed
        /// in the visible code.
        /// </remarks>
        private void SearchFromIndexData()
            string        indexPath = Context.Server.MapPath("~/IndexData");
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery query = new PhraseQuery();

            foreach (string word in Common.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
                query.Add(new Term("content", word));
            //query.Add(new Term("content", "C#"));// Multiple query conditions are combined with AND
            query.SetSlop(100);  // Maximum distance allowed between the keywords

            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);// Run the search with the query; results are placed into the collector
            // TopDocs from 0 to GetTotalHits() means all matching documents; TopDocs(20,10) would fetch the documents between 20 and 30, which gives paging
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

            List <PZYM.Shop.Model.Books> bookResult = new List <PZYM.Shop.Model.Books>();

            for (int i = 0; i < docs.Length; i++)
                int      docId = docs[i].doc;         // Id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId); // Get the Document object for that id

                PZYM.Shop.Model.Books book = new PZYM.Shop.Model.Books();
                book.Title = doc.Get("title");
                //book.ContentDescription = doc.Get("content");// Without highlighting
                // Highlight the search keywords using the highlighter plugin provided by PanGu
                book.ContentDescription = Common.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
                book.Id = Convert.ToInt32(doc.Get("id"));
            Repeater1.DataSource = bookResult;
Exemple #20
        /// <summary>
        /// Splits <paramref name="msg"/> into words, searches the "Title" field of the index at
        /// the configured "lucenedirPath" for them, and maps each hit to a view model.
        /// </summary>
        /// <param name="Request">The HTTP request; only referenced in commented-out code here.</param>
        /// <param name="msg">The search text entered by the user.</param>
        /// <remarks>
        /// NOTE(review): braces, the statement adding each view model to the list and the return
        /// statement are not visible in this snippet - presumably lost in extraction. The
        /// reader/searcher are not disposed in the visible code.
        /// </remarks>
        public static List <ViewModelContent> ShowSearchContent(HttpRequestBase Request, string msg)
            string        indexPath = ConfigurationManager.AppSettings["lucenedirPath"];
            List <string> list      = Common.WebCommon.PanGuSplitWord(msg);// Split the search text entered by the user into words.
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery query = new PhraseQuery();

            foreach (string word in list)// Let the user segment with spaces first; the space-separated pieces are the words, e.g. "computer   major"
                query.Add(new Term("Title", word));
            //query.Add(new Term("body","language"));-- More query conditions can be added; they are combined together and their order does not matter.
            // query.Add(new Term("body", "college student"));
            // query.Add(new Term("body", kw));// Articles whose body contains kw
            query.SetSlop(100);// Maximum distance between the query terms; terms too far apart in an article are meaningless (e.g. too many words between "college student" and "resume")
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // Run the search with the query; results are placed into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // Get all matching documents; GetTotalHits() is the total count; TopDocs(300, 20) would return documents 300 through 320.
            List <Models.ViewModelContent> viewModelList = new List <Models.ViewModelContent>();

            for (int i = 0; i < docs.Length; i++)
                Models.ViewModelContent viewModel = new Models.ViewModelContent();
                int      docId = docs[i].doc;                     // Id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId);             // Load the full document for that id
                viewModel.Id    = Convert.ToInt32(doc.Get("Id")); // Read the value stored in the field
                viewModel.Title = doc.Get("Title");
                // viewModel.Content = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Content"));// Highlight the search keywords.

        /// <summary>
        /// Builds a BooleanQuery of SHOULD clauses from <paramref name="QueryString"/>: optional
        /// boosted phrase queries on the abstract and title fields, the parsed query string, and
        /// one fuzzy ("~"-suffixed) parsed clause per token. MinimumNumberShouldMatch is set to 2.
        /// </summary>
        /// <param name="QueryString">The raw query text.</param>
        /// <param name="PreProcess">When true, phrases are extracted and the query string is re-parsed by the lexical parser.</param>
        /// <remarks>
        /// NOTE(review): braces, any else branch and the return statement are not visible in this
        /// snippet - presumably lost in extraction, so the exact nesting of the statements below
        /// should be confirmed against the original.
        /// </remarks>
        public Query ProcessQuery(string QueryString, bool PreProcess)
            BooleanQuery FinalQuery = new BooleanQuery();


            if (PreProcess)
                //Extract all phrases
                List <string> PhraseList = _PosTaggerLexicalParser.FindPhrases(QueryString);
                QueryString = _PosTaggerLexicalParser.Parse(QueryString);

                foreach (string phrase in PhraseList)
                    // Each phrase is added as one term to a query on the abstract field and one on the title field.
                    PhraseQuery abstractPhraseQuery = new PhraseQuery();
                    PhraseQuery titlePhraseQuery    = new PhraseQuery();

                    abstractPhraseQuery.Add(new Term(SEDocument.ABSTRACT_FN, phrase));
                    titlePhraseQuery.Add(new Term(SEDocument.TITLE_FN, phrase));

                    // Title matches are boosted more (4.0) than abstract matches (1.2).
                    abstractPhraseQuery.Boost = 1.2F;
                    abstractPhraseQuery.Slop  = 3;
                    FinalQuery.Add(abstractPhraseQuery, Occur.SHOULD);
                    titlePhraseQuery.Boost = 4.0F;
                    titlePhraseQuery.Slop  = 3;
                    FinalQuery.Add(titlePhraseQuery, Occur.SHOULD);
                FinalQuery.Add(_MultiFieldQueryParser.Parse(QueryString), Occur.SHOULD);

            // Quotes and square brackets are replaced by spaces before tokenizing.
            string[] tokens = _PosTaggerLexicalParser.TokeniseString(QueryString.Replace('\"', ' ').Replace('[', ' ').Replace(']', ' '));
            foreach (string term in tokens)
                // Any existing "~" is removed and a single "~" is appended before parsing.
                FinalQuery.Add(_MultiFieldQueryParser.Parse(term.Replace("~", "") + "~"), Occur.SHOULD);

            FinalQuery.MinimumNumberShouldMatch = 2;

Exemple #22
        /// <summary>
        /// Looks up the words of the "SearchKey" query-string value in the "ForumContent" field of
        /// the index under "~/IndexData" and maps each hit to a Forum object.
        /// </summary>
        /// <remarks>
        /// NOTE(review): braces and the statement adding each forum to ForumResult are not visible
        /// in this snippet - presumably lost in extraction. The reader/searcher are not disposed
        /// in the visible code.
        /// </remarks>
        public void SearchFromIndexData()
            string        indexPath = System.Web.HttpContext.Current.Server.MapPath("~/IndexData");
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery query = new PhraseQuery();

            foreach (string word in WitKeyDu.Site.Web.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
                query.Add(new Term("ForumContent", word));
            //query.Add(new Term("content", "C#"));// Multiple query conditions are combined with AND
            query.SetSlop(100);  // Maximum distance allowed between the keywords

            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);// Run the search with the query; results are placed into the collector
            // TopDocs from 0 to GetTotalHits() means all matching documents; TopDocs(20,10) would fetch the documents between 20 and 30, which gives paging
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

            List <Forum> ForumResult = new List <Forum>();

            for (int i = 0; i < docs.Length; i++)
                int      docId = docs[i].doc;         // Id of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId); // Get the Document object for that id

                Forum forum = new Forum();
                forum.ForumName = doc.Get("ForumName");
                //book.Title = doc.Get("title");
                ////book.ContentDescription = doc.Get("content");// Without highlighting
                //// Highlight the search keywords using the highlighter plugin provided by PanGu
                forum.ForumContent = WitKeyDu.Site.Web.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("ForumContent"));
                forum.ForumTypeID  = Convert.ToInt32(doc.Get("ID"));
Exemple #23
        /// <summary>
        /// Demo test: builds a document with one text field, then checks term and phrase searches
        /// against it, expecting one hit each time.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this copy is garbled beyond the missing braces - several expressions are
        /// truncated (for example <c>IndexReader ireader =;</c> and <c>TopDocs hits =, null, 1);</c>),
        /// and member names use lower-case spelling (add, doc, totalHits). It looks like the
        /// output of an automatic source conversion; restore from the original before use.
        /// </remarks>
        public virtual void TestDemo()
            Analyzer analyzer = new MockAnalyzer(random());

            // Store the index in memory:
            Directory directory = newDirectory();
            // To store an index on disk, use this instead:
            // Directory directory = File("/tmp/testindex"));
            RandomIndexWriter iwriter  = new RandomIndexWriter(random(), directory, analyzer);
            Document          doc      = new Document();
            string            longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
            string            text     = "this is the text to be indexed. " + longTerm;

            doc.add(newTextField("fieldname", text, Field.Store.YES));

            // Now search the index:
            IndexReader   ireader   =; // read-only=true
            IndexSearcher isearcher = newSearcher(ireader);

            // Truncated expression: the search call wrapping this TermQuery is missing.
            Assert.AreEqual(1, TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
            Query   query = new TermQuery(new Term("fieldname", "text"));
            // Truncated expression: the search call is missing.
            TopDocs hits  =, null, 1);

            Assert.AreEqual(1, hits.totalHits);
            // Iterate through the results:
            for (int i = 0; i < hits.scoreDocs.length; i++)
                Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
                Assert.AreEqual(text, hitDoc.get("fieldname"));

            // Test simple phrase query
            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.add(new Term("fieldname", "to"));
            phraseQuery.add(new Term("fieldname", "be"));
            // Truncated expression: the search call is missing.
            Assert.AreEqual(1,, null, 1).totalHits);

Exemple #24
        /// <summary>
        /// Visits each term of a PhraseQuery with VisitTerm and lazily creates a new PhraseQuery
        /// once a visited term differs from the original one.
        /// </summary>
        /// <param name="phraseq">The phrase query being visited.</param>
        /// <remarks>
        /// NOTE(review): braces, most branch bodies, the index increment and the return statements
        /// are not visible in this snippet - presumably lost in extraction. The comments below
        /// describe only what the visible lines show.
        /// </remarks>
        public override Query VisitPhraseQuery(PhraseQuery phraseq)
            var         terms    = phraseq.GetTerms();
            // Stays null until a visited term differs from its original.
            PhraseQuery newQuery = null;

            int index = 0;
            int count = terms.Length;

            while (index < count)
                var visitedTerm = VisitTerm(terms[index]);
                // A rewritten query already exists.
                if (newQuery != null)
                    if (visitedTerm != null)
                // First term that changed: start a new query.
                else if (visitedTerm != terms[index])
                    newQuery = new PhraseQuery();
                    // Loops over the terms before the current index.
                    for (int i = 0; i < index; i++)
                    if (visitedTerm != null)
            if (newQuery != null)
                if (newQuery.GetTerms().Length > 0)
Exemple #25
        /// <summary>
        ///     Full-text search: looks up the keywords of <paramref name="keyword"/> in the "body"
        ///     field and returns one page of results.
        /// </summary>
        /// <param name="keyword">The search text; split into words by GetKeyWords.</param>
        /// <param name="startRowIndex">Start index passed to collector.TopDocs.</param>
        /// <param name="pageSize">Number of results requested from collector.TopDocs.</param>
        /// <param name="totalCount">Receives the collector's total hit count.</param>
        /// <returns>
        /// NOTE(review): the statement adding results to the list and the return statement are not
        /// visible in this snippet - presumably lost in extraction, together with the braces.
        /// </returns>
        public static List <SearchResult> DoSearch(string keyword, int startRowIndex, int pageSize, out int totalCount)
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_indexPath), new NoLockFactory());
            IndexReader reader    = IndexReader.Open(directory, true);
            var searcher = new IndexSearcher(reader);
            var query    = new PhraseQuery();

            foreach (string word in GetKeyWords(keyword))
                query.Add(new Term("body", word));
            query.SetSlop(100);                                                       // Only counts as a match when the terms are within 100 of each other
            TopScoreDocCollector collector = TopScoreDocCollector.create(1024, true); // At most 1024 records

            searcher.Search(query, null, collector);
            totalCount = collector.GetTotalHits();                                  // Total number of hits
            ScoreDoc[] docs = collector.TopDocs(startRowIndex, pageSize).scoreDocs; // Paging; the index presumably starts at 0, with 0 being the first record
            var        list = new List <SearchResult>();

            for (int i = 0; i < docs.Length; i++)
                int docId = docs[i].doc; // Get the document number; this is the primary key, assigned by lucene.net
                Document doc      = searcher.Doc(docId);
                string   number   = doc.Get("number");
                string   title    = doc.Get("title");
                string   fullPath = doc.Get("fullPath");
                string   body     = doc.Get("body");

                var searchResult = new SearchResult
                    Number      = number,
                    Title       = title,
                    FullPath    = fullPath,
                    BodyPreview = Preview(body, keyword)
Exemple #26
        /// <summary>
        /// Adds PhraseQuery clauses in bulk: one phrase query per field, combined in a BooleanQuery
        /// that is added as a MUST clause to either the filters or the clauses.
        /// </summary>
        /// <param name="fieldNameAndBoosts">Field names and their boost levels.</param>
        /// <param name="phrase">The phrase to search for.</param>
        /// <param name="occur">Relationship between the search conditions.</param>
        /// <param name="asFilter">Whether to add the query as a filter condition.</param>
        /// <returns>
        /// NOTE(review): only the single-segment return is visible; braces, else branches and the
        /// final return statement were presumably lost in extraction.
        /// </returns>
        public LuceneSearchBuilder WithPhrases(Dictionary <string, BoostLevel> fieldNameAndBoosts, string phrase, BooleanClause.Occur occur, bool asFilter = false)
            string filteredPhrase = ClauseScrubber.LuceneKeywordsScrub(phrase);

            // Guard for an empty scrubbed phrase; what it does is not visible here.
            if (string.IsNullOrEmpty(filteredPhrase))

            string[] nameSegments = ClauseScrubber.SegmentForPhraseQuery(filteredPhrase);
            // A single segment is handled by WithFields instead of a phrase query.
            if (nameSegments.Length == 1)
                return(WithFields(fieldNameAndBoosts, nameSegments[0], false, occur, asFilter));
                // Presumably the else branch (several segments) - TODO confirm.
                BooleanQuery query = new BooleanQuery();
                foreach (var fieldNameAndBoost in fieldNameAndBoosts)
                    PhraseQuery phraseQuery = new PhraseQuery();
                    foreach (var nameSegment in nameSegments)
                        phraseQuery.Add(new Term(fieldNameAndBoost.Key, nameSegment));

                    SetBoost(phraseQuery, fieldNameAndBoost.Value);
                    query.Add(phraseQuery, occur);

                if (asFilter)
                    filters.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
                    // Presumably the else branch (not a filter) - TODO confirm.
                    clauses.Add(new BooleanClause(query, BooleanClause.Occur.MUST));

Exemple #27
        /// <summary>
        /// Searches the index stored under "~/IndexData" for the given search key, matching on the
        /// "TITLE" field, and builds a SUC_NEWS object with highlighted title and content per hit.
        /// NOTE(review): what happens to each SUC_NEWS object is not visible in this excerpt.
        /// </summary>
        /// <param name="searchkey">The raw search text; it is split into words before querying.</param>
        private void SearchFromIndexData(string searchkey)
            string        indexPath = Context.Server.MapPath("~/IndexData");
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery query = new PhraseQuery();

            // Every word produced by the splitter becomes a term on the "TITLE" field.
            foreach (string word in SplitContent.SplitWords(searchkey))
                query.Add(new Term("TITLE", word));
            //query.Add(new Term("content", "C#")); // multiple query conditions are combined as AND
            query.SetSlop(100);  // maximum distance allowed between the keywords

            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query; the results are placed into the collector
            // TopDocs(0, GetTotalHits()) takes every document in the result set; TopDocs(20, 10) would fetch documents 20-30, which gives paging
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; //collector.GetTotalHits()

            for (int i = 0; i < docs.Length; i++)
                int      docID = docs[i].doc;         // ID of the matching document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docID); // load the Document object for that ID
                SUC_NEWS mod   = new SUC_NEWS();
                // Fall back to the stored title when highlighting returns null or an empty string.
                mod.TITLE = SplitContent.HightLight(searchkey, doc.Get("TITLE"));
                mod.TITLE = string.IsNullOrEmpty(mod.TITLE) ? doc.Get("TITLE") : mod.TITLE;
                //book.ContentDESCRPTION = doc.Get("content"); // variant without highlighting
                // Highlight the search keywords using the highlighter plug-in provided by PanGu
                mod.CONTENT     = SplitContent.HightLight(searchkey, doc.Get("CONTENT"));
                mod.CONTENT     = string.IsNullOrEmpty(mod.CONTENT) ? doc.Get("CONTENT") : mod.CONTENT;
                // Only the opening "<b>" tag is removed from the content.
                mod.CONTENT     = mod.CONTENT.Replace("<b>", "");
                mod.ID          = Convert.ToInt32(doc.Get("ID"));
                mod.pandaWebUrl = doc.Get("URL");
Exemple #28
 /// <summary>
 /// Recursively collects the leaf queries of <paramref name="sourceQuery"/> into
 /// <paramref name="flatQueries"/>, storing each collected query as both key and value.
 /// </summary>
 /// <param name="sourceQuery">The query to flatten.</param>
 /// <param name="flatQueries">Receives the collected queries; entries already present are not added again.</param>
 public void flatten(Query sourceQuery, Dictionary<Query,Query> flatQueries)
     // BooleanQuery: recurse into every clause that is not prohibited.
     if (sourceQuery is BooleanQuery)
         BooleanQuery bq = (BooleanQuery)sourceQuery;
         foreach (BooleanClause clause in bq.GetClauses())
             if (!clause.IsProhibited())
                 flatten(clause.GetQuery(), flatQueries);
     // DisjunctionMaxQuery: recurse into each sub-query returned by its iterator.
     else if (sourceQuery is DisjunctionMaxQuery)
         DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
         System.Collections.IEnumerator en = dmq.Iterator();
         while (en.MoveNext())
             Query query = (Query)en.Current;
             flatten(query, flatQueries);
     // TermQuery: added as-is unless already present.
     else if (sourceQuery is TermQuery)
         if (!flatQueries.ContainsKey(sourceQuery))
             flatQueries.Add(sourceQuery, sourceQuery);
     // PhraseQuery: kept when it has more than one term; a single-term phrase is
     // replaced by an equivalent TermQuery; a phrase with no terms is not added.
     else if (sourceQuery is PhraseQuery)
         if (!flatQueries.ContainsKey(sourceQuery))
             PhraseQuery pq = (PhraseQuery)sourceQuery;
             if (pq.GetTerms().Length > 1)
                 flatQueries.Add(pq, pq);
             else if (pq.GetTerms().Length == 1)
                 Query q = new TermQuery(pq.GetTerms()[0]);
                 flatQueries.Add(q, q);
     // Any other query type is discarded.
Exemple #29
        /// <summary>
        /// Searches the index at lucenePath for the keyword, matching on the "Title" field,
        /// and builds a JobSerach object for each document in the requested page.
        /// </summary>
        /// <param name="kw">The search text; it is lower-cased and split into words before querying.</param>
        /// <param name="index">Passed to TopDocs as (index - 1), i.e. the start offset of the page.</param>
        /// <param name="skipCount">Passed to TopDocs as the number of documents to fetch.</param>
        /// <returns>
        /// NOTE(review): the return statement is not visible in this excerpt; presumably the populated list.
        /// </returns>
        public static List <JobSerach> SearchContent(string kw, int index, int skipCount)
            //string indexPath = lucenePath; // best kept in a configuration file.
            kw = kw.ToLower();
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(lucenePath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery query = new PhraseQuery();

            foreach (string word in SplitWord(kw)) // the user's search text is segmented with the PanGu tokenizer
                query.Add(new Term("Title", word));
                //query.Add(new Term("Content", word));
                //query.Add(new Term("MaiDian", word));
            query.SetSlop(100); // maximum distance between the query terms; terms that are too far apart in the text make a meaningless match (e.g. too many words between "university student" and "resume")
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                             // run the query; the results are placed into the collector
            ScoreDoc[] docs = collector.TopDocs(index - 1, skipCount).scoreDocs; // fetch documents from the result set; GetTotalHits() is the total count; TopDocs(300, 20) returns documents 300 to 320, which can be used for paging

            List <JobSerach> list = new List <JobSerach>();

            for (int i = 0; i < docs.Length; i++)
                int       docId  = docs[i].doc;         // ID of the matching document (assigned internally by Lucene)
                Document  doc    = searcher.Doc(docId); // load the document details for that ID
                JobSerach result = new JobSerach();
                // Only the title is highlighted; the remaining fields are copied from the stored document.
                result.Title        = Highlight(kw, doc.Get("Title"));
                result.Id           = Convert.ToInt32(doc.Get("Id"));
                result.ImageAddress = doc.Get("ImageAddress");
                result.MaiDian      = doc.Get("MaiDian");
                result.Price        = double.Parse(doc.Get("Price"));
                result.Content      = doc.Get("Content");
        /// <summary>
        /// Builds a boosted query on one field: a TermQuery when a single term is given,
        /// otherwise a PhraseQuery containing the terms in order.
        /// </summary>
        /// <param name="field">Field the terms belong to.</param>
        /// <param name="boost">Boost assigned to the resulting query.</param>
        /// <param name="terms">Term texts for the query.</param>
        /// <returns>
        /// NOTE(review): the return statement is not visible in this excerpt; presumably q.
        /// </returns>
        private Query clause(String field, float boost, params String[] terms)
            Query q;

            if (terms.Length == 1)
                q = new TermQuery(new Term(field, terms[0]));
                // NOTE(review): presumably the else branch starts here; the keyword was lost in extraction.
                PhraseQuery pq = new PhraseQuery();
                foreach (String term in terms)
                    pq.Add(new Term(field, term));
                q = pq;
            q.Boost = (boost);
        /// <summary>
        /// Checks the frag list that SimpleFragListBuilder produces for the phrase "a b"
        /// against three texts, using a frag char size of 20.
        /// </summary>
        public void TestPhraseQuery()
            SimpleFragListBuilder sflb = new SimpleFragListBuilder();

            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term(F, "a"));
            phraseQuery.Add(new Term(F, "b"));

            // Text containing neither phrase term: no frag infos expected.
            FieldFragList ffl = sflb.CreateFieldFragList(fpl(phraseQuery, "c d e"), 20);

            assertEquals(0, ffl.FragInfos.size());

            // Both terms present but separated by "c": still no frag infos expected.
            ffl = sflb.CreateFieldFragList(fpl(phraseQuery, "a c b"), 20);
            assertEquals(0, ffl.FragInfos.size());

            // Terms adjacent and in order: exactly one frag info expected.
            ffl = sflb.CreateFieldFragList(fpl(phraseQuery, "a b c"), 20);
            assertEquals(1, ffl.FragInfos.size());
            assertEquals("subInfos=(ab((0,3)))/1.0(0,20)", ffl.FragInfos[0].toString());
        /// <summary>
        /// Updates documents through an IndexWriter opened in APPEND mode so that deletes are pending,
        /// then verifies document and term counts in the main directory.
        /// NOTE(review): the statements between building the phrase query and the verification
        /// calls are missing from this excerpt.
        /// </summary>
        public virtual void TestWithPendingDeletes3()
            // main directory
            Directory dir = NewDirectory();
            // auxiliary directory
            Directory aux = NewDirectory();

            SetUpDirs(dir, aux);
            IndexWriter writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND));

            // Adds 10 docs, then replaces them with another 10
            // docs, so 10 pending deletes:
            for (int i = 0; i < 20; i++)
                Document doc = new Document();
                // The id cycles through 0..9, so the second ten updates replace the first ten documents.
                doc.Add(NewStringField("id", "" + (i % 10), Field.Store.NO));
                doc.Add(NewTextField("content", "bbb " + i, Field.Store.NO));
                writer.UpdateDocument(new Term("id", "" + (i % 10)), doc);

            // Deletes one of the 10 added docs, leaving 9:
            // NOTE(review): only the construction of the phrase query is visible; the delete call itself is not.
            PhraseQuery q = new PhraseQuery();
            q.Add(new Term("content", "bbb"));
            q.Add(new Term("content", "14"));



            VerifyNumDocs(dir, 1039);
            VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
            VerifyTermDocs(dir, new Term("content", "bbb"), 9);

        /// <summary>
        /// Runs the standard highlighting routine for a phrase query ("john kennedy") wrapped in a
        /// FilteredQuery and asserts that exactly two highlights were counted.
        /// </summary>
        public void TestGetBestFragmentsFilteredPhraseQuery()
            var helper = new TestHighlightRunner();
            helper.TestAction = () =>
                                        numHighlights = 0;
                                        // Range filter on "contents" whose lower and upper bounds are both "john", both inclusive.
                                        var rf = new TermRangeFilter("contents", "john", "john", true, true);
                                        var pq = new PhraseQuery();
                                        pq.Add(new Term("contents", "john"));
                                        pq.Add(new Term("contents", "kennedy"));
                                        var fq = new FilteredQuery(pq, rf);

                                        // NOTE(review): `query` and `hits` are not assigned in the visible lines;
                                        // presumably a search using fq was dropped from the excerpt - confirm.
                                        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);
                                        // Currently highlights "John" and "Kennedy" separately
                                        Assert.IsTrue(numHighlights == 2,
                                                      "Failed to find correct number of highlights " + numHighlights +
                                                      " found");

 /// <summary>
 /// Runs three queries against the given searcher via RunQuery: two term queries and one phrase query.
 /// </summary>
 /// <param name="s">The searcher handed to RunQuery.</param>
 protected internal virtual void SmokeTestSearcher(IndexSearcher s)
     RunQuery(s, new TermQuery(new Term("body", "united")));
     RunQuery(s, new TermQuery(new Term("titleTokenized", "states")));
     // Phrase "united states" on the "body" field.
     PhraseQuery pq = new PhraseQuery();
     pq.Add(new Term("body", "united"));
     pq.Add(new Term("body", "states"));
     RunQuery(s, pq);
        /// <summary>
        /// Searches the "indexed_not_tokenized" field for the phrase "test1 test2" and expects
        /// exactly one hit, which is then checked with DoAssert.
        /// NOTE(review): no documents are added in the visible lines; the indexing step was
        /// presumably dropped from this excerpt.
        /// </summary>
        public virtual void TestPositionIncrementMultiFields()
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            IndexReader reader = writer.Reader;

            IndexSearcher searcher = NewSearcher(reader);
            PhraseQuery query = new PhraseQuery();
            query.Add(new Term("indexed_not_tokenized", "test1"));
            query.Add(new Term("indexed_not_tokenized", "test2"));

            // Request up to 1000 hits with no filter; exactly one is expected.
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            DoAssert(searcher.Doc(hits[0].Doc), true);
        /// <summary>
        /// Verifies that QueryBuilder.CreatePhraseQuery with a slop of 3 on a two-character CJK string
        /// equals a PhraseQuery with slop 3 and one term per character.
        /// </summary>
        public virtual void TestCJKSloppyPhrase()
            // individual CJK chars as terms
            SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(this);

            // Expected query: slop 3, one term for each of the two characters.
            PhraseQuery expected = new PhraseQuery();
            expected.Slop = 3;
            expected.Add(new Term("field", "中"));
            expected.Add(new Term("field", "国"));

            QueryBuilder builder = new QueryBuilder(analyzer);
            Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "中国", 3));
        /// <summary>
        /// Verifies that the phrase query built from "1 stop 2" equals a PhraseQuery with "1" added
        /// without an explicit position and "2" added at position 2.
        /// </summary>
        public virtual void TestPhraseQueryPositionIncrements()
            PhraseQuery expected = new PhraseQuery();
            expected.Add(new Term("field", "1"));
            expected.Add(new Term("field", "2"), 2);

            // Automaton built from a regular expression matching "stop" in any letter case; passed to the analyzer as stopList.
            CharacterRunAutomaton stopList = new CharacterRunAutomaton((new RegExp("[sS][tT][oO][pP]")).ToAutomaton());

            Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopList);

            QueryBuilder builder = new QueryBuilder(analyzer);
            Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "1 stop 2"));
Exemple #38
 /// <summary>
 /// Checks that the phrase "seventy seven" on "field" hits the expected document ids
 /// (77, 177, ..., 1977).
 /// </summary>
 public virtual void TestPhrase()
     PhraseQuery query = new PhraseQuery();
     query.Add(new Term("field", "seventy"));
     query.Add(new Term("field", "seven"));
     CheckHits(query, new int[] { 77, 177, 277, 377, 477, 577, 677, 777, 877, 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677, 1777, 1877, 1977 });
Exemple #39
 /// <summary>
 /// Checks that the phrase "seventish sevenon" on "field" produces no hits.
 /// </summary>
 public virtual void TestPhrase2()
     PhraseQuery query = new PhraseQuery();
     query.Add(new Term("field", "seventish"));
     query.Add(new Term("field", "sevenon"));
     // An empty expected-id array means no document should match.
     CheckHits(query, new int[] { });
           * This shows how to construct a phrase query containing shingles.
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testShingleAnalyzerWrapperPhraseQuery() throws Exception
        /// <summary>
        /// Builds a PhraseQuery from the tokens the analyzer emits for "this sentence", placing each
        /// term at the position accumulated from the position increments, then compares the ranks of the hits.
        /// </summary>
        public virtual void testShingleAnalyzerWrapperPhraseQuery()
            PhraseQuery q = new PhraseQuery();

            TokenStream ts = analyzer.tokenStream("content", "this sentence");
              // Starts at -1 so that a first increment of 1 yields position 0.
              int j = -1;

              PositionIncrementAttribute posIncrAtt = ts.addAttribute(typeof(PositionIncrementAttribute));
              CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));

              // NOTE(review): the next three statements are presumably the loop body; braces were lost in extraction.
              while (ts.incrementToken())
            j += posIncrAtt.PositionIncrement;
            string termText = termAtt.ToString();
            q.add(new Term("content", termText), j);

            // NOTE(review): the right-hand side below is truncated in this excerpt (the search call's
            // receiver and first argument are missing) - restore from the original file.
            ScoreDoc[] hits =, null, 1000).scoreDocs;
            int[] ranks = new int[] {0};
            compareRanks(hits, ranks);
        /// <summary>
        /// Adds a standard clause to this instance: a PhraseQuery when the term is a phrase,
        /// otherwise a TermQuery.
        /// </summary>
        /// <param name="term">Term to add to this query.</param>
        /// <param name="occurrence">Defines how the term is added to this query.</param>
        /// <param name="slop">The amount of allowed slop in a phrase query.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="term"/> is null.</exception>
        /// <remarks>
        /// Slop is the amount of movement each word is allowed in a non-exact phrase query.
        /// For instance, if you search for "Adobe Systems Incorporated" with a slop of 0, only
        /// results containing exactly that phrase are allowed. With a slop of 2, at most two
        /// movements can be made for each word, so the same search would also return
        /// "Adobe Incorporated Systems", "Systems Adobe Incorporated",
        /// and "Systems Incorporated Adobe".
        /// </remarks>
        public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop)
            if (term == null)
                throw new ArgumentNullException("term", "term cannot be null");

            if (term.IsPhrase) {
                // NOTE(review): no term or slop is set on the phrase query in the visible lines;
                // those statements were presumably dropped from this excerpt - confirm.
                PhraseQuery phraseQuery = new PhraseQuery();
                this.luceneQuery.Add(phraseQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
                phraseQuery = null;
            else {
                // Non-phrase terms become a plain TermQuery on the underlying Lucene term.
                TermQuery termQuery = new TermQuery(term.GetLuceneTerm());
                this.luceneQuery.Add(termQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
                termQuery = null;