/// <summary>
/// Compares a <see cref="SpanNearQuery"/> over two random terms (built with arguments 0 and true)
/// against a <see cref="PhraseQuery"/> by passing both to <c>AssertSameSet</c>.
/// </summary>
public virtual void TestSpanNearVersusPhrase()
     Term t1 = RandomTerm();
     Term t2 = RandomTerm();
     SpanQuery[] subquery = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
     // (0, true) -- presumably slop 0 and in-order matching; confirm against the SpanNearQuery constructor.
     SpanNearQuery q1 = new SpanNearQuery(subquery, 0, true);
     // NOTE(review): no terms are added to q2 in the lines visible here -- confirm whether
     // q2.Add(t1) / q2.Add(t2) were lost from this excerpt.
     PhraseQuery q2 = new PhraseQuery();
     AssertSameSet(q1, q2);
Ejemplo n.º 2
        /// <summary>
        /// Builds a "john" + "kennedy" phrase query on the "contents" field, wraps it in a
        /// <see cref="FilteredQuery"/> with a "john".."john" term-range filter, runs the standard
        /// highlighting helper and asserts that exactly two highlights were counted.
        /// </summary>
        public void TestGetBestFragmentsFilteredPhraseQuery()
            var helper = new TestHighlightRunner();
            helper.TestAction = () =>
                                        // Reset the highlight counter before this run.
                                        numHighlights = 0;
                                        var rf = new TermRangeFilter("contents", "john", "john", true, true);
                                        var pq = new PhraseQuery();
                                        pq.Add(new Term("contents", "john"));
                                        pq.Add(new Term("contents", "kennedy"));
                                        // NOTE(review): fq is not used in the visible lines and the highlighter below
                                        // is given `query`, not `fq` -- the step that searches with fq appears to be
                                        // missing from this excerpt; confirm against the full source.
                                        var fq = new FilteredQuery(pq, rf);

                                        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);
                                        // Currently highlights "John" and "Kennedy" separately
                                        Assert.IsTrue(numHighlights == 2,
                                                      "Failed to find correct number of highlights " + numHighlights +
                                                      " found");

Ejemplo n.º 3
        /// <summary>
        /// Asserts that <c>QueryBuilder.CreatePhraseQuery("field", "中国", 3)</c> equals a
        /// <see cref="PhraseQuery"/> with slop 3 containing one term per CJK character.
        /// </summary>
        public virtual void TestCJKSloppyPhrase()
            // individual CJK chars as terms
            SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(this);

            // Expected result: the two characters as separate terms with the requested slop.
            PhraseQuery expected = new PhraseQuery();
            expected.Slop = 3;
            expected.Add(new Term("field", "中"));
            expected.Add(new Term("field", "国"));

            QueryBuilder builder = new QueryBuilder(analyzer);
            Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "中国", 3));
Ejemplo n.º 4
        /// <summary>
        /// Asserts that building a phrase query from "1 stop 2" with an analyzer whose stop list
        /// matches "stop" (any letter case) yields a phrase of "1" and "2", where "2" is added
        /// with the extra argument 2.
        /// </summary>
        public virtual void TestPhraseQueryPositionIncrements()
            PhraseQuery expected = new PhraseQuery();
            expected.Add(new Term("field", "1"));
            // Second argument 2 -- presumably the explicit term position, leaving a gap where the
            // stop word was; confirm against PhraseQuery.Add.
            expected.Add(new Term("field", "2"), 2);

            // Stop list: an automaton accepting "stop" in any mix of upper and lower case.
            CharacterRunAutomaton stopList = new CharacterRunAutomaton((new RegExp("[sS][tT][oO][pP]")).ToAutomaton());

            Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopList);

            QueryBuilder builder = new QueryBuilder(analyzer);
            Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "1 stop 2"));
Ejemplo n.º 5
        /// <summary>
        /// Builds a Lucene <see cref="BooleanQuery"/> from the search text, the search type, the
        /// entity filter and the field criteria, runs it against the index searcher and converts
        /// each hit to an index model.
        /// </summary>
        /// <param name="query">The search text.</param>
        /// <param name="searchType">How the text is matched: ExactMatch, Fuzzy or Wildcard.</param>
        /// <param name="entities">Entity type ids to restrict the search to. Null or empty selects every cached entity type that supports indexing, has indexing enabled and is not named "Site".</param>
        /// <param name="fieldCriteria">Optional field/value criteria; each becomes a term clause using MUST when the criteria search type is And, otherwise SHOULD.</param>
        /// <param name="size">Number of results to return; 10 when not supplied.</param>
        /// <param name="from">Offset of the first result; when supplied, up to size * 10 hits are collected before slicing.</param>
        /// <param name="totalResultsAvailable">Receives the total hit count reported by the search.</param>
        /// <returns>Presumably the list of matching index models -- the code that fills and returns the list is not visible in this excerpt.</returns>
        public override List <IndexModelBase> Search(string query, SearchType searchType, List <int> entities, SearchFieldCriteria fieldCriteria, int?size, int?from, out long totalResultsAvailable)
            List <IndexModelBase> documents = new List <IndexModelBase>();

            totalResultsAvailable = 0;
            bool allEntities = false;

            BooleanQuery  queryContainer  = new BooleanQuery();
            List <string> combinedFields  = new List <string>();
            List <Type>   indexModelTypes = new List <Type>();
            Dictionary <string, Analyzer> combinedFieldAnalyzers = new Dictionary <string, Analyzer>();

            using (RockContext rockContext = new RockContext())
                var entityTypeService = new EntityTypeService(rockContext);
                if (entities == null || entities.Count == 0)
                    //add all entities
                    allEntities = true;
                    var selectedEntityTypes = EntityTypeCache.All().Where(e => e.IsIndexingSupported && e.IsIndexingEnabled && e.FriendlyName != "Site");

                    foreach (var entityTypeCache in selectedEntityTypes)

                // NOTE(review): the bodies of the loops and of the person check below are not visible
                // in this excerpt; presumably they add model types to indexModelTypes.
                foreach (var entityId in entities)
                    // get entities search model name
                    var entityType = entityTypeService.GetNoTracking(entityId);

                    // check if this is a person model, if so we need to add two model types one for person and the other for businesses
                    // wish there was a cleaner way to do this
                    if (entityType.Guid == SystemGuid.EntityType.PERSON.AsGuid())

                // De-duplicate the model types, then derive the searchable fields and their analyzers.
                indexModelTypes = indexModelTypes.Distinct().ToList();
                CombineIndexTypes(indexModelTypes, out combinedFields, out combinedFieldAnalyzers);

                // When specific entities were requested, require the document "type" to match one of
                // the selected model type names (lower-cased).
                if (entities != null && entities.Count != 0 && !allEntities)
                    var indexModelTypesQuery = new BooleanQuery();
                    indexModelTypes.ForEach(f => indexModelTypesQuery.Add(new TermQuery(new Term("type", f.Name.ToLower())), Occur.SHOULD));
                    queryContainer.Add(indexModelTypesQuery, Occur.MUST);

            TopDocs topDocs = null;
            // Use the analyzer in fieldAnalyzers if that field is in that dictionary, otherwise use StandardAnalyzer.
            // NOTE(review): `analyzer` is not referenced again in the visible lines.
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer: new StandardAnalyzer(_matchVersion), fieldAnalyzers: combinedFieldAnalyzers);

            if (fieldCriteria != null && fieldCriteria.FieldValues?.Count > 0)
                Occur occur = fieldCriteria.SearchType == CriteriaSearchType.And ? Occur.MUST : Occur.SHOULD;
                foreach (var match in fieldCriteria.FieldValues)
                    BooleanClause booleanClause = new BooleanClause(new TermQuery(new Term(match.Field, match.Value)), occur);
                    booleanClause.Query.Boost = match.Boost;
                    // NOTE(review): adding booleanClause to queryContainer is not visible in this excerpt.

            switch (searchType)
            case SearchType.ExactMatch:
                var wordQuery = new BooleanQuery();

                // Each whitespace-separated word becomes a prefix query over every combined field.
                if (!string.IsNullOrWhiteSpace(query))
                    var words = query.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    foreach (var word in words)
                        var innerQuery = new BooleanQuery();
                        combinedFields.ForEach(f => innerQuery.Add(new PrefixQuery(new Term(f, word.ToLower())), Occur.SHOULD));
                        wordQuery.Add(innerQuery, Occur.SHOULD);

                if (wordQuery.Count() != 0)
                    queryContainer.Add(wordQuery, Occur.MUST);

                // special logic to support emails
                // NOTE(review): `query` is dereferenced here and below without the null/whitespace
                // guard used above -- confirm callers never pass null.
                if (query.Contains("@"))
                    queryContainer.Add(new BooleanClause(new TermQuery(new Term("Email", query)), Occur.SHOULD));

                // special logic to support phone search
                if (query.IsDigitsOnly())
                    queryContainer.Add(new BooleanClause(new WildcardQuery(new Term("PhoneNumbers", "*" + query + "*")), Occur.SHOULD));

                // add a search for all the words as one single search term
                foreach (var field in combinedFields)
                    var phraseQuery = new PhraseQuery();
                    phraseQuery.Add(new Term(field, query.ToLower()));
                    queryContainer.Add(phraseQuery, Occur.SHOULD);


            case SearchType.Fuzzy:
                // One fuzzy query per combined field on the whole lower-cased search text.
                foreach (var field in combinedFields)
                    queryContainer.Add(new FuzzyQuery(new Term(field, query.ToLower())), Occur.SHOULD);


            case SearchType.Wildcard:
                bool enablePhraseSearch = true;

                if (!string.IsNullOrWhiteSpace(query))
                    BooleanQuery wildcardQuery = new BooleanQuery();

                    // break each search term into a separate query and add the * to the end of each
                    var queryTerms = query.Split(' ').Select(p => p.Trim()).ToList();

                    // special logic to support emails
                    if (queryTerms.Count == 1 && query.Contains("@"))
                        wildcardQuery.Add(new WildcardQuery(new Term("Email", "*" + query.ToLower() + "*")), Occur.SHOULD);
                        enablePhraseSearch = false;
                        // NOTE(review): the lines below presumably belong to an else branch whose
                        // keyword is not visible in this excerpt.
                        foreach (var queryTerm in queryTerms)
                            if (!string.IsNullOrWhiteSpace(queryTerm))
                                var innerQuery = new BooleanQuery();
                                combinedFields.ForEach(f => innerQuery.Add(new PrefixQuery(new Term(f, queryTerm.ToLower())), Occur.SHOULD));
                                wildcardQuery.Add(innerQuery, Occur.MUST);

                        // add special logic to help boost last names
                        if (queryTerms.Count() > 1 && (indexModelTypes.Contains(typeof(PersonIndex)) || indexModelTypes.Contains(typeof(BusinessIndex))))
                            // First term as a FirstName prefix plus last term as a LastName prefix (boost 30).
                            BooleanQuery nameQuery = new BooleanQuery
                                { new PrefixQuery(new Term("FirstName", queryTerms.First().ToLower())), Occur.MUST },
                                { new PrefixQuery(new Term("LastName", queryTerms.Last().ToLower()))
                                      Boost = 30
                                  }, Occur.MUST }
                            wildcardQuery.Add(nameQuery, Occur.SHOULD);

                            // Same again, with the first term matched against NickName instead.
                            nameQuery = new BooleanQuery
                                { new PrefixQuery(new Term("NickName", queryTerms.First().ToLower())), Occur.MUST },
                                { new PrefixQuery(new Term("LastName", queryTerms.Last().ToLower()))
                                      Boost = 30
                                  }, Occur.MUST }
                            wildcardQuery.Add(nameQuery, Occur.SHOULD);

                        // special logic to support phone search
                        if (query.IsDigitsOnly())
                            wildcardQuery.Add(new PrefixQuery(new Term("PhoneNumbers", queryTerms.First().ToLower())), Occur.SHOULD);

                    queryContainer.Add(wildcardQuery, Occur.MUST);

                // add a search for all the words as one single search term
                if (enablePhraseSearch)
                    // add a search for all the words as one single search term
                    foreach (var field in combinedFields)
                        var phraseQuery = new PhraseQuery();
                        phraseQuery.Add(new Term(field, query.ToLower()));
                        queryContainer.Add(phraseQuery, Occur.SHOULD);


            // Default page size when the caller does not supply one.
            int returnSize = 10;

            if (size.HasValue)
                returnSize = size.Value;


            // NOTE(review): the second assignment to topDocs below presumably sits in an else
            // branch whose keyword is not visible in this excerpt.
            if (from.HasValue)
                TopScoreDocCollector collector = TopScoreDocCollector.Create(returnSize * 10, true);   // Search for 10 pages with returnSize entries in each page
                _indexSearcher.Search(queryContainer, collector);
                topDocs = collector.GetTopDocs(from.Value, returnSize);
                topDocs = _indexSearcher.Search(queryContainer, returnSize);

            // NOTE(review): topDocs is dereferenced here, before the null check that follows --
            // the check cannot protect this line; confirm the intended order.
            totalResultsAvailable = topDocs.TotalHits;

            if (topDocs != null)
                foreach (var hit in topDocs.ScoreDocs)
                    var document = LuceneDocToIndexModel(queryContainer, hit);
                    if (document != null)
 /// <summary>
 /// Runs three queries against the given searcher through <c>RunQuery</c>: a term query on
 /// "body", a term query on "titleTokenized", and a two-term phrase query on "body".
 /// </summary>
 /// <param name="s">The searcher to run the queries against.</param>
 protected internal virtual void SmokeTestSearcher(IndexSearcher s)
     RunQuery(s, new TermQuery(new Term("body", "united")));
     RunQuery(s, new TermQuery(new Term("titleTokenized", "states")));
     // Phrase "united" followed by "states" in the body field.
     PhraseQuery pq = new PhraseQuery();
     pq.Add(new Term("body", "united"));
     pq.Add(new Term("body", "states"));
     RunQuery(s, pq);
Ejemplo n.º 7
        /// <summary>
        /// Searches the Lucene index for the words of the "searchText" request value, loads one
        /// page of hits into view models, and stores the results and paging data in
        /// ViewData / ViewBag.
        /// </summary>
        /// <param name="page">Requested page number; values below 1 are treated as 1.</param>
        protected void SearchContent(int page)
            string indexPath = @"C:\lucenedir";//create the lucenedir folder if it does not exist; in the test environment it is created directly under the site root, in production it must be created separately

            string[] kw = BookShop.Common.WebCommon.PanGuSplit(Request["searchText"]);
            //string kw = "面向对象";
            // NOTE(review): no Dispose/Close of directory, reader or searcher is visible in this excerpt.
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            PhraseQuery query = new PhraseQuery();

            foreach (string word in kw)//originally the user separated words with spaces; each space-delimited piece is one word, e.g. "computer   major"
                //query.Add(new Term("title", word));
                query.Add(new Term("msg", word));
            //query.Add(new Term("body","语言"));-- further query terms can be added; the terms are combined together and their order does not matter.
            // query.Add(new Term("body", "大学生"));
            //query.Add(new Term("body", kw));// articles whose body contains kw
            query.SetSlop(100);//maximum distance allowed between the query words; words that are too far apart in a document are not a meaningful match (e.g. "university student" and "resume" separated by too many words)
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);//run the search for query; the results are placed in the collector
            int pageSize = 12;

            page = page < 1 ? 1 : page;
            // NOTE(review): the page of hits is fetched here, before `page` is clamped to pageCount
            // below, so an out-of-range page yields no rows while the reported page index changes.
            ScoreDoc[] docs      = collector.TopDocs((page - 1) * pageSize, pageSize).scoreDocs;
            int        pageCount = (int)Math.Ceiling((double)collector.GetTotalHits() / pageSize);

            // With zero hits pageCount is 0, so this sets page to 0.
            page = page > pageCount ? pageCount : page;
            //ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;// gets the documents of all results; GetTotalHits() is the total hit count; TopDocs(300, 20) returns documents 300 (start) through 320 (end).
            List <SearchContent> list = new List <SearchContent>();

            // NOTE(review): adding viewmodel to list is not visible in this excerpt.
            for (int i = 0; i < docs.Length; i++)
                SearchContent viewmodel = new SearchContent();
                int      docId = docs[i].doc;                        //id of the result document (assigned internally by Lucene)
                Document doc   = searcher.Doc(docId);                //load the document details for that id
                viewmodel.Id       = Convert.ToInt32(doc.Get("id")); // read back the value stored in the field
                viewmodel.Title    = doc.Get("title");
                viewmodel.ISBN     = doc.Get("isbn");
                viewmodel.Price    = Convert.ToDecimal(doc.Get("price"));
                viewmodel.Discount = Convert.ToInt32(doc.Get("discount"));
                viewmodel.Msg      = Common.WebCommon.CreateHightLight(Request["searchText"], doc.Get("msg"));
            // Record of this search; NOTE(review): persisting searchmodel is not visible in this excerpt.
            SearchDatails searchmodel = new SearchDatails();

            searchmodel.Id             = Guid.NewGuid();
            searchmodel.KeyWords       = Request["searchText"];
            searchmodel.SearchDateTime = DateTime.Now;
            // Hand the results, paging state and wrapper markup to the view.
            ViewData["list"]       = list;
            ViewBag.PageIndex      = page;
            ViewBag.PageCount      = pageCount;
            ViewData["booktop"]    = "<div class='bottom-grid'>";
            ViewData["bookfoot"]   = "<div class='clearfix'></div></div>";
            ViewData["searchname"] = Request["searchText"];
Ejemplo n.º 8
 /// <summary>
 /// Checks, via <c>CheckHits</c>, that the phrase "seventy" + "seven" on "field" is found
 /// in exactly the listed document ids.
 /// </summary>
 public virtual void TestPhrase()
     PhraseQuery query = new PhraseQuery();
     query.Add(new Term("field", "seventy"));
     query.Add(new Term("field", "seven"));
     // Expected ids: 77, 177, ..., 1977 (every id ending in 77 up to 1977).
     CheckHits(query, new int[] { 77, 177, 277, 377, 477, 577, 677, 777, 877, 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677, 1777, 1877, 1977 });
        /// <summary>
        /// Builds a query for <paramref name="fieldName"/> from free text: a boosted phrase query over
        /// all analyzed terms plus, per term, either an exact term match or (for terms of three or
        /// more characters) an exact match combined with a prefix match.
        /// </summary>
        /// <param name="fieldName">The field the generated terms target.</param>
        /// <param name="query">The raw search text.</param>
        /// <param name="analyzer">Analyzer used to tokenize <paramref name="query"/>.</param>
        /// <returns>The composed query, or null when no clauses were produced.</returns>
        public static Query GenerateQuery(string fieldName, string query, Analyzer analyzer)
            // NOTE(review): the statement guarded by this null check is not visible in this excerpt.
            if (query == null)

            var resultQuery = new BooleanQuery();
            var phraseQuery = new PhraseQuery {
                Slop = 0

            //not much to search, only do exact match
            if (query.Length < 4)
                phraseQuery.Add(new Term(fieldName, query));

                resultQuery.Add(phraseQuery, Occur.MUST);

            //add phrase match with boost, we will add the terms to the phrase below
            phraseQuery.Boost = 20;
            resultQuery.Add(phraseQuery, Occur.SHOULD);

            // NOTE(review): the token stream is opened for the literal field "SearchText" rather
            // than fieldName -- confirm this is intentional (it matters for per-field analyzers).
            var tokenStream   = analyzer.TokenStream("SearchText", new StringReader(query));
            var termAttribute = tokenStream.AddAttribute <ITermAttribute>();

            while (tokenStream.IncrementToken())
                var term = termAttribute.Term;

                // Every analyzed term also extends the boosted phrase query added above.
                phraseQuery.Add(new Term(fieldName, term));

                var exactMatch = new TermQuery(new Term(fieldName, term));

                //if the term is larger than 3, we'll do both exact match and wildcard/prefix
                // (the condition is >= 3, i.e. three characters or more)
                if (term.Length >= 3)
                    var innerQuery = new BooleanQuery();

                    //add exact match with boost
                    exactMatch.Boost = 10;
                    innerQuery.Add(exactMatch, Occur.SHOULD);

                    //add wildcard
                    var pq = new PrefixQuery(new Term(fieldName, term));
                    //needed so that wildcard searches will return a score
                    pq.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE; //new ErrorCheckingScoringBooleanQueryRewrite();
                    innerQuery.Add(pq, Occur.SHOULD);

                    resultQuery.Add(innerQuery, Occur.MUST);
                    // NOTE(review): presumably the else branch (short terms get the exact match only);
                    // the else keyword is not visible in this excerpt.
                    resultQuery.Add(exactMatch, Occur.MUST);

            return(resultQuery.Clauses.Count > 0 ? resultQuery : null);
Ejemplo n.º 10
        /// <summary>
        /// Indexes 30 documents with a "noTf" field (created with omitType) and a "tf" field
        /// (created with normalType), then searches them with term, phrase and boolean queries.
        /// The phrase search is expected to throw; the boolean search is expected to count 15 hits.
        /// </summary>
        public virtual void TestBasic()
            Directory   dir      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2)));

            StringBuilder sb   = new StringBuilder(265);
            string        term = "term";

            // Document i contains "term" repeated i + 1 times; odd documents get an extra "notf"
            // token in noTf, even documents an extra "tf" token in tf.
            // NOTE(review): adding the fields to doc and doc to the writer is not visible in this excerpt.
            for (int i = 0; i < 30; i++)
                Document doc = new Document();
                sb.Append(term).Append(" ");
                string content = sb.ToString();
                Field  noTf    = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType);

                Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType);


            // flush

             * Verify the index
            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            searcher.Similarity = new SimpleSimilarity();

            Term      a  = new Term("noTf", term);
            Term      b  = new Term("tf", term);
            Term      c  = new Term("noTf", "notf");
            Term      d  = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d);

            // NOTE(review): no terms are added to pq in the visible lines; the search below is
            // expected to throw (see the Assert.Fail that follows it).
            PhraseQuery pq = new PhraseQuery();

                searcher.Search(pq, 10);
                Assert.Fail("did not hit expected exception");
            catch (Exception e)
                Exception cause = e;
                // If the searcher uses an executor service, the IAE is wrapped into other exceptions
                while (cause.InnerException != null)
                    cause = cause.InnerException;
                // NOTE(review): the type checked is InvalidOperationException although the comment and
                // message say "IAE" -- confirm which exception type is actually expected.
                if (!(cause is InvalidOperationException))
                    throw new InvalidOperationException("Expected an IAE", e);
                } // else OK because positions are not indexed

            // Each term query is run with its own counting collector helper.
            searcher.Search(q1, new CountingHitCollectorAnonymousInnerClassHelper(this));

            searcher.Search(q2, new CountingHitCollectorAnonymousInnerClassHelper2(this));

            searcher.Search(q3, new CountingHitCollectorAnonymousInnerClassHelper3(this));

            searcher.Search(q4, new CountingHitCollectorAnonymousInnerClassHelper4(this));

            // Documents matching both "term" in noTf and "tf" in tf: the 15 even-numbered documents.
            BooleanQuery bq = new BooleanQuery();

            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new CountingHitCollectorAnonymousInnerClassHelper5(this));
            Assert.AreEqual(15, CountingHitCollector.Count);

Ejemplo n.º 11
        /// <summary>
        /// Creates a query from the analysis chain.
        /// <para/>
        /// Expert: this is more useful for subclasses such as queryparsers.
        /// If using this class directly, just use <see cref="CreateBooleanQuery(string, string)"/>
        /// and <see cref="CreatePhraseQuery(string, string)"/>. </summary>
        /// <remarks>
        /// The text is analyzed once to count tokens and positions, then replayed from a
        /// <see cref="CachingTokenFilter"/> to build a term query, a boolean query, a
        /// multi-phrase query or a phrase query depending on those counts and on
        /// <paramref name="quoted"/>.
        /// </remarks>
        /// <param name="analyzer"> Analyzer used for this query. </param>
        /// <param name="operator"> Default boolean operator used for this query. </param>
        /// <param name="field"> Field to create queries against. </param>
        /// <param name="queryText"> Text to be passed to the analysis chain. </param>
        /// <param name="quoted"> <c>true</c> if phrases should be generated when terms occur at more than one position. </param>
        /// <param name="phraseSlop"> Slop factor for phrase/multiphrase queries. </param>
        protected Query CreateFieldQuery(Analyzer analyzer, Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
            Debug.Assert(@operator == Occur.SHOULD || @operator == Occur.MUST);
            // Use the analyzer to get all the tokens, and then build a TermQuery,
            // PhraseQuery, or nothing based on the term count
            CachingTokenFilter          buffer     = null;
            ITermToBytesRefAttribute    termAtt    = null;
            IPositionIncrementAttribute posIncrAtt = null;
            // NOTE(review): numTokens is never incremented in the lines visible here although the
            // branches below depend on it -- the increment was presumably lost from this excerpt.
            int  numTokens     = 0;
            int  positionCount = 0;
            bool severalTokensAtSamePosition = false;
            bool hasMoreTokens = false;

            TokenStream source = null;

                // Wrap the analyzer's stream in a caching filter so the tokens can be replayed
                // further down, and pick up the term / position-increment attributes when present.
                source = analyzer.GetTokenStream(field, new StringReader(queryText));
                buffer = new CachingTokenFilter(source);

                if (buffer.HasAttribute <ITermToBytesRefAttribute>())
                    termAtt = buffer.GetAttribute <ITermToBytesRefAttribute>();
                if (buffer.HasAttribute <IPositionIncrementAttribute>())
                    posIncrAtt = buffer.GetAttribute <IPositionIncrementAttribute>();

                // First pass: walk every token. A non-zero increment advances positionCount;
                // presumably (else branch not visible) a zero increment marks stacked tokens.
                if (termAtt != null)
                        hasMoreTokens = buffer.IncrementToken();
                        while (hasMoreTokens)
                            int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                            if (positionIncrement != 0)
                                positionCount += positionIncrement;
                                severalTokensAtSamePosition = true;
                            hasMoreTokens = buffer.IncrementToken();
                    catch (System.IO.IOException)
                        // ignore
            catch (System.IO.IOException e)
                throw new Exception("Error analyzing query text", e);

            // rewind the buffer stream
            // NOTE(review): the rewind call itself is not visible in this excerpt.

            BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

            // NOTE(review): the result for zero tokens is not visible in this excerpt.
            if (numTokens == 0)
            else if (numTokens == 1)
                    // Exactly one token: a plain term query on a copy of its bytes.
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                catch (System.IO.IOException)
                    // safe to ignore, because we know the number of tokens
                return(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))));
                if (severalTokensAtSamePosition || (!quoted))
                    if (positionCount == 1 || (!quoted))
                        // no phrase query:

                        if (positionCount == 1)
                            // simple case: only one position, with synonyms
                            BooleanQuery q = NewBooleanQuery(true);
                            for (int i = 0; i < numTokens; i++)
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                catch (System.IO.IOException)
                                    // safe to ignore, because we know the number of tokens
                                Query currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                                q.Add(currentQuery, Occur.SHOULD);
                            // multiple positions
                            BooleanQuery q            = NewBooleanQuery(false);
                            Query        currentQuery = null;
                            for (int i = 0; i < numTokens; i++)
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                catch (System.IO.IOException)
                                    // safe to ignore, because we know the number of tokens
                                // A zero increment stacks this token onto the previous one: the pending
                                // query is turned into (or extended as) a SHOULD group of alternatives.
                                if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                                    if (!(currentQuery is BooleanQuery))
                                        Query t = currentQuery;
                                        currentQuery = NewBooleanQuery(true);
                                        ((BooleanQuery)currentQuery).Add(t, Occur.SHOULD);
                                    ((BooleanQuery)currentQuery).Add(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))), Occur.SHOULD);
                                    // Presumably the else branch: flush the pending query with the
                                    // default operator and start a new one for this position.
                                    if (currentQuery != null)
                                        q.Add(currentQuery, @operator);
                                    currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                            // Flush the last pending query.
                            q.Add(currentQuery, @operator);
                        // phrase query:
                        MultiPhraseQuery mpq = NewMultiPhraseQuery();
                        mpq.Slop = phraseSlop;
                        IList <Term> multiTerms = new List <Term>();
                        int          position   = -1;
                        for (int i = 0; i < numTokens; i++)
                            int positionIncrement = 1;
                                bool hasNext = buffer.IncrementToken();
                                Debug.Assert(hasNext == true);
                                if (posIncrAtt != null)
                                    positionIncrement = posIncrAtt.PositionIncrement;
                            catch (System.IO.IOException)
                                // safe to ignore, because we know the number of tokens

                            // A positive increment closes the group of terms collected for the
                            // previous position; with position increments enabled it is added at
                            // that explicit position.
                            // NOTE(review): the alternative branch and the clearing of multiTerms
                            // are not visible in this excerpt.
                            if (positionIncrement > 0 && multiTerms.Count > 0)
                                if (enablePositionIncrements)
                                    mpq.Add(multiTerms.ToArray(), position);
                            position += positionIncrement;
                            multiTerms.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        // Add the final group after the loop.
                        if (enablePositionIncrements)
                            mpq.Add(multiTerms.ToArray(), position);
                    // Plain phrase query: one term per token.
                    PhraseQuery pq = NewPhraseQuery();
                    pq.Slop = phraseSlop;
                    int position = -1;

                    for (int i = 0; i < numTokens; i++)
                        int positionIncrement = 1;

                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            if (posIncrAtt != null)
                                positionIncrement = posIncrAtt.PositionIncrement;
                        catch (System.IO.IOException)
                            // safe to ignore, because we know the number of tokens

                        // With position increments enabled the term is added at its explicit position;
                        // the last line is presumably the else branch (keyword not visible), which adds
                        // the term without a position.
                        if (enablePositionIncrements)
                            position += positionIncrement;
                            pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)), position);
                            pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
Ejemplo n.º 12
        /// <summary>
        /// Gets the search content: splits the "txtSearch" request value into words, searches the
        /// Lucene index for them in the "Content" and "Title" fields, and builds view models with
        /// the matched keywords highlighted.
        /// </summary>
        /// <returns>Presumably the list of view models -- the return statement is not visible in this excerpt.</returns>
        private List <ViewModelContent> ShowSearchContent()
            //string indexPath = @"C:\lucenedir";
            string        indexPath = Server.MapPath("~/lucenedir");
            List <string> list      = Common.WebCommon.PanGuSplitWord(Request["txtSearch"].Trim());//split the search text entered by the user into words.
            // NOTE(review): no Dispose/Close of directory, reader or searcher is visible in this excerpt.
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            //PhraseQuery query = new PhraseQuery();
            //foreach (string word in list)// originally the user separated words with spaces; each space-delimited piece is one word, e.g. "computer   major"
            //    query.Add(new Term("Content", word));// match on content
            //query.SetSlop(100);// maximum distance allowed between the query words; words too far apart in a document are not a meaningful match (e.g. "university student" and "resume" separated by too many words)
            PhraseQuery query = new PhraseQuery();

            foreach (string word in list)             //originally the user separated words with spaces; each space-delimited piece is one word, e.g. "computer   major"
                query.Add(new Term("Content", word)); //match on the Content field
            query.SetSlop(100);                       //maximum distance allowed between the query words; words too far apart in a document are not a meaningful match (e.g. "university student" and "resume" separated by too many words)

            // NOTE(review): unlike query, queryTitle gets no SetSlop call in the visible lines.
            PhraseQuery queryTitle = new PhraseQuery();

            foreach (string word in list)                //originally the user separated words with spaces; each space-delimited piece is one word, e.g. "computer   major"
                queryTitle.Add(new Term("Title", word)); //match on the Title field
            // A document matches when either the content phrase or the title phrase matches.
            BooleanQuery booleanQuery = new BooleanQuery();

            booleanQuery.Add(query, BooleanClause.Occur.SHOULD);
            booleanQuery.Add(queryTitle, BooleanClause.Occur.SHOULD);

            //query.Add(new Term("body","语言"));-- further query terms can be added; the terms are combined together and their order does not matter.
            // query.Add(new Term("body", "大学生"));
            // query.Add(new Term("body", kw));// articles whose body contains kw

            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(booleanQuery, null, collector);                             //run the search; the results are placed in the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; //gets the documents of all results; GetTotalHits() is the total hit count; TopDocs(300, 20) would return documents 300 (start) through 320 (end).
            List <ViewModelContent> viewModelList = new List <ViewModelContent>();

            // NOTE(review): adding viewModel to viewModelList is not visible in this excerpt.
            for (int i = 0; i < docs.Length; i++)
                ViewModelContent viewModel = new ViewModelContent();
                int      docId             = docs[i].doc;      //id of the result document (assigned internally by Lucene)
                Document doc = searcher.Doc(docId);            //load the document details for that id
                viewModel.Id = Convert.ToInt32(doc.Get("Id")); // read back the value stored in the field
                //viewModel.Title = doc.Get("Title");

                viewModel.Title   = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Title"));   //highlight the search keywords.
                viewModel.Content = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Content")); //highlight the search keywords.
            // Record of this search; NOTE(review): persisting searchDetail is not visible in this excerpt.
            SearchDetail searchDetail = new SearchDetail();

            //searchDetail.Id = Guid.NewGuid();
            searchDetail.KeyWords       = Request["txtSearch"].Trim();
            searchDetail.SearchDateTime = DateTime.Now;

Ejemplo n.º 13
        /// <summary>
        /// Builds a Lucene query for a string term after running it through the analyzer:
        /// a wildcard query, a prefix query, a single term query, or a phrase query over all
        /// analyzed tokens, each carrying the requested boost.
        /// </summary>
        /// <param name="fieldName">The field the query targets.</param>
        /// <param name="term">The raw term text.</param>
        /// <param name="type">The term type; must be String, Prefix or WildCard.</param>
        /// <param name="analyzer">Analyzer used to tokenize <paramref name="term"/>.</param>
        /// <param name="boost">Query boost; 1 when not supplied.</param>
        /// <param name="similarity">Not referenced in the lines visible in this excerpt.</param>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="type"/> is not String, Prefix or WildCard.</exception>
        public static Query AnalyzedTerm(string fieldName, string term, LuceneTermType type, Analyzer analyzer, float?boost = null, float?similarity = null)
            if (type != LuceneTermType.String && type != LuceneTermType.Prefix && type != LuceneTermType.WildCard)
                throw new InvalidOperationException("Analyzed terms can be only created from string values.");

            if (boost.HasValue == false)
                boost = 1;

            // Wildcard terms go through their own analysis helper and return immediately.
            if (type == LuceneTermType.WildCard)
                return(new WildcardQuery(GetAnalyzedWildcardTerm(fieldName, term, analyzer))
                    Boost = boost.Value

            var tokenStream = analyzer.ReusableTokenStream(fieldName, new StringReader(term));
            var terms       = new List <string>();

            // NOTE(review): adding the token text to `terms` is not visible in this excerpt.
            while (tokenStream.IncrementToken())
                var attribute = (TermAttribute)tokenStream.GetAttribute <ITermAttribute>();

            if (type == LuceneTermType.Prefix)
                if (terms.Count != 0)
                    // Use the first analyzed token, dropping a trailing asterisk if it survived analysis.
                    var first      = terms[0];
                    var actualTerm = first[first.Length - 1] == AsteriskChar?first.Substring(0, first.Length - 1) : first;

                    return(new PrefixQuery(new Term(fieldName, actualTerm))
                        Boost = boost.Value
                // if the term that we are trying to prefix has been removed entirely by the analyzer, then we are going
                // to cheat a bit, and check for both the term in as specified and the term in lower case format so we can
                // find it regardless of casing
                // The last character of the raw term is dropped unconditionally here (presumably the trailing '*').
                var removeStar   = term.Substring(0, term.Length - 1);
                var booleanQuery = new BooleanQuery
                    Clauses =
                        new BooleanClause(new PrefixQuery(new Term(fieldName, removeStar)),                    Occur.SHOULD),
                        new BooleanClause(new PrefixQuery(new Term(fieldName, removeStar.ToLowerInvariant())), Occur.SHOULD)
                    Boost = boost.Value

            // A single analyzed token becomes a plain term query.
            if (terms.Count == 1)
                return(new TermQuery(new Term(fieldName, terms[0]))
                    Boost = boost.Value

            // Otherwise all analyzed tokens form a phrase query, in order.
            var pq = new PhraseQuery
                Boost = boost.Value

            foreach (var t in terms)
                pq.Add(new Term(fieldName, t));

Ejemplo n.º 14
        /// <summary>
        /// Adds a clause for <paramref name="term"/> to the underlying Lucene boolean query.
        /// </summary>
        /// <param name="term">Term to add to this query; must not be null.</param>
        /// <param name="occurrence">Defines how the clause is combined with the rest of the query.</param>
        /// <param name="slop">The amount of slop allowed in a phrase query.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="term"/> is null.</exception>
        /// <remarks>
        /// Slop is the amount of movement words are allowed in a non-exact phrase query.
        /// For instance, searching for "Adobe Systems Incorporated" with a slop of 0 only matches
        /// that exact phrase. With a slop of 2, results could also be returned for reorderings such as
        /// "Adobe Incorporated Systems", "Systems Adobe Incorporated" and "Systems Incorporated Adobe".
        /// NOTE(review): in the lines visible here neither <paramref name="slop"/> nor the words of
        /// <paramref name="term"/> are applied to the phrase query before it is added - confirm against
        /// the full source, which appears to have lost lines and braces in this listing.
        /// </remarks>
        public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop)
            if (term == null)
                throw new ArgumentNullException("term", "term cannot be null");

            // Phrase terms become a PhraseQuery clause; everything else becomes a TermQuery clause.
            if (term.IsPhrase) {
                PhraseQuery phraseQuery = new PhraseQuery();
                this.luceneQuery.Add(phraseQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
                phraseQuery = null;
            else {
                TermQuery termQuery = new TermQuery(term.GetLuceneTerm());
                this.luceneQuery.Add(termQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
                termQuery = null;
Ejemplo n.º 15
        // Exercises FastVectorHighlighter.GetBestFragments with term, phrase and nested boolean
        // queries against two fields: one with and one without a very long word.
        // NOTE(review): this listing has lost its braces (each scenario below redeclares the same
        // locals, so they were presumably separate scopes) and the lines that add the fields to the
        // document and the document to the writer - confirm against the full source.
        public void TestPhraseHighlightTest()
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            Document    doc    = new Document();
            FieldType   type   = new FieldType(TextField.TYPE_STORED);

            // Term vectors with offsets and positions are enabled on the stored text field type.
            type.StoreTermVectorOffsets   = (true);
            type.StoreTermVectorPositions = (true);
            type.StoreTermVectors         = (true);
            // Both field values contain the misspelling "highlighed" (sic); the queries below target that token.
            Field longTermField   = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type);
            Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            IndexReader           reader      = DirectoryReader.Open(writer, true);
            int    docId = 0;
            String field = "no_long_term";

                // Scenario 1: three required term queries; with size argument 18 one fragment is expected.
                BooleanQuery query = new BooleanQuery();
                query.Add(new TermQuery(new Term(field, "test")), Occur.MUST);
                query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                // highlighted results are centered
                assertEquals(1, bestFragments.Length);
                assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
                // Scenario 2: a sloppy phrase (slop 5) combined with two required term queries.
                BooleanQuery query = new BooleanQuery();
                PhraseQuery  pq    = new PhraseQuery();
                pq.Add(new Term(field, "test"));
                pq.Add(new Term(field, "foo"));
                pq.Add(new Term(field, "highlighed"));
                pq.Slop = (5);
                query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.Add(pq, Occur.MUST);
                query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                // With size argument 18 no fragment is expected.
                assertEquals(0, bestFragments.Length);
                bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                             docId, field, 30, 1);
                // With size argument 30 the whole phrase match is expected in one fragment.
                assertEquals(1, bestFragments.Length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
                // Scenario 3: the sloppy phrase on its own (slop 3).
                PhraseQuery query = new PhraseQuery();
                query.Add(new Term(field, "test"));
                query.Add(new Term(field, "foo"));
                query.Add(new Term(field, "highlighed"));
                query.Slop = (3);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                // With size argument 18 no fragment is expected.
                assertEquals(0, bestFragments.Length);
                bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                             docId, field, 30, 1);
                // With size argument 30 one fragment covering the phrase is expected.
                assertEquals(1, bestFragments.Length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
                // Scenario 4: phrase ending in the correctly spelled "highlighted" with slop 30;
                // no fragment is expected with size argument 18.
                PhraseQuery query = new PhraseQuery();
                query.Add(new Term(field, "test"));
                query.Add(new Term(field, "foo"));
                query.Add(new Term(field, "highlighted"));
                query.Slop = (30);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                assertEquals(0, bestFragments.Length);
                // Scenario 5: the same sloppy phrase used both inside a nested boolean query and at the top level.
                BooleanQuery query = new BooleanQuery();
                PhraseQuery  pq    = new PhraseQuery();
                pq.Add(new Term(field, "test"));
                pq.Add(new Term(field, "foo"));
                pq.Add(new Term(field, "highlighed"));
                pq.Slop = (5);
                BooleanQuery inner = new BooleanQuery();
                inner.Add(pq, Occur.MUST);
                inner.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.Add(inner, Occur.MUST);
                query.Add(pq, Occur.MUST);
                query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                assertEquals(0, bestFragments.Length);

                bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                             docId, field, 30, 1);
                // With size argument 30 one fragment covering the phrase is expected.
                assertEquals(1, bestFragments.Length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);

            // Scenario 6: switch to the field that contains the very long word.
            field = "long_term";
                BooleanQuery query = new BooleanQuery();
                query.Add(new TermQuery(new Term(field,
                                                 "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
                query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery    = highlighter.GetFieldQuery(query, reader);
                String[]   bestFragments = highlighter.GetBestFragments(fieldQuery, reader,
                                                                        docId, field, 18, 1);
                // One fragment is expected; the listing ends before any assertion on its text.
                assertEquals(1, bestFragments.Length);
Ejemplo n.º 16
        // Runs a two-term phrase query against the "indexed_not_tokenized" field and expects exactly one hit,
        // which is then handed to DoAssert.
        // NOTE(review): no documents are added to the writer in the lines visible here, and the braces are
        // missing from this listing - confirm against the full source.
        public virtual void TestPositionIncrementMultiFields()
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            IndexReader reader = writer.Reader;

            IndexSearcher searcher = NewSearcher(reader);
            // Phrase "test1 test2" on the same field.
            PhraseQuery query = new PhraseQuery();
            query.Add(new Term("indexed_not_tokenized", "test1"));
            query.Add(new Term("indexed_not_tokenized", "test2"));

            // Ask for up to 1000 hits with no filter; exactly one document must match.
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            DoAssert(searcher.Doc(hits[0].Doc), true);
Ejemplo n.º 17
        // Checks that FastVectorHighlighter produces the same highlighted fragment for two phrase queries
        // (one using the individual URL tokens, one using the synonym token) and for their OR combination.
        // NOTE(review): this listing has lost its braces, the end of the CannedTokenStream call (where the
        // `syn` token was presumably passed) and the lines adding the field/document to the writer - verify.
        public void TestBooleanPhraseWithSynonym()
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            Document    doc    = new Document();
            FieldType   type   = new FieldType(TextField.TYPE_NOT_STORED);

            // Term vectors with offsets and positions are enabled on the unstored text field type.
            type.StoreTermVectorOffsets   = (true);
            type.StoreTermVectorPositions = (true);
            type.StoreTermVectors         = (true);
            // Synonym token spanning offsets 6-29, the same range covered by "http" .. "com" below.
            Token syn = new Token("httpwwwfacebookcom", 6, 29);

            // A position increment of 0 stacks the synonym on the position of the preceding token.
            syn.PositionIncrement = (0);
            CannedTokenStream ts = new CannedTokenStream(
                new Token("test", 0, 4),
                new Token("http", 6, 10),
                new Token("www", 13, 16),
                new Token("facebook", 17, 25),
                new Token("com", 26, 29)
            Field field = new Field("field", ts, type);

            // The stored value is supplied separately from the canned token stream.
            doc.Add(new StoredField("field", "Test:"));
            FastVectorHighlighter highlighter = new FastVectorHighlighter();

            IndexReader reader = DirectoryReader.Open(writer, true);
            int         docId  = 0;

            // query1: match
            PhraseQuery pq = new PhraseQuery();

            pq.Add(new Term("field", "test"));
            pq.Add(new Term("field", "http"));
            pq.Add(new Term("field", "www"));
            pq.Add(new Term("field", "facebook"));
            pq.Add(new Term("field", "com"));
            FieldQuery fieldQuery = highlighter.GetFieldQuery(pq, reader);

            String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
            assertEquals("<b>Test:</b>", bestFragments[0]);

            // query2: match (same phrase, with the synonym token in place of "http")
            PhraseQuery pq2 = new PhraseQuery();

            pq2.Add(new Term("field", "test"));
            pq2.Add(new Term("field", "httpwwwfacebookcom"));
            pq2.Add(new Term("field", "www"));
            pq2.Add(new Term("field", "facebook"));
            pq2.Add(new Term("field", "com"));
            fieldQuery    = highlighter.GetFieldQuery(pq2, reader);
            bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
            assertEquals("<b>Test:</b>", bestFragments[0]);

            // query3: OR query1 and query2 together
            BooleanQuery bq = new BooleanQuery();

            bq.Add(pq, Occur.SHOULD);
            bq.Add(pq2, Occur.SHOULD);
            fieldQuery    = highlighter.GetFieldQuery(bq, reader);
            bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
            assertEquals("<b>Test:</b>", bestFragments[0]);

Ejemplo n.º 18
 // Verifies that the phrase "seventish sevenon" on "field" matches no documents:
 // CheckHits is given an empty array of expected document ids.
 // NOTE(review): the method braces are missing from this listing.
 public virtual void TestPhrase2()
     PhraseQuery query = new PhraseQuery();
     query.Add(new Term("field", "seventish"));
     query.Add(new Term("field", "sevenon"));
     CheckHits(query, new int[] { });
Ejemplo n.º 19
        // Translates a parsed string-like field value into a Lucene query for currentField,
        // choosing between term, fuzzy, phrase, wildcard and prefix queries based on the token kind.
        // NOTE(review): this listing has lost its braces, apparently a `default:` label before the final
        // throw, and the statements that apply the slop and return the phrase query - confirm against the source.
        private Query CreateStringValueQuery(QueryFieldValue value, FieldInfo currentField)
            switch (value.Token)
            // Number and String tokens share the same handling.
            case SnLucLexer.Token.Number:
            case SnLucLexer.Token.String:
                // The two sentinel texts bypass analysis and map directly to fixed term queries.
                if (value.StringValue == ContentQuery.EmptyText)
                    return(new TermQuery(new Term(currentField.Name, value.StringValue)));
                if (value.StringValue == ContentQuery.EmptyInnerQueryText)
                    return(new TermQuery(new Term("Id", NumericUtils.IntToPrefixCoded(0))));

                var words = GetAnalyzedText(currentField.Name, value.StringValue);

                // When analysis yields nothing, continue with a single empty word instead of returning null.
                if (words.Length == 0)
                    words = new String[] { String.Empty }
                ;                                              //return null;
                // One word: exact term query, or a fuzzy query when a fuzzy value was supplied.
                if (words.Length == 1)
                    var term = new Term(currentField.Name, words[0]);
                    if (value.FuzzyValue == null)
                        return(new TermQuery(term));
                    return(new FuzzyQuery(term, Convert.ToSingle(value.FuzzyValue)));

                // Several words: build a phrase query containing every word in order.
                var phraseQuery = new PhraseQuery();
                foreach (var word in words)
                    phraseQuery.Add(new Term(currentField.Name, word));

                // For phrases the fuzzy value is converted to an integer slop.
                // NOTE(review): the visible lines never assign `slop` to the phrase query - verify.
                if (value.FuzzyValue != null)
                    var slop = Convert.ToInt32(value.FuzzyValue.Value);

            case SnLucLexer.Token.WildcardString:
                // No trailing '*': keep the text as a general wildcard query.
                if (!value.StringValue.EndsWith("*"))
                    return(new WildcardQuery(new Term(currentField.Name, value.StringValue)));
                // Trailing '*' only: if wildcards remain after trimming it, stay with a wildcard query;
                // otherwise a prefix query on the trimmed text is used.
                var s = value.StringValue.TrimEnd('*');
                if (s.Contains('?') || s.Contains('*'))
                    return(new WildcardQuery(new Term(currentField.Name, value.StringValue)));
                return(new PrefixQuery(new Term(currentField.Name, s)));

                // Any other token kind is reported as unsupported.
                throw new NotImplementedException("CreateValueQuery with Token: " + value.Token);
Ejemplo n.º 20
        // Sets up a main and an auxiliary directory, replaces documents through UpdateDocument so that
        // deletes are pending, and then verifies the final document and term counts in the main directory.
        // NOTE(review): this listing has lost its braces and the statements between building the phrase
        // query and the verification calls (presumably the delete and the index merge) - confirm against the source.
        public virtual void TestWithPendingDeletes3()
            // main directory
            Directory dir = NewDirectory();
            // auxiliary directory
            Directory aux = NewDirectory();

            SetUpDirs(dir, aux);
            // Open the existing main index in append mode.
            IndexWriter writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND));

            // Adds 10 docs, then replaces them with another 10
            // docs, so 10 pending deletes:
            for (int i = 0; i < 20; i++)
                Document doc = new Document();
                // The id cycles through 0..9, so the second pass replaces the documents from the first pass.
                doc.Add(NewStringField("id", "" + (i % 10), Field.Store.NO));
                doc.Add(NewTextField("content", "bbb " + i, Field.Store.NO));
                writer.UpdateDocument(new Term("id", "" + (i % 10)), doc);

            // Deletes one of the 10 added docs, leaving 9:
            // the phrase "bbb 14" identifies the document written when i == 14.
            PhraseQuery q = new PhraseQuery();
            q.Add(new Term("content", "bbb"));
            q.Add(new Term("content", "14"));



            // Expected totals: 1039 documents, 1030 containing "aaa" and 9 containing "bbb".
            VerifyNumDocs(dir, 1039);
            VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
            VerifyTermDocs(dir, new Term("content", "bbb"), 9);

Ejemplo n.º 21
        public List <SearchResult> LenuceOrSearch(string kw, int pageNo, int pageLen, out int recCount)
            FSDirectory        directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NoLockFactory());
            IndexReader        reader    = IndexReader.Open(directory, true);
            IndexSearcher      searcher  = new IndexSearcher(reader);
            List <PhraseQuery> lstQuery  = new List <PhraseQuery>();
            List <string>      lstkw     = PanGuSplitWord(kw);//对用户输入的搜索条件进行拆分。

            foreach (string word in lstkw)
                PhraseQuery query = new PhraseQuery();      //查询条件
                query.Slop = 100;                           //两个词的距离大于100(经验值)就不放入搜索结果,因为距离太远相关度就不高了
                query.Add(new Term("Content", word));       //contains("Content",word)

                PhraseQuery titleQuery = new PhraseQuery(); //查询条件
                titleQuery.Add(new Term("Title", word));


            BooleanQuery bq = new BooleanQuery();

            foreach (var v in lstQuery)
                //Occur.Should 表示 Or , Must 表示 and 运算
                bq.Add(v, Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);                //盛放查询结果的容器

            searcher.Search(bq, null, collector);                                                    //使用query这个查询条件进行搜索,搜索结果放入collector

            recCount = collector.TotalHits;                                                          //总的结果条数
            ScoreDoc[] docs = collector.TopDocs((pageNo - 1) * pageLen, pageNo * pageLen).ScoreDocs; //从查询结果中取出第m条到第n条的数据

            List <SearchResult> resultList = new List <SearchResult>();
            string msg   = string.Empty;
            string title = string.Empty;

            for (int i = 0; i < docs.Length; i++)          //遍历查询结果
                int docId = docs[i].Doc;                   //拿到文档的id,因为Document可能非常占内存(思考DataSet和DataReader的区别)
                Document     doc    = searcher.Doc(docId); //根据id查询内容。放进去的是Document,查出来的还是Document
                SearchResult result = new SearchResult();
                result.Id = Convert.ToInt32(doc.Get("Id"));
                msg       = doc.Get("Content");//只有 Field.Store.YES的字段才能用Get查出来
                title     = doc.Get("Title");
                foreach (string word in lstkw)
                    title = title.Replace(word, "<span style='color:red;'>" + word + "</span>");
                result.Msg        = CreateHightLight(kw, msg);
                result.Title      = title;
                result.CreateTime = Convert.ToDateTime(doc.Get("CreateTime"));
                result.Url        = "/Article/Details?Id=" + result.Id + "&kw=" + kw;