Esempio n. 1
0
 /// <summary>
 /// Recursively decomposes <paramref name="sourceQuery"/> and collects the term and phrase
 /// queries found inside it into <paramref name="flatQueries"/>.
 /// </summary>
 /// <param name="sourceQuery">Query to decompose.</param>
 /// <param name="reader">Reader used to rewrite query types that have no dedicated handling here;
 /// when it is <c>null</c> those queries are dropped.</param>
 /// <param name="flatQueries">Collection receiving the flattened queries. A term or phrase query
 /// is skipped when <c>Contains</c> reports that it is already present.</param>
 internal void Flatten(Query sourceQuery, IndexReader reader, ICollection<Query> flatQueries)
 {
     switch (sourceQuery)
     {
         case BooleanQuery booleanQuery:
         {
             foreach (BooleanClause clause in booleanQuery)
             {
                 // Prohibited clauses are not flattened.
                 if (clause.IsProhibited)
                 {
                     continue;
                 }
                 Flatten(ApplyParentBoost(clause.Query, booleanQuery), reader, flatQueries);
             }
             break;
         }

         case DisjunctionMaxQuery disjunctionMaxQuery:
         {
             foreach (Query disjunct in disjunctionMaxQuery)
             {
                 Flatten(ApplyParentBoost(disjunct, disjunctionMaxQuery), reader, flatQueries);
             }
             break;
         }

         case TermQuery _:
         {
             // The collection is used with set semantics: never add the same query twice.
             if (!flatQueries.Contains(sourceQuery))
             {
                 flatQueries.Add(sourceQuery);
             }
             break;
         }

         case PhraseQuery phraseQuery:
         {
             // LUCENENET - set semantics, but this is a list.
             if (flatQueries.Contains(sourceQuery))
             {
                 break;
             }

             var phraseTerms = phraseQuery.GetTerms();
             if (phraseTerms.Length > 1)
             {
                 flatQueries.Add(phraseQuery);
             }
             else if (phraseTerms.Length == 1)
             {
                 // A one-term phrase is stored as a plain term query carrying the phrase's boost.
                 var singleTerm = new TermQuery(phraseTerms[0]);
                 singleTerm.Boost = phraseQuery.Boost;
                 flatQueries.Add(singleTerm);
             }
             // A phrase without terms adds nothing.
             break;
         }

         case ConstantScoreQuery constantScoreQuery:
         {
             Query wrapped = constantScoreQuery.Query;
             if (wrapped != null)
             {
                 Flatten(ApplyParentBoost(wrapped, sourceQuery), reader, flatQueries);
             }
             break;
         }

         case FilteredQuery filteredQuery:
         {
             Query wrapped = filteredQuery.Query;
             if (wrapped != null)
             {
                 Flatten(ApplyParentBoost(wrapped, sourceQuery), reader, flatQueries);
             }
             break;
         }

         default:
         {
             if (reader == null)
             {
                 // Without a reader the query cannot be rewritten, so it is discarded.
                 break;
             }

             Query toRewrite = sourceQuery;
             if (sourceQuery is MultiTermQuery)
             {
                 // Rewrite a clone so the caller's query keeps its own rewrite method.
                 var boundedCopy = (MultiTermQuery)sourceQuery.Clone();
                 boundedCopy.MultiTermRewriteMethod = new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS);
                 toRewrite = boundedCopy;
             }

             Query rewritten = toRewrite.Rewrite(reader);
             if (rewritten != toRewrite)
             {
                 // Only rewrite once and then flatten again - the rewritten query could get special
                 // treatment if this method is overridden in a subclass.
                 Flatten(rewritten, reader, flatQueries);
             }
             // A query that rewrites to itself is already fully rewritten and is discarded.
             break;
         }
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Populates <paramref name="terms"/> with <see cref="WeightedSpanTerm"/>s derived from the terms of the supplied <paramref name="query"/>.
        /// </summary>
        /// <param name="query"><see cref="Query"/> whose terms are extracted</param>
        /// <param name="terms">Map that receives the created <see cref="WeightedSpanTerm"/>s</param>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        protected virtual void Extract(Query query, IDictionary<string, WeightedSpanTerm> terms)
        {
            switch (query)
            {
                case BooleanQuery booleanQuery:
                {
                    IList<BooleanClause> clauseList = booleanQuery.Clauses;
                    for (int c = 0; c < clauseList.Count; c++)
                    {
                        BooleanClause clause = clauseList[c];
                        // Prohibited clauses are not extracted.
                        if (clause.IsProhibited)
                        {
                            continue;
                        }
                        Extract(clause.Query, terms);
                    }
                    break;
                }

                case PhraseQuery phraseQuery:
                {
                    // Express the phrase as a span-near query over one span-term query per phrase term.
                    Term[] phraseTerms = phraseQuery.GetTerms();
                    var spanClauses = new SpanQuery[phraseTerms.Length];
                    for (int t = 0; t < phraseTerms.Length; t++)
                    {
                        spanClauses[t] = new SpanTermQuery(phraseTerms[t]);
                    }

                    int phraseSlop = phraseQuery.Slop;
                    int[] phrasePositions = phraseQuery.GetPositions();

                    // Add the largest position increment to the slop (only when it exceeds 1).
                    int widestGap = 0;
                    for (int p = 1; p < phrasePositions.Length; p++)
                    {
                        int gap = phrasePositions[p] - phrasePositions[p - 1];
                        if (gap > widestGap)
                        {
                            widestGap = gap;
                        }
                    }
                    if (widestGap > 1)
                    {
                        phraseSlop += widestGap;
                    }

                    // The span query is order-sensitive only when the final slop is zero.
                    bool phraseInOrder = phraseSlop == 0;

                    var phraseNear = new SpanNearQuery(spanClauses, phraseSlop, phraseInOrder);
                    phraseNear.Boost = query.Boost;
                    ExtractWeightedSpanTerms(terms, phraseNear);
                    break;
                }

                case TermQuery _:
                {
                    ExtractWeightedTerms(terms, query);
                    break;
                }

                case SpanQuery spanQuery:
                {
                    ExtractWeightedSpanTerms(terms, spanQuery);
                    break;
                }

                case FilteredQuery filteredQuery:
                {
                    Extract(filteredQuery.Query, terms);
                    break;
                }

                case ConstantScoreQuery constantScoreQuery:
                {
                    Query wrapped = constantScoreQuery.Query;
                    if (wrapped != null)
                    {
                        Extract(wrapped, terms);
                    }
                    break;
                }

                case CommonTermsQuery _:
                {
                    // Specialized since rewriting would change the result query;
                    // this query is TermContext sensitive.
                    ExtractWeightedTerms(terms, query);
                    break;
                }

                case DisjunctionMaxQuery disjunctionMaxQuery:
                {
                    foreach (var disjunct in disjunctionMaxQuery)
                    {
                        Extract(disjunct, terms);
                    }
                    break;
                }

                case MultiPhraseQuery multiPhraseQuery:
                {
                    IList<Term[]> termArrays = multiPhraseQuery.GetTermArrays();
                    int[] mpqPositions = multiPhraseQuery.GetPositions();
                    if (mpqPositions.Length == 0)
                    {
                        break;
                    }

                    // Find the highest position so there is one slot per position value.
                    int highestPosition = mpqPositions[mpqPositions.Length - 1];
                    for (int p = 0; p < mpqPositions.Length - 1; ++p)
                    {
                        if (mpqPositions[p] > highestPosition)
                        {
                            highestPosition = mpqPositions[p];
                        }
                    }

                    // Group the span-term queries by the position of the term array they came from.
                    var spansByPosition = new List<SpanQuery>[highestPosition + 1];
                    int occupiedSlots = 0;
                    for (int a = 0; a < termArrays.Count; ++a)
                    {
                        Term[] termArray = termArrays[a];
                        int slot = mpqPositions[a];
                        List<SpanQuery> bucket = spansByPosition[slot];
                        if (bucket == null)
                        {
                            bucket = new List<SpanQuery>(termArray.Length);
                            spansByPosition[slot] = bucket;
                            ++occupiedSlots;
                        }
                        foreach (var term in termArray)
                        {
                            bucket.Add(new SpanTermQuery(term));
                        }
                    }

                    // One span-or clause per occupied position; unoccupied positions are counted as gaps.
                    int emptySlots = 0;
                    int nextClause = 0;
                    var orClauses = new SpanQuery[occupiedSlots];
                    foreach (var bucket in spansByPosition)
                    {
                        if (bucket == null)
                        {
                            ++emptySlots;
                        }
                        else
                        {
                            orClauses[nextClause++] = new SpanOrQuery(bucket.ToArray());
                        }
                    }

                    int mpqSlop = multiPhraseQuery.Slop;
                    bool mpqInOrder = mpqSlop == 0;

                    // The gaps widen the slop passed to the span query; ordering is decided by the query's own slop.
                    var mpqNear = new SpanNearQuery(orClauses, mpqSlop + emptySlots, mpqInOrder);
                    mpqNear.Boost = query.Boost;
                    ExtractWeightedSpanTerms(terms, mpqNear);
                    break;
                }

                default:
                {
                    Query toRewrite = query;
                    if (query is MultiTermQuery)
                    {
                        if (!expandMultiTermQuery)
                        {
                            // Expansion is switched off: nothing is extracted, and the
                            // unknown-query hook below is skipped as well.
                            return;
                        }
                        // Rewrite a clone so the caller's query keeps its own rewrite method.
                        var scoringCopy = (MultiTermQuery)query.Clone();
                        scoringCopy.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                        toRewrite = scoringCopy;
                    }

                    IndexReader reader = GetLeafContext().Reader;
                    Query rewritten = toRewrite.Rewrite(reader);
                    if (rewritten != toRewrite)
                    {
                        // Only rewrite once and then extract again - the rewritten query could get special
                        // treatment if this method is overridden in a subclass or in the next recursion.
                        Extract(rewritten, terms);
                    }
                    break;
                }
            }

            ExtractUnknownQuery(query, terms);
        }
Esempio n. 3
0
 /// <summary>
 /// Forwards the call to <c>@delegate.Rewrite</c> and returns its result unchanged.
 /// </summary>
 /// <param name="reader">Reader handed through to the delegate.</param>
 /// <param name="query">Multi-term query handed through to the delegate.</param>
 /// <returns>Whatever the delegate's <c>Rewrite</c> returns.</returns>
 public override Query Rewrite(IndexReader reader, MultiTermQuery query)
     => @delegate.Rewrite(reader, query);
Esempio n. 4
0
 /// <summary>
 /// Re-declares the inherited <c>Rewrite</c> as abstract, so concrete subclasses must provide the implementation.
 /// </summary>
 /// <param name="reader">Reader the rewrite is performed against.</param>
 /// <param name="query">Multi-term query to rewrite.</param>
 /// <returns>The rewritten query.</returns>
 public abstract override Query Rewrite(IndexReader reader, MultiTermQuery query);
        /// <summary>
        /// Rewrites <paramref name="query"/> with <c>MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE</c> and, if that
        /// throws <c>BooleanQuery.TooManyClauses</c>, retries with <c>MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT</c>.
        /// </summary>
        /// <param name="reader">Reader passed through to the underlying rewrite method.</param>
        /// <param name="query">Multi-term query to rewrite.</param>
        /// <returns>The query returned by the scoring rewrite, or by the fallback rewrite.</returns>
        public override Query Rewrite(IndexReader reader, MultiTermQuery query)
        {
            // The scoring boolean rewrite is attempted first, but it can fail with TooManyClauses, which has
            // to be handled here. This is a workaround rather than a great solution;
            // see https://github.com/Shazwazza/Examine/pull/89
            // Newer Lucene versions have a top-docs rewrite without this problem, but backporting it looks
            // like an enormous amount of work.
            // Other options: bubble up the original query that produced too many term matches so the consumer
            // can adjust the search, or use the commented-out code at the end of this method, catch
            // `booleanQuery.Add` and leave the loop once the max terms are surpassed - but that might
            // mean odd results.
            try
            {
                return MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(reader, query);
            }
            catch (BooleanQuery.TooManyClauses)
            {
                //TODO: We could try to bubble this up to the consumer somehow? event or otherwise?
                //TODO: We could add a cache for known terms that will cause this so that we don't spend too much CPU rewriting and recatching the exception each time

                // The scoring rewrite is not possible for this query, so use the default rewrite instead.
                return MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.Rewrite(reader, query);
            }

            //NOTE: this is the code that normally runs

            //var filteredTermEnum = query.GetEnum(reader);
            //var booleanQuery = new BooleanQuery(true);
            //var inc = 0;
            //try
            //{
            //    do
            //    {
            //        var t = filteredTermEnum.Term();
            //        if (t != null)
            //        {
            //            var termQuery = new TermQuery(t);
            //            termQuery.SetBoost(query.GetBoost() * filteredTermEnum.Difference());
            //
            //            NOTE: this is where the TooManyClauses Exception would occur
            //
            //            booleanQuery.Add(termQuery, BooleanClause.Occur.SHOULD);
            //            ++inc;
            //        }
            //    }
            //    while (filteredTermEnum.Next());
            //}
            //finally
            //{
            //    filteredTermEnum.Close();
            //}
            //
            // NOTE: this is internal/protected, so if we wanted to use this code we'd have to subclass BooleanQuery
            //
            //query.IncTotalNumberOfTerms(inc);
            //return booleanQuery;
        }
        /// <summary>
        /// Populates <paramref name="terms"/> with <see cref="WeightedSpanTerm"/>s built from the terms of the supplied <paramref name="query"/>.
        /// Query types without a branch below are ignored.
        /// </summary>
        /// <param name="query">Query to extract terms from</param>
        /// <param name="terms">Map that receives the created WeightedSpanTerms</param>
        private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
        {
            if (query is BooleanQuery)
            {
                foreach (BooleanClause clause in ((BooleanQuery)query).GetClauses())
                {
                    // Prohibited clauses are not extracted.
                    if (clause.IsProhibited)
                    {
                        continue;
                    }
                    Extract(clause.Query, terms);
                }
                return;
            }

            if (query is PhraseQuery)
            {
                // Express the phrase as a span-near query over one span-term query per phrase term.
                PhraseQuery phrase = (PhraseQuery)query;
                Term[] phraseTerms = phrase.GetTerms();
                SpanQuery[] spanClauses = new SpanQuery[phraseTerms.Length];
                for (int t = 0; t < phraseTerms.Length; t++)
                {
                    spanClauses[t] = new SpanTermQuery(phraseTerms[t]);
                }

                int phraseSlop = phrase.Slop;
                int[] phrasePositions = phrase.GetPositions();

                // Add the largest position increment to the slop (only when it exceeds 1).
                int widestGap = 0;
                for (int p = 1; p < phrasePositions.Length; p++)
                {
                    int gap = phrasePositions[p] - phrasePositions[p - 1];
                    if (gap > widestGap)
                    {
                        widestGap = gap;
                    }
                }
                if (widestGap > 1)
                {
                    phraseSlop += widestGap;
                }

                // The span query is order-sensitive only when the final slop is zero.
                bool phraseInOrder = phraseSlop == 0;

                SpanNearQuery phraseNear = new SpanNearQuery(spanClauses, phraseSlop, phraseInOrder);
                phraseNear.Boost = query.Boost;
                ExtractWeightedSpanTerms(terms, phraseNear);
                return;
            }

            if (query is TermQuery)
            {
                ExtractWeightedTerms(terms, query);
                return;
            }

            if (query is SpanQuery)
            {
                ExtractWeightedSpanTerms(terms, (SpanQuery)query);
                return;
            }

            if (query is FilteredQuery)
            {
                Extract(((FilteredQuery)query).Query, terms);
                return;
            }

            if (query is DisjunctionMaxQuery)
            {
                foreach (var disjunct in ((DisjunctionMaxQuery)query))
                {
                    Extract(disjunct, terms);
                }
                return;
            }

            if (query is MultiTermQuery && expandMultiTermQuery)
            {
                MultiTermQuery multiTerm = (MultiTermQuery)query;
                if (multiTerm.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
                {
                    // Switch the rewrite method on a clone so the caller's query is left untouched.
                    multiTerm = (MultiTermQuery)multiTerm.Clone();
                    multiTerm.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                    query = multiTerm;
                }

                // Rewrite against a FakeReader first; its Field is then used to pick the real reader.
                // NOTE(review): presumably FakeReader records the field the rewrite asks for - confirm against FakeReader.
                FakeReader fieldProbe = new FakeReader();
                MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fieldProbe, multiTerm);
                if (fieldProbe.Field != null)
                {
                    IndexReader fieldReader = GetReaderForField(fieldProbe.Field);
                    Extract(query.Rewrite(fieldReader), terms);
                }
                return;
            }

            if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery multiPhrase = (MultiPhraseQuery)query;
                IList<Term[]> termArrays = multiPhrase.GetTermArrays();
                int[] mpqPositions = multiPhrase.GetPositions();
                if (mpqPositions.Length == 0)
                {
                    return;
                }

                // Find the highest position so there is one slot per position value.
                int highestPosition = mpqPositions[mpqPositions.Length - 1];
                for (int p = 0; p < mpqPositions.Length - 1; ++p)
                {
                    if (mpqPositions[p] > highestPosition)
                    {
                        highestPosition = mpqPositions[p];
                    }
                }

                // Group the span-term queries by the position of the term array they came from.
                var spansByPosition = new List<SpanQuery>[highestPosition + 1];
                int occupiedSlots = 0;
                for (int a = 0; a < termArrays.Count; ++a)
                {
                    Term[] termArray = termArrays[a];
                    int slot = mpqPositions[a];
                    List<SpanQuery> bucket = spansByPosition[slot];
                    if (bucket == null)
                    {
                        bucket = new List<SpanQuery>(termArray.Length);
                        spansByPosition[slot] = bucket;
                        ++occupiedSlots;
                    }
                    foreach (Term term in termArray)
                    {
                        bucket.Add(new SpanTermQuery(term));
                    }
                }

                // One span-or clause per occupied position; unoccupied positions are counted as gaps.
                int emptySlots = 0;
                int nextClause = 0;
                SpanQuery[] orClauses = new SpanQuery[occupiedSlots];
                foreach (List<SpanQuery> bucket in spansByPosition)
                {
                    if (bucket == null)
                    {
                        ++emptySlots;
                    }
                    else
                    {
                        orClauses[nextClause++] = new SpanOrQuery(bucket.ToArray());
                    }
                }

                int mpqSlop = multiPhrase.Slop;
                bool mpqInOrder = (mpqSlop == 0);

                // The gaps widen the slop passed to the span query; ordering is decided by the query's own slop.
                SpanNearQuery mpqNear = new SpanNearQuery(orClauses, mpqSlop + emptySlots, mpqInOrder);
                mpqNear.Boost = query.Boost;
                ExtractWeightedSpanTerms(terms, mpqNear);
            }
        }