/// <summary>
/// Recursively breaks <paramref name="sourceQuery"/> down and collects the resulting
/// term and phrase queries in <paramref name="flatQueries"/>.
/// </summary>
/// <param name="sourceQuery">Query to flatten.</param>
/// <param name="reader">Reader used to rewrite queries that have no dedicated handling here;
/// when it is <c>null</c> such queries are dropped.</param>
/// <param name="flatQueries">Collection that receives the flattened queries.</param>
internal void Flatten(Query sourceQuery, IndexReader reader, ICollection<Query> flatQueries)
{
    switch (sourceQuery)
    {
        case BooleanQuery booleanQuery:
        {
            foreach (BooleanClause clause in booleanQuery)
            {
                // Prohibited clauses are not flattened.
                if (clause.IsProhibited)
                {
                    continue;
                }

                Flatten(ApplyParentBoost(clause.Query, booleanQuery), reader, flatQueries);
            }

            break;
        }

        case DisjunctionMaxQuery disjunctionQuery:
        {
            foreach (Query disjunct in disjunctionQuery)
            {
                Flatten(ApplyParentBoost(disjunct, disjunctionQuery), reader, flatQueries);
            }

            break;
        }

        case TermQuery _:
        {
            if (!flatQueries.Contains(sourceQuery))
            {
                flatQueries.Add(sourceQuery);
            }

            break;
        }

        case PhraseQuery phraseQuery:
        {
            // LUCENENET: the target may be a list, so set semantics are enforced
            // with an explicit Contains check before anything is added.
            if (flatQueries.Contains(sourceQuery))
            {
                break;
            }

            var phraseTerms = phraseQuery.GetTerms();
            if (phraseTerms.Length > 1)
            {
                flatQueries.Add(phraseQuery);
            }
            else if (phraseTerms.Length == 1)
            {
                // A single-term phrase is stored as a TermQuery carrying the phrase's boost.
                flatQueries.Add(new TermQuery(phraseTerms[0]) { Boost = phraseQuery.Boost });
            }

            break;
        }

        case ConstantScoreQuery constantScoreQuery:
        {
            Query wrapped = constantScoreQuery.Query;
            if (wrapped != null)
            {
                Flatten(ApplyParentBoost(wrapped, sourceQuery), reader, flatQueries);
            }

            break;
        }

        case FilteredQuery filteredQuery:
        {
            Query wrapped = filteredQuery.Query;
            if (wrapped != null)
            {
                Flatten(ApplyParentBoost(wrapped, sourceQuery), reader, flatQueries);
            }

            break;
        }

        default:
        {
            // Without a reader nothing can be rewritten, so the query is discarded.
            if (reader == null)
            {
                break;
            }

            Query candidate = sourceQuery;
            if (sourceQuery is MultiTermQuery)
            {
                // Work on a clone so the caller's query keeps its own rewrite method.
                MultiTermQuery clone = (MultiTermQuery)sourceQuery.Clone();
                clone.MultiTermRewriteMethod = new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS);
                candidate = clone;
            }

            Query rewritten = candidate.Rewrite(reader);
            if (rewritten != candidate)
            {
                // Rewrite only once, then flatten again: the rewritten query may get
                // special treatment if this method is overridden in a subclass.
                Flatten(rewritten, reader, flatQueries);
            }

            // A query that rewrites to itself is discarded.
            break;
        }
    }
}
/// <summary>
/// Populates an <see cref="T:IDictionary{string, WeightedSpanTerm}"/> with the
/// <see cref="WeightedSpanTerm"/>s derived from the terms of <paramref name="query"/>.
/// </summary>
/// <param name="query"><see cref="Query"/> whose terms are extracted</param>
/// <param name="terms">Map that receives the created <see cref="WeightedSpanTerm"/>s</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
protected virtual void Extract(Query query, IDictionary<string, WeightedSpanTerm> terms)
{
    switch (query)
    {
        case BooleanQuery booleanQuery:
        {
            IList<BooleanClause> booleanClauses = booleanQuery.Clauses;
            for (int index = 0; index < booleanClauses.Count; index++)
            {
                BooleanClause clause = booleanClauses[index];
                if (clause.IsProhibited)
                {
                    continue;
                }

                Extract(clause.Query, terms);
            }

            break;
        }

        case PhraseQuery phraseQuery:
        {
            // One span term query per phrase term.
            Term[] phraseTerms = phraseQuery.GetTerms();
            SpanQuery[] spanClauses = new SpanQuery[phraseTerms.Length];
            int next = 0;
            foreach (Term phraseTerm in phraseTerms)
            {
                spanClauses[next++] = new SpanTermQuery(phraseTerm);
            }

            int slop = phraseQuery.Slop;
            int[] positions = phraseQuery.GetPositions();

            // Add the largest position increment to the slop.
            int widestGap = 0;
            for (int i = 1; i < positions.Length; i++)
            {
                int gap = positions[i] - positions[i - 1];
                if (gap > widestGap)
                {
                    widestGap = gap;
                }
            }

            if (widestGap > 1)
            {
                slop += widestGap;
            }

            // Span order is only enforced when the resulting slop is zero.
            SpanNearQuery nearQuery = new SpanNearQuery(spanClauses, slop, slop == 0)
            {
                Boost = query.Boost
            };
            ExtractWeightedSpanTerms(terms, nearQuery);
            break;
        }

        case TermQuery _:
        {
            ExtractWeightedTerms(terms, query);
            break;
        }

        case SpanQuery spanQuery:
        {
            ExtractWeightedSpanTerms(terms, spanQuery);
            break;
        }

        case FilteredQuery filteredQuery:
        {
            Extract(filteredQuery.Query, terms);
            break;
        }

        case ConstantScoreQuery constantScoreQuery:
        {
            Query wrapped = constantScoreQuery.Query;
            if (wrapped != null)
            {
                Extract(wrapped, terms);
            }

            break;
        }

        case CommonTermsQuery _:
        {
            // Specialized since rewriting would change the result query;
            // this query is TermContext sensitive.
            ExtractWeightedTerms(terms, query);
            break;
        }

        case DisjunctionMaxQuery disjunctionMaxQuery:
        {
            foreach (var disjunct in disjunctionMaxQuery)
            {
                Extract(disjunct, terms);
            }

            break;
        }

        case MultiPhraseQuery multiPhraseQuery:
        {
            IList<Term[]> termArrays = multiPhraseQuery.GetTermArrays();
            int[] positions = multiPhraseQuery.GetPositions();
            if (positions.Length == 0)
            {
                break;
            }

            // Highest position determines how many per-position buckets are needed.
            int highestPosition = positions[positions.Length - 1];
            foreach (int candidate in positions)
            {
                if (candidate > highestPosition)
                {
                    highestPosition = candidate;
                }
            }

            // Group the span term queries by the position they occupy.
            var buckets = new List<SpanQuery>[highestPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.Count; ++i)
            {
                Term[] termsAtPosition = termArrays[i];
                int slot = positions[i];
                List<SpanQuery> bucket = buckets[slot];
                if (bucket == null)
                {
                    bucket = new List<SpanQuery>(termsAtPosition.Length);
                    buckets[slot] = bucket;
                    ++distinctPositions;
                }

                foreach (var term in termsAtPosition)
                {
                    bucket.Add(new SpanTermQuery(term));
                }
            }

            // Each occupied position becomes one SpanOrQuery; empty positions count as gaps.
            int positionGaps = 0;
            int filled = 0;
            SpanQuery[] nearClauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < buckets.Length; ++i)
            {
                List<SpanQuery> bucket = buckets[i];
                if (bucket == null)
                {
                    ++positionGaps;
                    continue;
                }

                nearClauses[filled++] = new SpanOrQuery(bucket.ToArray());
            }

            int slop = multiPhraseQuery.Slop;
            bool inOrder = slop == 0;
            SpanNearQuery nearQuery = new SpanNearQuery(nearClauses, slop + positionGaps, inOrder)
            {
                Boost = query.Boost
            };
            ExtractWeightedSpanTerms(terms, nearQuery);
            break;
        }

        default:
        {
            Query toRewrite = query;
            if (query is MultiTermQuery)
            {
                // Multi-term queries are skipped entirely unless expansion is enabled.
                if (!expandMultiTermQuery)
                {
                    return;
                }

                MultiTermQuery clone = (MultiTermQuery)query.Clone();
                clone.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                toRewrite = clone;
            }

            IndexReader reader = GetLeafContext().Reader;
            Query rewritten = toRewrite.Rewrite(reader);
            if (rewritten != toRewrite)
            {
                // Rewrite only once, then extract again: the rewritten query may get special
                // treatment in a subclass override or in one of the cases above on the next recursion.
                Extract(rewritten, terms);
            }

            break;
        }
    }

    ExtractUnknownQuery(query, terms);
}
/// <summary>
/// Rewrites <paramref name="query"/> by forwarding the call unchanged to <c>@delegate</c>.
/// </summary>
/// <param name="reader">Reader handed through to the delegate.</param>
/// <param name="query">Multi-term query to rewrite.</param>
/// <returns>Whatever the delegate's <c>Rewrite</c> returns.</returns>
public override Query Rewrite(IndexReader reader, MultiTermQuery query) => @delegate.Rewrite(reader, query);
/// <summary>
/// Overrides the inherited <c>Rewrite</c> and declares it abstract, so this type supplies
/// no implementation and concrete subclasses have to provide one.
/// </summary>
/// <param name="reader">Reader the rewrite is performed against.</param>
/// <param name="query">Multi-term query to rewrite.</param>
/// <returns>The rewritten query.</returns>
public abstract override Query Rewrite(IndexReader reader, MultiTermQuery query);
/// <summary>
/// Rewrites <paramref name="query"/> with <c>MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE</c> and,
/// if that throws <c>BooleanQuery.TooManyClauses</c>, falls back to
/// <c>MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT</c>.
/// </summary>
/// <param name="reader">Reader the rewrite is performed against.</param>
/// <param name="query">Multi-term query to rewrite.</param>
/// <returns>The result of the scoring rewrite, or of the default rewrite when the scoring one overflowed.</returns>
public override Query Rewrite(IndexReader reader, MultiTermQuery query)
{
    // We try SCORING_BOOLEAN_QUERY_REWRITE first, but it can result in TooManyClauses, which
    // we need to handle. This might not be the greatest solution, but it is a workaround for now.
    // See https://github.com/Shazwazza/Examine/pull/89
    //
    // Newer Lucene versions have a top-docs rewrite that doesn't have this problem, but
    // backporting it looks like an enormous amount of work.
    //
    // Potentially we could somehow bubble up the original query that generated too many term
    // matches so that the consumer could modify their search accordingly.
    //
    // Another option would be to use the commented-out code at the end of this method and
    // guard `booleanQuery.Add`, exiting the loop when the max terms are surpassed - but that
    // might mean odd results.
    try
    {
        return MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(reader, query);
    }
    catch (BooleanQuery.TooManyClauses)
    {
        // TODO: We could try to bubble this up to the consumer somehow? Event or otherwise?
        // TODO: We could add a cache for known terms that will cause this, so that we don't
        //       spend too much CPU rewriting and re-catching the exception each time.

        // The scoring rewrite cannot be performed, so use the default rewrite for this query.
        return MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.Rewrite(reader, query);
    }

    // NOTE: this is the code that normally runs
    //var filteredTermEnum = query.GetEnum(reader);
    //var booleanQuery = new BooleanQuery(true);
    //var inc = 0;
    //try
    //{
    //    do
    //    {
    //        var t = filteredTermEnum.Term();
    //        if (t != null)
    //        {
    //            var termQuery = new TermQuery(t);
    //            termQuery.SetBoost(query.GetBoost() * filteredTermEnum.Difference());
    //            // NOTE: this is where the TooManyClauses Exception would occur
    //            // booleanQuery.Add(termQuery, BooleanClause.Occur.SHOULD);
    //            ++inc;
    //        }
    //    }
    //    while (filteredTermEnum.Next());
    //}
    //finally
    //{
    //    filteredTermEnum.Close();
    //}
    //
    // NOTE: this is internal/protected, so if we wanted to use this code we'd have to subclass BooleanQuery
    //
    //query.IncTotalNumberOfTerms(inc);
    //return booleanQuery;
}
/// <summary>
/// Populates a <c>Map</c> with the <see cref="WeightedSpanTerm"/>s derived from the terms
/// of the supplied <c>Query</c>. Query types not handled below are ignored.
/// </summary>
/// <param name="query">Query whose terms are extracted</param>
/// <param name="terms">Map that receives the created WeightedSpanTerms</param>
private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
{
    if (query is BooleanQuery)
    {
        foreach (BooleanClause clause in ((BooleanQuery)query).GetClauses())
        {
            if (clause.IsProhibited)
            {
                continue;
            }

            Extract(clause.Query, terms);
        }

        return;
    }

    if (query is PhraseQuery)
    {
        PhraseQuery phrase = (PhraseQuery)query;

        // One span term query per phrase term.
        Term[] phraseTerms = phrase.GetTerms();
        SpanQuery[] spanClauses = new SpanQuery[phraseTerms.Length];
        int next = 0;
        foreach (Term phraseTerm in phraseTerms)
        {
            spanClauses[next++] = new SpanTermQuery(phraseTerm);
        }

        int slop = phrase.Slop;
        int[] positions = phrase.GetPositions();

        // Add the largest position increment to the slop.
        int widestGap = 0;
        for (int i = 1; i < positions.Length; i++)
        {
            int gap = positions[i] - positions[i - 1];
            if (gap > widestGap)
            {
                widestGap = gap;
            }
        }

        if (widestGap > 1)
        {
            slop += widestGap;
        }

        // Span order is only enforced when the resulting slop is zero.
        SpanNearQuery nearQuery = new SpanNearQuery(spanClauses, slop, slop == 0)
        {
            Boost = query.Boost
        };
        ExtractWeightedSpanTerms(terms, nearQuery);
        return;
    }

    if (query is TermQuery)
    {
        ExtractWeightedTerms(terms, query);
        return;
    }

    if (query is SpanQuery)
    {
        ExtractWeightedSpanTerms(terms, (SpanQuery)query);
        return;
    }

    if (query is FilteredQuery)
    {
        Extract(((FilteredQuery)query).Query, terms);
        return;
    }

    if (query is DisjunctionMaxQuery)
    {
        foreach (var disjunct in (DisjunctionMaxQuery)query)
        {
            Extract(disjunct, terms);
        }

        return;
    }

    if (query is MultiTermQuery && expandMultiTermQuery)
    {
        MultiTermQuery multiTerm = (MultiTermQuery)query;
        if (multiTerm.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
        {
            // Switch the rewrite method on a clone so the caller's query is left untouched.
            multiTerm = (MultiTermQuery)multiTerm.Clone();
            multiTerm.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
            query = multiTerm;
        }

        // Run the rewrite against a FakeReader first; only if it ends up with a Field set
        // is the reader for that field looked up and the real rewrite extracted.
        FakeReader probe = new FakeReader();
        MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(probe, multiTerm);
        if (probe.Field != null)
        {
            IndexReader fieldReader = GetReaderForField(probe.Field);
            Extract(query.Rewrite(fieldReader), terms);
        }

        return;
    }

    if (query is MultiPhraseQuery)
    {
        MultiPhraseQuery multiPhrase = (MultiPhraseQuery)query;
        IList<Term[]> termArrays = multiPhrase.GetTermArrays();
        int[] positions = multiPhrase.GetPositions();
        if (positions.Length == 0)
        {
            return;
        }

        // Highest position determines how many per-position buckets are needed.
        int highestPosition = positions[positions.Length - 1];
        foreach (int candidate in positions)
        {
            if (candidate > highestPosition)
            {
                highestPosition = candidate;
            }
        }

        // Group the span term queries by the position they occupy.
        var buckets = new List<SpanQuery>[highestPosition + 1];
        int distinctPositions = 0;
        for (int i = 0; i < termArrays.Count; ++i)
        {
            Term[] termsAtPosition = termArrays[i];
            int slot = positions[i];
            List<SpanQuery> bucket = buckets[slot];
            if (bucket == null)
            {
                bucket = new List<SpanQuery>(termsAtPosition.Length);
                buckets[slot] = bucket;
                ++distinctPositions;
            }

            foreach (Term term in termsAtPosition)
            {
                bucket.Add(new SpanTermQuery(term));
            }
        }

        // Each occupied position becomes one SpanOrQuery; empty positions count as gaps.
        int positionGaps = 0;
        int filled = 0;
        SpanQuery[] nearClauses = new SpanQuery[distinctPositions];
        foreach (List<SpanQuery> bucket in buckets)
        {
            if (bucket == null)
            {
                ++positionGaps;
                continue;
            }

            nearClauses[filled++] = new SpanOrQuery(bucket.ToArray());
        }

        int slop = multiPhrase.Slop;
        bool inOrder = slop == 0;
        SpanNearQuery nearQuery = new SpanNearQuery(nearClauses, slop + positionGaps, inOrder)
        {
            Boost = query.Boost
        };
        ExtractWeightedSpanTerms(terms, nearQuery);
    }
}