public virtual void TestCrazySpans()
{
    // Background: "normal" Lucene queries create scorers and return null when a term
    // does not exist, so they never score a term that does not exist.
    // Span queries, however, have only one scorer for the whole hierarchy:
    // inner queries are not real queries, their boosts are ignored, etc.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, directory, Similarity, TimeZone);

    // Index a single document whose "foo" field holds the value "bar".
    Document document = new Document();
    document.Add(NewField("foo", "bar", new FieldType(TextField.TYPE_NOT_STORED)));
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);

    foreach (Similarity sim in Sims)
    {
        searcher.Similarity = sim;

        // One span term that was indexed ("bar") OR-ed with one that was not ("baz").
        Query spanOr = new SpanOrQuery(
            new SpanTermQuery(new Term("foo", "bar")),
            new SpanTermQuery(new Term("foo", "baz")));

        TopDocs hits = searcher.Search(spanOr, 10);
        Assert.AreEqual(1, hits.TotalHits);

        // The single hit must have a non-negative, finite score.
        float score = hits.ScoreDocs[0].Score;
        Assert.IsTrue(score >= 0.0f);
        Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim);
    }

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Rewrites this query into a <see cref="SpanOrQuery"/> holding one
/// <see cref="SpanTermQuery"/> per clause produced by rewriting an equivalent
/// <see cref="RegexQuery"/> against <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Reader the intermediate <see cref="RegexQuery"/> is rewritten against.</param>
/// <returns>The resulting <see cref="SpanOrQuery"/>.</returns>
public override Query Rewrite(IndexReader reader)
{
    // The scoring boolean rewrite is selected so that the cast to BooleanQuery below
    // holds (RegexQuery, via MultiTermQuery, then always rewrites to a BooleanQuery).
    RegexQuery regexQuery = new RegexQuery(_term)
    {
        RegexImplementation = _regexImpl,
        RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE
    };

    BooleanClause[] clauses = ((BooleanQuery)regexQuery.Rewrite(reader)).GetClauses();
    SpanQuery[] spanTerms = new SpanQuery[clauses.Length];

    int next = 0;
    foreach (BooleanClause clause in clauses)
    {
        // Clauses coming out of RegexQuery.Rewrite are expected to be TermQuery instances.
        TermQuery termQuery = (TermQuery)clause.Query;
        spanTerms[next++] = new SpanTermQuery(termQuery.Term) { Boost = termQuery.Boost };
    }

    // The boost is copied from the intermediate RegexQuery.
    return new SpanOrQuery(spanTerms) { Boost = regexQuery.Boost };
}
/// <summary>
/// Builds a <see cref="SpanOrQuery"/> from the text of <paramref name="e"/>: the text is
/// run through the analyzer for the field named by the "fieldName" attribute and every
/// emitted token becomes one <see cref="SpanTermQuery"/> clause.
/// </summary>
/// <param name="e">Element supplying the "fieldName" attribute, the text to analyze and an optional "boost" attribute (default 1.0).</param>
/// <returns>The <see cref="SpanOrQuery"/> over all analyzed terms.</returns>
/// <exception cref="ParserException">Thrown when analysis fails with an exception for which <c>IsIOException()</c> is true.</exception>
public override SpanQuery GetSpanQuery(XmlElement e)
{
    string fieldName = DOMUtils.GetAttributeWithInheritanceOrFail(e, "fieldName");
    string text = DOMUtils.GetNonBlankTextOrFail(e);

    var spanTerms = new JCG.List<SpanQuery>();
    TokenStream stream = null;
    try
    {
        stream = analyzer.GetTokenStream(fieldName, text);
        ITermToBytesRefAttribute termAttribute = stream.AddAttribute<ITermToBytesRefAttribute>();
        BytesRef sharedBytes = termAttribute.BytesRef;

        stream.Reset();
        while (stream.IncrementToken())
        {
            // Refresh the attribute's bytes for this token, then deep-copy them for the term.
            termAttribute.FillBytesRef();
            spanTerms.Add(new SpanTermQuery(new Term(fieldName, BytesRef.DeepCopyOf(sharedBytes))));
        }
        stream.End();

        return new SpanOrQuery(spanTerms.ToArray())
        {
            Boost = DOMUtils.GetAttribute(e, "boost", 1.0f)
        };
    }
    catch (Exception ioe) when (ioe.IsIOException())
    {
        throw new ParserException("IOException parsing value:" + text, ioe);
    }
    finally
    {
        IOUtils.DisposeWhileHandlingException(stream);
    }
}
/// <summary>
/// Converts the clauses of <paramref name="qc"/> into span queries and appends the result
/// to <paramref name="spanClauses"/>. Clauses with <c>Occur.MUST_NOT</c> are collected
/// separately and, when present, exclude matches via a <see cref="SpanNotQuery"/>.
/// Nothing is appended when no non-prohibited span query was collected.
/// </summary>
/// <param name="spanClauses">List receiving the resulting span query.</param>
/// <param name="qc">Boolean query whose clauses are <see cref="TermQuery"/> or nested <see cref="BooleanQuery"/> instances.</param>
/// <exception cref="ArgumentException">Thrown for any other clause query type.</exception>
private void AddComplexPhraseClause(IList<SpanQuery> spanClauses, BooleanQuery qc)
{
    List<SpanQuery> ors = new List<SpanQuery>();
    List<SpanQuery> nots = new List<SpanQuery>();

    // For all clauses e.g. one* two~
    foreach (BooleanClause clause in qc.GetClauses())
    {
        Query childQuery = clause.Query;

        // Prohibited clauses go to the exclusion list, everything else to the OR list.
        List<SpanQuery> target = clause.Occur == Occur.MUST_NOT ? nots : ors;

        if (childQuery is TermQuery)
        {
            TermQuery termQuery = (TermQuery)childQuery;
            SpanTermQuery spanTerm = new SpanTermQuery(termQuery.Term);
            spanTerm.Boost = termQuery.Boost;
            target.Add(spanTerm);
            continue;
        }

        if (childQuery is BooleanQuery)
        {
            // Nested boolean queries are flattened into the same target list.
            AddComplexPhraseClause(target, (BooleanQuery)childQuery);
            continue;
        }

        // LUCENETODO alternatively could call extract terms here?
        throw new ArgumentException("Unknown query type:" + childQuery.GetType().Name);
    }

    if (ors.Count == 0)
    {
        return;
    }

    SpanQuery result = new SpanOrQuery(ors.ToArray());
    if (nots.Count != 0)
    {
        result = new SpanNotQuery(result, new SpanOrQuery(nots.ToArray()));
    }

    spanClauses.Add(result);
}
public void PASS_CreateQuery()
{
    // Arrange: a clause list holding one span term query.
    var clauses = new List<SpanQueryBase> { new SpanTermQuery("field", "value") };

    // Act
    var query = new SpanOrQuery(clauses);

    // Assert: the query was created and exposes exactly one clause.
    Assert.IsNotNull(query);
    Assert.AreEqual(1, query.Clauses.Count());
}
public void PASS_Serialize()
{
    // Expected JSON for a span_or query with one span_term clause.
    const string expectedJson = "{\"span_or\":{\"clauses\":[{\"span_term\":{\"field\":\"value\"}}]}}";

    var clauses = new List<SpanQueryBase> { new SpanTermQuery("field", "value") };
    var query = new SpanOrQuery(clauses);

    string json = JsonConvert.SerializeObject(query);

    Assert.IsNotNull(json);
    Assert.AreEqual(expectedJson, json);
}
public void FAIL_CreateQuery()
{
    // A clause list whose only element is null.
    var clausesWithNull = new List<SpanQueryBase> { null };

    try
    {
        new SpanOrQuery(clausesWithNull);

        // Reaching this line means the constructor did not throw.
        Assert.Fail();
    }
    catch (ArgumentNullException ex)
    {
        Assert.AreEqual("clauses", ex.ParamName);
    }
}
/// <summary>
/// Reads a <see cref="SpanOrQuery"/> from JSON. The payload is first deserialized into a
/// dictionary; when that dictionary has an entry keyed by <c>SpanQueryTypeEnum.Or.ToString()</c>,
/// that entry's value is deserialized and used as the field dictionary instead.
/// </summary>
/// <param name="reader">Reader positioned at the JSON to deserialize.</param>
/// <param name="objectType">Not used by this implementation.</param>
/// <param name="existingValue">Not used by this implementation.</param>
/// <param name="serializer">Serializer used to read the outer dictionary.</param>
/// <returns>The deserialized <see cref="SpanOrQuery"/>.</returns>
public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
{
    Dictionary<string, object> fieldDict = serializer.Deserialize<Dictionary<string, object>>(reader);

    // Unwrap by key: fetch the value stored under the key that was tested, in a single
    // lookup, rather than whichever entry happens to be enumerated first.
    object wrapped;
    if (fieldDict.TryGetValue(SpanQueryTypeEnum.Or.ToString(), out wrapped))
    {
        fieldDict = JsonConvert.DeserializeObject<Dictionary<string, object>>(wrapped.ToString());
    }

    SpanOrQuery query = new SpanOrQuery(
        JsonConvert.DeserializeObject<IEnumerable<SpanQueryBase>>(fieldDict.GetString(_CLAUSES)));
    query.QueryName = fieldDict.GetStringOrDefault(QuerySerializer._QUERY_NAME);

    return query;
}
public void TestPassesIfWrapped()
{
    // A regexp query wrapped as a span multi-term query inside a SpanOrQuery.
    SpanQuery wrappedquery = new SpanOrQuery(
        new SpanMultiTermQueryWrapper<RegexpQuery>(new RegexpQuery(new Term("field", "worl."))));

    // Randomized MemoryIndex constructor arguments (drawn in the same order as before).
    bool randomFlag = Random.nextBoolean();
    int randomSize = Random.nextInt(50) * 1024 * 1024;
    MemoryIndex memoryIndex = new MemoryIndex(randomFlag, randomSize);

    memoryIndex.AddField("field", new MockAnalyzer(Random).GetTokenStream("field", "hello there"));

    // This passes though
    var score = memoryIndex.Search(wrappedquery);
    assertEquals(0, score, 0.00001f);
}
/// <summary>
/// Rewrites this query into a <see cref="SpanOrQuery"/>. Every term returned by
/// <c>GetTerms()</c> is expanded through a <see cref="WildcardQuery"/> rewritten with the
/// scoring boolean rewrite, and each resulting term clause becomes a
/// <see cref="SpanTermQuery"/> carrying the clause's boost.
/// </summary>
/// <param name="reader">Reader the wildcard queries are rewritten against.</param>
/// <returns>A <see cref="SpanOrQuery"/> over all expanded terms, with this query's boost.</returns>
/// <exception cref="TooManyClausesException">Thrown when a rewrite raises <c>BooleanQuery.TooManyClauses</c>.</exception>
/// <exception cref="InvalidOperationException">Thrown when a wildcard does not rewrite to a <see cref="BooleanQuery"/> after at most two rewrites.</exception>
public override Query Rewrite(IndexReader reader)
{
    var spanTerms = new List<SpanQuery>();

    foreach (Term term in (IEnumerable<Term>)this.GetTerms())
    {
        var wildcard = new WildcardQuery(term)
        {
            RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE
        };

        Query rewritten;
        try
        {
            rewritten = wildcard.Rewrite(reader);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            throw new TooManyClausesException();
        }

        BooleanQuery expanded = rewritten as BooleanQuery;
        if (expanded == null)
        {
            // The first rewrite did not yield a BooleanQuery; rewrite once more.
            try
            {
                rewritten = rewritten.Rewrite(reader);
            }
            catch (BooleanQuery.TooManyClauses)
            {
                throw new TooManyClausesException();
            }

            expanded = rewritten as BooleanQuery;
            if (expanded == null)
            {
                throw new InvalidOperationException("Unexpected rewritten query type:" + rewritten.GetType());
            }
        }

        foreach (BooleanClause clause in expanded.GetClauses())
        {
            TermQuery termQuery = (TermQuery)clause.Query;
            spanTerms.Add(new SpanTermQuery(termQuery.Term) { Boost = termQuery.Boost });
        }
    }

    return new SpanOrQuery(spanTerms.ToArray()) { Boost = this.Boost };
}
public void TestSpanOr()
{
    Directory directory = NewDirectory();

    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    config.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config);

    // Stored text field indexed with positions and offsets.
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.Add(body);

    // The same Document/Field instances are reused for both indexed texts.
    foreach (string text in new[] { "This is a test.", "Test a one sentence document." })
    {
        body.SetStringValue(text);
        writer.AddDocument(doc);
    }

    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    // Helper subclass constructed with the analyzer (stands in for the anonymous
    // PostingsHighlighter subclass overriding getIndexAnalyzer in the Java original).
    PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);

    Query query = new SpanOrQuery(new SpanQuery[]
    {
        new SpanMultiTermQueryWrapper<WildcardQuery>(new WildcardQuery(new Term("body", "te*")))
    });

    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);

    string[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.Length);

    string[] expected = { "This is a <b>test</b>.", "<b>Test</b> a one sentence document." };
    for (int i = 0; i < expected.Length; i++)
    {
        assertEquals(expected[i], snippets[i]);
    }

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Builds a <see cref="SpanOrQuery"/> whose clauses are the span queries that
/// <c>factory</c> produces for each child element of <paramref name="e"/>.
/// Child nodes that are not elements are skipped.
/// </summary>
/// <param name="e">Element whose child elements describe the clauses; an optional "boost" attribute (default 1.0) sets the boost.</param>
/// <returns>The assembled <see cref="SpanOrQuery"/>.</returns>
public override SpanQuery GetSpanQuery(XmlElement e)
{
    var childQueries = new JCG.List<SpanQuery>();

    XmlNode node = e.FirstChild;
    while (node != null)
    {
        if (node.NodeType == XmlNodeType.Element)
        {
            childQueries.Add(factory.GetSpanQuery((XmlElement)node));
        }

        node = node.NextSibling;
    }

    return new SpanOrQuery(childQueries.ToArray())
    {
        Boost = DOMUtils.GetAttribute(e, "boost", 1.0f)
    };
}
/// <summary>
/// Rewrites this query by expanding an equivalent <see cref="FuzzyQuery"/> against
/// <paramref name="reader"/> and converting the resulting term queries to span term queries.
/// A single resulting term is returned as a bare <see cref="SpanTermQuery"/>; otherwise
/// the terms are combined in a <see cref="SpanOrQuery"/> that takes the fuzzy query's boost.
/// </summary>
/// <param name="reader">Reader the fuzzy query is rewritten against.</param>
/// <returns>A <see cref="SpanTermQuery"/> or a <see cref="SpanOrQuery"/>.</returns>
/// <exception cref="InvalidOperationException">Thrown when the rewritten query is neither a <see cref="BooleanQuery"/> nor a <see cref="TermQuery"/>.</exception>
public override Query Rewrite(IndexReader reader)
{
    var fuzzy = new FuzzyQuery(this.term, this.minimumSimilarity, this.PrefixLength);

    Query rewritten = fuzzy.Rewrite(reader);
    BooleanQuery expanded = rewritten as BooleanQuery;
    if (expanded == null)
    {
        // Not a BooleanQuery yet: rewrite once more before inspecting the result.
        rewritten = rewritten.Rewrite(reader);
        expanded = rewritten as BooleanQuery;
    }

    // Converts a term query to a span term query carrying the same boost.
    Func<TermQuery, SpanTermQuery> toSpanTerm =
        termQuery => new SpanTermQuery(termQuery.Term) { Boost = termQuery.Boost };

    if (expanded == null)
    {
        if (rewritten is TermQuery)
        {
            return toSpanTerm((TermQuery)rewritten);
        }

        throw new InvalidOperationException("Unexpected rewritten query type:" + rewritten.GetType());
    }

    BooleanClause[] clauses = expanded.GetClauses();
    if (clauses.Length == 1)
    {
        return toSpanTerm((TermQuery)clauses[0].Query);
    }

    SpanQuery[] spanTerms = Array.ConvertAll<BooleanClause, SpanQuery>(
        clauses, clause => toSpanTerm((TermQuery)clause.Query));

    return new SpanOrQuery(spanTerms) { Boost = fuzzy.Boost };
}
/// <summary>
/// Visits a <see cref="SpanOrQuery"/>. This implementation does not handle the query
/// and always throws; it is virtual, so derived classes may override it.
/// </summary>
/// <param name="spanOrq">The query being visited (unused here).</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public virtual Query VisitSpanOrQuery(SpanOrQuery spanOrq) => throw new NotImplementedException();
/// <summary>
/// Fills a <see cref="T:IDictionary{string, WeightedSpanTerm}"/> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <paramref name="query"/>.
/// </summary>
/// <remarks>
/// The query is handled according to its concrete type; query types without a dedicated
/// case are rewritten once and the rewritten query is extracted recursively.
/// <c>ExtractUnknownQuery</c> is invoked afterwards, except when a
/// <see cref="MultiTermQuery"/> is skipped because multi-term expansion is disabled.
/// </remarks>
/// <param name="query"><see cref="Query"/> to extract Terms from</param>
/// <param name="terms">Map to place created <see cref="WeightedSpanTerm"/>s in</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
protected virtual void Extract(Query query, IDictionary<string, WeightedSpanTerm> terms)
{
    switch (query)
    {
        case BooleanQuery booleanQuery:
        {
            // Recurse into every clause that is not prohibited.
            foreach (BooleanClause clause in booleanQuery.Clauses)
            {
                if (!clause.IsProhibited)
                {
                    Extract(clause.Query, terms);
                }
            }
            break;
        }

        case PhraseQuery phraseQuery:
        {
            Term[] phraseTerms = phraseQuery.GetTerms();
            SpanQuery[] spanTerms = new SpanQuery[phraseTerms.Length];
            for (int i = 0; i < spanTerms.Length; i++)
            {
                spanTerms[i] = new SpanTermQuery(phraseTerms[i]);
            }

            int slop = phraseQuery.Slop;
            int[] positions = phraseQuery.GetPositions();

            // add largest position increment to slop
            int largestInc = 0;
            for (int i = 1; i < positions.Length; i++)
            {
                int inc = positions[i] - positions[i - 1];
                if (inc > largestInc)
                {
                    largestInc = inc;
                }
            }
            if (largestInc > 1)
            {
                slop += largestInc;
            }

            SpanNearQuery near = new SpanNearQuery(spanTerms, slop, slop == 0);
            near.Boost = query.Boost;
            ExtractWeightedSpanTerms(terms, near);
            break;
        }

        case TermQuery _:
            ExtractWeightedTerms(terms, query);
            break;

        case SpanQuery spanQuery:
            ExtractWeightedSpanTerms(terms, spanQuery);
            break;

        case FilteredQuery filteredQuery:
            Extract(filteredQuery.Query, terms);
            break;

        case ConstantScoreQuery constantScoreQuery:
        {
            Query inner = constantScoreQuery.Query;
            if (inner != null)
            {
                Extract(inner, terms);
            }
            break;
        }

        case CommonTermsQuery _:
            // specialized since rewriting would change the result query
            // this query is TermContext sensitive.
            ExtractWeightedTerms(terms, query);
            break;

        case DisjunctionMaxQuery disjunctionMaxQuery:
        {
            foreach (var disjunct in disjunctionMaxQuery)
            {
                Extract(disjunct, terms);
            }
            break;
        }

        case MultiPhraseQuery mpq:
        {
            IList<Term[]> termArrays = mpq.GetTermArrays();
            int[] positions = mpq.GetPositions();
            if (positions.Length == 0)
            {
                break;
            }

            // Highest position used by any term array.
            int maxPosition = positions[positions.Length - 1];
            foreach (int candidate in positions)
            {
                if (candidate > maxPosition)
                {
                    maxPosition = candidate;
                }
            }

            // Group the terms by position; slots for unused positions stay null.
            var disjunctLists = new List<SpanQuery>[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.Count; ++i)
            {
                Term[] termArray = termArrays[i];
                int slot = positions[i];
                List<SpanQuery> bucket = disjunctLists[slot];
                if (bucket == null)
                {
                    bucket = new List<SpanQuery>(termArray.Length);
                    disjunctLists[slot] = bucket;
                    ++distinctPositions;
                }
                foreach (var term in termArray)
                {
                    bucket.Add(new SpanTermQuery(term));
                }
            }

            // One SpanOrQuery per used position; unused positions are counted as gaps
            // and added to the slop below.
            int positionGaps = 0;
            int filled = 0;
            SpanQuery[] clauses = new SpanQuery[distinctPositions];
            foreach (var bucket in disjunctLists)
            {
                if (bucket != null)
                {
                    clauses[filled++] = new SpanOrQuery(bucket.ToArray());
                }
                else
                {
                    ++positionGaps;
                }
            }

            int slop = mpq.Slop;
            bool inorder = (slop == 0);

            SpanNearQuery near = new SpanNearQuery(clauses, slop + positionGaps, inorder);
            near.Boost = query.Boost;
            ExtractWeightedSpanTerms(terms, near);
            break;
        }

        default:
        {
            Query origQuery = query;
            if (query is MultiTermQuery)
            {
                if (!expandMultiTermQuery)
                {
                    return;
                }

                // Rewrite a clone so the caller's query keeps its own rewrite method.
                MultiTermQuery copy = (MultiTermQuery)query.Clone();
                copy.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                origQuery = copy;
            }

            IndexReader reader = GetLeafContext().Reader;
            Query rewritten = origQuery.Rewrite(reader);
            if (rewritten != origQuery)
            {
                // only rewrite once and then flatten again - the rewritten query could have a special treatment
                // if this method is overwritten in a subclass or above in the next recursion
                Extract(rewritten, terms);
            }
            break;
        }
    }

    ExtractUnknownQuery(query, terms);
}
/// <summary>
/// Converts <paramref name="query"/> into span queries according to its concrete type and
/// passes each of them, together with <paramref name="payloads"/>, to <c>GetPayloads</c>.
/// Boolean, filtered and disjunction-max queries are processed recursively through their
/// sub-queries; prohibited boolean clauses are skipped. Other query types are ignored.
/// </summary>
/// <param name="query">Query to convert.</param>
/// <param name="payloads">Collection handed on to <c>GetPayloads</c>.</param>
private void QueryToSpanQuery(Query query, ICollection<sbyte[]> payloads)
{
    if (query is BooleanQuery)
    {
        foreach (BooleanClause clause in ((BooleanQuery)query).Clauses)
        {
            if (!clause.Prohibited)
            {
                QueryToSpanQuery(clause.Query, payloads);
            }
        }
        return;
    }

    if (query is PhraseQuery)
    {
        PhraseQuery phrase = (PhraseQuery)query;
        Term[] phraseTerms = phrase.Terms;
        SpanQuery[] spanTerms = new SpanQuery[phraseTerms.Length];
        for (int i = 0; i < spanTerms.Length; i++)
        {
            spanTerms[i] = new SpanTermQuery(phraseTerms[i]);
        }

        // A slop of zero means the terms must match in order.
        int phraseSlop = phrase.Slop;
        SpanNearQuery phraseNear = new SpanNearQuery(spanTerms, phraseSlop, phraseSlop == 0);
        phraseNear.Boost = query.Boost;
        GetPayloads(payloads, phraseNear);
        return;
    }

    if (query is TermQuery)
    {
        SpanTermQuery spanTerm = new SpanTermQuery(((TermQuery)query).Term);
        spanTerm.Boost = query.Boost;
        GetPayloads(payloads, spanTerm);
        return;
    }

    if (query is SpanQuery)
    {
        GetPayloads(payloads, (SpanQuery)query);
        return;
    }

    if (query is FilteredQuery)
    {
        QueryToSpanQuery(((FilteredQuery)query).Query, payloads);
        return;
    }

    if (query is DisjunctionMaxQuery)
    {
        foreach (Query disjunct in (DisjunctionMaxQuery)query)
        {
            QueryToSpanQuery(disjunct, payloads);
        }
        return;
    }

    if (!(query is MultiPhraseQuery))
    {
        return;
    }

    MultiPhraseQuery mpq = (MultiPhraseQuery)query;
    IList<Term[]> termArrays = mpq.TermArrays;
    int[] positions = mpq.Positions;
    if (positions.Length == 0)
    {
        return;
    }

    // Highest position used by any term array.
    int maxPosition = positions[positions.Length - 1];
    foreach (int candidate in positions)
    {
        if (candidate > maxPosition)
        {
            maxPosition = candidate;
        }
    }

    // Group the terms by position; slots for unused positions stay null.
    List<SpanQuery>[] disjunctLists = new List<SpanQuery>[maxPosition + 1];
    int distinctPositions = 0;
    for (int i = 0; i < termArrays.Count; ++i)
    {
        Term[] termArray = termArrays[i];
        int slot = positions[i];
        List<SpanQuery> bucket = disjunctLists[slot];
        if (bucket == null)
        {
            bucket = new List<SpanQuery>(termArray.Length);
            disjunctLists[slot] = bucket;
            ++distinctPositions;
        }
        foreach (Term term in termArray)
        {
            bucket.Add(new SpanTermQuery(term));
        }
    }

    // One SpanOrQuery per used position; unused positions widen the slop.
    int positionGaps = 0;
    int filled = 0;
    SpanQuery[] clauses = new SpanQuery[distinctPositions];
    foreach (List<SpanQuery> bucket in disjunctLists)
    {
        if (bucket != null)
        {
            clauses[filled++] = new SpanOrQuery(bucket.ToArray());
        }
        else
        {
            ++positionGaps;
        }
    }

    int slop = mpq.Slop;
    bool inorder = (slop == 0);

    SpanNearQuery near = new SpanNearQuery(clauses, slop + positionGaps, inorder);
    near.Boost = query.Boost;
    GetPayloads(payloads, near);
}
public virtual void TestCrazySpans()
{
    // Why this test exists: "normal" Lucene queries create scorers and return null when
    // terms don't exist, which means they never score a term that does not exist.
    // With spans there is only one scorer for the whole hierarchy: inner queries are
    // not real queries, their boosts are ignored, etc.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

    // A single document with "bar" in field "foo".
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    Document document = new Document();
    document.Add(NewField("foo", "bar", fieldType));
    writer.AddDocument(document);

    IndexReader reader = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);

    foreach (Similarity sim in Sims)
    {
        searcher.Similarity = sim;

        // "bar" was indexed, "baz" was not.
        SpanTermQuery indexedTerm = new SpanTermQuery(new Term("foo", "bar"));
        SpanTermQuery missingTerm = new SpanTermQuery(new Term("foo", "baz"));
        Query either = new SpanOrQuery(indexedTerm, missingTerm);

        TopDocs results = searcher.Search(either, 10);
        Assert.AreEqual(1, results.TotalHits);

        // The only hit must score non-negative and finite.
        float score = results.ScoreDocs[0].Score;
        Assert.IsTrue(score >= 0.0f);
        Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim);
    }

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Converts <paramref name="query"/> into span queries according to its concrete type and
/// passes each of them, together with <paramref name="payloads"/>, to <c>GetPayloads</c>.
/// Boolean, filtered and disjunction-max queries are processed recursively through their
/// sub-queries; prohibited boolean clauses are skipped. Other query types are ignored.
/// </summary>
/// <param name="query">Query to convert.</param>
/// <param name="payloads">Collection handed on to <c>GetPayloads</c>.</param>
private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
{
    if (query is BooleanQuery)
    {
        BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();
        for (int i = 0; i < queryClauses.Length; i++)
        {
            if (!queryClauses[i].IsProhibited)
            {
                QueryToSpanQuery(queryClauses[i].Query, payloads);
            }
        }
    }
    else if (query is PhraseQuery)
    {
        Term[] phraseQueryTerms = ((PhraseQuery)query).GetTerms();
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
        for (int i = 0; i < phraseQueryTerms.Length; i++)
        {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }

        // A slop of zero means the terms must match in order.
        int slop = ((PhraseQuery)query).Slop;
        bool inorder = slop == 0;

        SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
        sp.Boost = query.Boost;
        GetPayloads(payloads, sp);
    }
    else if (query is TermQuery)
    {
        SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).Term);
        stq.Boost = query.Boost;
        GetPayloads(payloads, stq);
    }
    else if (query is SpanQuery)
    {
        GetPayloads(payloads, (SpanQuery)query);
    }
    else if (query is FilteredQuery)
    {
        QueryToSpanQuery(((FilteredQuery)query).Query, payloads);
    }
    else if (query is DisjunctionMaxQuery)
    {
        for (IEnumerator<Query> iterator = ((DisjunctionMaxQuery)query).GetEnumerator(); iterator.MoveNext();)
        {
            QueryToSpanQuery(iterator.Current, payloads);
        }
    }
    else if (query is MultiPhraseQuery)
    {
        MultiPhraseQuery mpq = (MultiPhraseQuery)query;
        IList<Term[]> termArrays = mpq.GetTermArrays();
        int[] positions = mpq.GetPositions();
        if (positions.Length > 0)
        {
            // Highest position used by any term array.
            int maxPosition = positions[positions.Length - 1];
            for (int i = 0; i < positions.Length - 1; ++i)
            {
                if (positions[i] > maxPosition)
                {
                    maxPosition = positions[i];
                }
            }

            // The per-position lists are typed as SpanQuery so that ToArray() yields a real
            // SpanQuery[]. Building them as lists of Query and casting the resulting Query[]
            // to SpanQuery[] compiles (array covariance) but fails at run time with an
            // InvalidCastException.
            List<SpanQuery>[] disjunctLists = new List<SpanQuery>[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.Count; ++i)
            {
                Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null)
                {
                    disjuncts = (disjunctLists[positions[i]] = new List<SpanQuery>(termArray.Length));
                    ++distinctPositions;
                }
                foreach (Term term in termArray)
                {
                    disjuncts.Add(new SpanTermQuery(term));
                }
            }

            // One SpanOrQuery per used position; unused positions widen the slop.
            int positionGaps = 0;
            int position = 0;
            SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < disjunctLists.Length; ++i)
            {
                List<SpanQuery> disjuncts = disjunctLists[i];
                if (disjuncts != null)
                {
                    clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                }
                else
                {
                    ++positionGaps;
                }
            }

            int slop = mpq.Slop;
            bool inorder = (slop == 0);

            SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
            sp.Boost = query.Boost;
            GetPayloads(payloads, sp);
        }
    }
}
/// <summary>
/// Fills a <c>Map</c> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <c>Query</c>.
/// </summary>
/// <remarks>
/// The query is handled according to its concrete type. Multi-term queries are only
/// expanded when <c>expandMultiTermQuery</c> is set; unsupported query types are ignored.
/// </remarks>
/// <param name="query">Query to extract Terms from</param>
/// <param name="terms">Map to place created WeightedSpanTerms in</param>
private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
{
    if (query is BooleanQuery)
    {
        // Recurse into every clause that is not prohibited.
        foreach (BooleanClause clause in ((BooleanQuery)query).GetClauses())
        {
            if (!clause.IsProhibited)
            {
                Extract(clause.Query, terms);
            }
        }
        return;
    }

    if (query is PhraseQuery)
    {
        PhraseQuery phraseQuery = (PhraseQuery)query;
        Term[] phraseTerms = phraseQuery.GetTerms();
        SpanQuery[] spanTerms = new SpanQuery[phraseTerms.Length];
        for (int i = 0; i < spanTerms.Length; i++)
        {
            spanTerms[i] = new SpanTermQuery(phraseTerms[i]);
        }

        int phraseSlop = phraseQuery.Slop;
        int[] phrasePositions = phraseQuery.GetPositions();

        // add largest position increment to slop
        int largestInc = 0;
        for (int i = 1; i < phrasePositions.Length; i++)
        {
            int inc = phrasePositions[i] - phrasePositions[i - 1];
            if (inc > largestInc)
            {
                largestInc = inc;
            }
        }
        if (largestInc > 1)
        {
            phraseSlop += largestInc;
        }

        SpanNearQuery phraseNear = new SpanNearQuery(spanTerms, phraseSlop, phraseSlop == 0);
        phraseNear.Boost = query.Boost;
        ExtractWeightedSpanTerms(terms, phraseNear);
        return;
    }

    if (query is TermQuery)
    {
        ExtractWeightedTerms(terms, query);
        return;
    }

    if (query is SpanQuery)
    {
        ExtractWeightedSpanTerms(terms, (SpanQuery)query);
        return;
    }

    if (query is FilteredQuery)
    {
        Extract(((FilteredQuery)query).Query, terms);
        return;
    }

    if (query is DisjunctionMaxQuery)
    {
        foreach (var disjunct in (DisjunctionMaxQuery)query)
        {
            Extract(disjunct, terms);
        }
        return;
    }

    if (query is MultiTermQuery && expandMultiTermQuery)
    {
        MultiTermQuery mtq = (MultiTermQuery)query;
        if (mtq.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
        {
            // Work on a clone so the caller's query keeps its own rewrite method.
            mtq = (MultiTermQuery)mtq.Clone();
            mtq.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
            query = mtq;
        }

        // Rewriting against a FakeReader is used only to read back the field
        // afterwards; the rewritten query itself is discarded.
        FakeReader fakeReader = new FakeReader();
        MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fakeReader, mtq);
        if (fakeReader.Field != null)
        {
            IndexReader fieldReader = GetReaderForField(fakeReader.Field);
            Extract(query.Rewrite(fieldReader), terms);
        }
        return;
    }

    if (!(query is MultiPhraseQuery))
    {
        return;
    }

    MultiPhraseQuery mpq = (MultiPhraseQuery)query;
    IList<Term[]> termArrays = mpq.GetTermArrays();
    int[] positions = mpq.GetPositions();
    if (positions.Length == 0)
    {
        return;
    }

    // Highest position used by any term array.
    int maxPosition = positions[positions.Length - 1];
    foreach (int candidate in positions)
    {
        if (candidate > maxPosition)
        {
            maxPosition = candidate;
        }
    }

    // Group the terms by position; slots for unused positions stay null.
    var disjunctLists = new List<SpanQuery>[maxPosition + 1];
    int distinctPositions = 0;
    for (int i = 0; i < termArrays.Count; ++i)
    {
        Term[] termArray = termArrays[i];
        int slot = positions[i];
        List<SpanQuery> bucket = disjunctLists[slot];
        if (bucket == null)
        {
            bucket = new List<SpanQuery>(termArray.Length);
            disjunctLists[slot] = bucket;
            ++distinctPositions;
        }
        foreach (Term term in termArray)
        {
            bucket.Add(new SpanTermQuery(term));
        }
    }

    // One SpanOrQuery per used position; unused positions widen the slop.
    int positionGaps = 0;
    int filled = 0;
    SpanQuery[] clauses = new SpanQuery[distinctPositions];
    foreach (List<SpanQuery> bucket in disjunctLists)
    {
        if (bucket != null)
        {
            clauses[filled++] = new SpanOrQuery(bucket.ToArray());
        }
        else
        {
            ++positionGaps;
        }
    }

    int slop = mpq.Slop;
    bool inorder = (slop == 0);

    SpanNearQuery near = new SpanNearQuery(clauses, slop + positionGaps, inorder);
    near.Boost = query.Boost;
    ExtractWeightedSpanTerms(terms, near);
}
/// <summary>
/// Visits a <see cref="SpanOrQuery"/>. This implementation does not handle the query
/// and always throws; it is virtual, so derived classes may override it.
/// </summary>
/// <param name="spanOrq">The query being visited (unused here).</param>
/// <returns>Never returns.</returns>
/// <exception cref="SnNotSupportedException">Always thrown.</exception>
public virtual Query VisitSpanOrQuery(SpanOrQuery spanOrq) => throw new SnNotSupportedException();
/// <summary>
/// Converts <paramref name="query"/> into span queries according to its concrete type and
/// passes each of them, together with <paramref name="payloads"/>, to <c>GetPayloads</c>.
/// Boolean, filtered and disjunction-max queries are processed recursively through their
/// sub-queries; prohibited boolean clauses are skipped. Other query types are ignored.
/// </summary>
/// <param name="query">Query to convert.</param>
/// <param name="payloads">Collection handed on to <c>GetPayloads</c>.</param>
private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
{
    switch (query)
    {
        case BooleanQuery booleanQuery:
        {
            foreach (BooleanClause clause in booleanQuery.GetClauses())
            {
                if (!clause.IsProhibited)
                {
                    QueryToSpanQuery(clause.Query, payloads);
                }
            }
            break;
        }

        case PhraseQuery phraseQuery:
        {
            Term[] phraseTerms = phraseQuery.GetTerms();
            SpanQuery[] spanTerms = new SpanQuery[phraseTerms.Length];
            for (int i = 0; i < spanTerms.Length; i++)
            {
                spanTerms[i] = new SpanTermQuery(phraseTerms[i]);
            }

            // A slop of zero means the terms must match in order.
            int slop = phraseQuery.Slop;
            SpanNearQuery near = new SpanNearQuery(spanTerms, slop, slop == 0);
            near.Boost = query.Boost;
            GetPayloads(payloads, near);
            break;
        }

        case TermQuery termQuery:
        {
            SpanTermQuery spanTerm = new SpanTermQuery(termQuery.Term);
            spanTerm.Boost = query.Boost;
            GetPayloads(payloads, spanTerm);
            break;
        }

        case SpanQuery spanQuery:
            GetPayloads(payloads, spanQuery);
            break;

        case FilteredQuery filteredQuery:
            QueryToSpanQuery(filteredQuery.Query, payloads);
            break;

        case DisjunctionMaxQuery disjunctionMaxQuery:
        {
            foreach (var disjunct in disjunctionMaxQuery)
            {
                QueryToSpanQuery(disjunct, payloads);
            }
            break;
        }

        case MultiPhraseQuery mpq:
        {
            IList<Term[]> termArrays = mpq.GetTermArrays();
            int[] positions = mpq.GetPositions();
            if (positions.Length == 0)
            {
                break;
            }

            // Highest position used by any term array.
            int maxPosition = positions[positions.Length - 1];
            foreach (int candidate in positions)
            {
                if (candidate > maxPosition)
                {
                    maxPosition = candidate;
                }
            }

            // LUCENENET: The lists hold SpanQuery (not Query) to eliminate the O(n) cast
            // otherwise required to instantiate SpanOrQuery below.
            IList<SpanQuery>[] disjunctLists = new JCG.List<SpanQuery>[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.Count; ++i)
            {
                Term[] termArray = termArrays[i];
                int slot = positions[i];
                IList<SpanQuery> bucket = disjunctLists[slot];
                if (bucket is null)
                {
                    bucket = new JCG.List<SpanQuery>(termArray.Length);
                    disjunctLists[slot] = bucket;
                    ++distinctPositions;
                }
                foreach (Term term in termArray)
                {
                    bucket.Add(new SpanTermQuery(term));
                }
            }

            // One SpanOrQuery per used position; unused positions widen the slop.
            int positionGaps = 0;
            int filled = 0;
            SpanQuery[] clauses = new SpanQuery[distinctPositions];
            foreach (IList<SpanQuery> bucket in disjunctLists)
            {
                if (bucket != null)
                {
                    clauses[filled++] = new SpanOrQuery(bucket);
                }
                else
                {
                    ++positionGaps;
                }
            }

            int mpqSlop = mpq.Slop;
            bool inorder = (mpqSlop == 0);

            SpanNearQuery mpqNear = new SpanNearQuery(clauses, mpqSlop + positionGaps, inorder);
            mpqNear.Boost = query.Boost;
            GetPayloads(payloads, mpqNear);
            break;
        }
    }
}
/// <summary>
/// Visits a <see cref="SpanOrQuery"/> by passing its clauses and
/// <paramref name="writer"/> to <c>AzureQueryLogger.VisitClauses</c>.
/// </summary>
/// <param name="query">Query whose clauses are visited.</param>
/// <param name="writer">Writer handed on to <c>VisitClauses</c>.</param>
private static void VisitQuery(SpanOrQuery query, AzureQueryLogger.IndentedTextWriter writer)
{
    AzureQueryLogger.VisitClauses(writer, query.GetClauses());
}