Esempio n. 1
0
        public override object Clone()
        {
            int sz = clauses.Count;
            SpanQuery[] newClauses = new SpanQuery[sz];

            for (int i = 0; i < sz; i++)
            {
                newClauses[i] = (SpanQuery)clauses[i].Clone();
            }
            PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop, inOrder, Function);
            boostingNearQuery.Boost = Boost;
            return boostingNearQuery;
        }
Esempio n. 2
0
 public PayloadNearQuery(SpanQuery[] clauses, int slop, bool inOrder)
     : this(clauses, slop, inOrder, new AveragePayloadFunction())
 {
 }
Esempio n. 3
0
 public PayloadNearSpanWeight(PayloadNearQuery outerInstance, SpanQuery query, IndexSearcher searcher)
     : base(query, searcher)
 {
     this.OuterInstance = outerInstance;
 }
Esempio n. 4
0
        /// <summary>
        /// Fills a <see cref="T:IDictionary{string, WeightedSpanTerm}"/> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <paramref name="query"/>.
        /// </summary>
        /// <param name="query"><see cref="Query"/> to extract Terms from</param>
        /// <param name="terms">Map to place created <see cref="WeightedSpanTerm"/>s in</param>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        protected virtual void Extract(Query query, IDictionary <string, WeightedSpanTerm> terms)
        {
            if (query is BooleanQuery booleanQuery)
            {
                IList <BooleanClause> queryClauses = booleanQuery.Clauses;

                for (int i = 0; i < queryClauses.Count; i++)
                {
                    if (!queryClauses[i].IsProhibited)
                    {
                        Extract(queryClauses[i].Query, terms);
                    }
                }
            }
            else if (query is PhraseQuery phraseQuery)
            {
                Term[]      phraseQueryTerms = phraseQuery.GetTerms();
                SpanQuery[] clauses          = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }
                int   slop      = phraseQuery.Slop;
                int[] positions = phraseQuery.GetPositions();
                // add largest position increment to slop
                if (positions.Length > 0)
                {
                    int lastPos    = positions[0];
                    int largestInc = 0;
                    int sz         = positions.Length;
                    for (int i = 1; i < sz; i++)
                    {
                        int pos = positions[i];
                        int inc = pos - lastPos;
                        if (inc > largestInc)
                        {
                            largestInc = inc;
                        }
                        lastPos = pos;
                    }
                    if (largestInc > 1)
                    {
                        slop += largestInc;
                    }
                }

                bool inorder = slop == 0;

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                ExtractWeightedSpanTerms(terms, sp);
            }
            else if (query is TermQuery)
            {
                ExtractWeightedTerms(terms, query);
            }
            else if (query is SpanQuery spanQuery)
            {
                ExtractWeightedSpanTerms(terms, spanQuery);
            }
            else if (query is FilteredQuery filteredQuery)
            {
                Extract(filteredQuery.Query, terms);
            }
            else if (query is ConstantScoreQuery constantScoreQuery)
            {
                Query q = constantScoreQuery.Query;
                if (q != null)
                {
                    Extract(q, terms);
                }
            }
            else if (query is CommonTermsQuery)
            {
                // specialized since rewriting would change the result query
                // this query is TermContext sensitive.
                ExtractWeightedTerms(terms, query);
            }
            else if (query is DisjunctionMaxQuery disjunctionMaxQuery)
            {
                foreach (var q in disjunctionMaxQuery)
                {
                    Extract(q, terms);
                }
            }
            else if (query is MultiPhraseQuery mpq)
            {
                IList <Term[]> termArrays = mpq.GetTermArrays();
                int[]          positions  = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    var disjunctLists     = new JCG.List <SpanQuery> [maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[] termArray = termArrays[i];
                        JCG.List <SpanQuery> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts is null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new JCG.List <SpanQuery>(termArray.Length));
                            ++distinctPositions;
                        }
                        foreach (var term in termArray)
                        {
                            disjuncts.Add(new SpanTermQuery(term));
                        }
                    }

                    int         positionGaps = 0;
                    int         position     = 0;
                    SpanQuery[] clauses      = new SpanQuery[distinctPositions];
                    foreach (var disjuncts in disjunctLists)
                    {
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int  slop    = mpq.Slop;
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    ExtractWeightedSpanTerms(terms, sp);
                }
            }
            else
            {
                Query origQuery = query;
                if (query is MultiTermQuery)
                {
                    if (!expandMultiTermQuery)
                    {
                        return;
                    }
                    MultiTermQuery copy = (MultiTermQuery)query.Clone();
                    copy.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                    origQuery = copy;
                }
                IndexReader reader    = GetLeafContext().Reader;
                Query       rewritten = origQuery.Rewrite(reader);
                if (rewritten != origQuery)
                {
                    // only rewrite once and then flatten again - the rewritten query could have a speacial treatment
                    // if this method is overwritten in a subclass or above in the next recursion
                    Extract(rewritten, terms);
                }
            }
            ExtractUnknownQuery(query, terms);
        }
Esempio n. 5
0
 private void GetPayloads(ICollection<sbyte[]> payloads, SpanQuery query)
 {
     IDictionary<Term, TermContext> termContexts = new Dictionary<Term, TermContext>();
     SortedSet<Term> terms = new SortedSet<Term>();
     query.ExtractTerms(terms);
     foreach (Term term in terms)
     {
         termContexts[term] = TermContext.Build(Context, term);
     }
     foreach (AtomicReaderContext atomicReaderContext in Context.Leaves())
     {
         Spans spans = query.GetSpans(atomicReaderContext, ((AtomicReader)atomicReaderContext.Reader()).LiveDocs, termContexts);
         while (spans.Next() == true)
         {
             if (spans.PayloadAvailable)
             {
                 ICollection<sbyte[]> payload = spans.Payload;
                 foreach (sbyte[] bytes in payload)
                 {
                     payloads.Add(bytes);
                 }
             }
         }
     }
 }
 private PayloadNearQuery NewPhraseQuery(string fieldName, string phrase, bool inOrder, PayloadFunction function)
 {
     string[] words = phrase.Split("[\\s]+".ToCharArray());
     SpanQuery[] clauses = new SpanQuery[words.Length];
     for (int i = 0; i < clauses.Length; i++)
     {
         clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
     }
     return new PayloadNearQuery(clauses, 0, inOrder, function);
 }
 private SpanNearQuery SpanNearQuery(string fieldName, string words)
 {
     string[] wordList = words.Split("[\\s]+".ToCharArray());
     SpanQuery[] clauses = new SpanQuery[wordList.Length];
     for (int i = 0; i < clauses.Length; i++)
     {
         clauses[i] = new PayloadTermQuery(new Term(fieldName, wordList[i]), new AveragePayloadFunction());
     }
     return new SpanNearQuery(clauses, 10000, false);
 }
 public RemoteClusteredSpanQuery(SpanQuery query)
     : base(query)
 {
 }
Esempio n. 9
0
 /// <summary>
 /// MACRO for SpanNotQuery </summary>
 public virtual SpanNotQuery Snot(SpanQuery i, SpanQuery e)
 {
     return(new SpanNotQuery(i, e));
 }
 public SpanLastQuery(SpanQuery match, Analyzer analyzer)
 {
     this.match    = match;
     this.analyzer = analyzer;
 }
 public SpanLastQuery(SpanQuery match, int end)
     : this(match, (Analyzer) new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_30))
 {
 }
Esempio n. 12
0
        /// <summary>
        /// Extracts the specified query.
        /// </summary>
        /// <param name="query">The query.</param>
        /// <param name="terms">The terms.</param>
        private void Extract(Query query, IDictionary <String, WeightedSpanTerm> terms)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();

                for (int i = 0; i < queryClauses.Length; i++)
                {
                    if (!queryClauses[i].IsProhibited)
                    {
                        Extract(queryClauses[i].Query, terms);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                PhraseQuery phraseQuery      = ((PhraseQuery)query);
                Term[]      phraseQueryTerms = phraseQuery.GetTerms();
                SpanQuery[] clauses          = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }
                int   slop      = phraseQuery.Slop;
                int[] positions = phraseQuery.GetPositions();
                // add largest position increment to slop
                if (positions.Length > 0)
                {
                    int lastPos    = positions[0];
                    int largestInc = 0;
                    int sz         = positions.Length;
                    for (int i = 1; i < sz; i++)
                    {
                        int pos = positions[i];
                        int inc = pos - lastPos;
                        if (inc > largestInc)
                        {
                            largestInc = inc;
                        }
                        lastPos = pos;
                    }
                    if (largestInc > 1)
                    {
                        slop += largestInc;
                    }
                }

                bool inorder = slop == 0;

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                ExtractWeightedSpanTerms(terms, sp);
            }
            else if (query is TermQuery)
            {
                ExtractWeightedTerms(terms, query);
            }
            else if (query is SpanQuery)
            {
                ExtractWeightedSpanTerms(terms, (SpanQuery)query);
            }
            else if (query is FilteredQuery)
            {
                Extract(((FilteredQuery)query).Query, terms);
            }
            else if (query is DisjunctionMaxQuery)
            {
                foreach (var q in ((DisjunctionMaxQuery)query))
                {
                    Extract(q, terms);
                }
            }
            else if (query is MultiTermQuery && expandMultiTermQuery)
            {
                MultiTermQuery mtq = ((MultiTermQuery)query);
                if (mtq.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
                {
                    mtq = (MultiTermQuery)mtq.Clone();
                    mtq.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                    query             = mtq;
                }
                FakeReader fReader = new FakeReader();
                MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fReader, mtq);
                if (fReader.Field != null)
                {
                    IndexReader ir = GetReaderForField(fReader.Field);
                    Extract(query.Rewrite(ir), terms);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery mpq        = (MultiPhraseQuery)query;
                IList <Term[]>   termArrays = mpq.GetTermArrays();
                int[]            positions  = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    var disjunctLists     = new List <SpanQuery> [maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[]           termArray = termArrays[i];
                        List <SpanQuery> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new List <SpanQuery>(termArray.Length));
                            ++distinctPositions;
                        }
                        for (int j = 0; j < termArray.Length; ++j)
                        {
                            disjuncts.Add(new SpanTermQuery(termArray[j]));
                        }
                    }

                    int         positionGaps = 0;
                    int         position     = 0;
                    SpanQuery[] clauses      = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        List <SpanQuery> disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int  slop    = mpq.Slop;
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    ExtractWeightedSpanTerms(terms, sp);
                }
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Extracts the weighted span terms.
        /// </summary>
        /// <param name="terms">The terms.</param>
        /// <param name="spanQuery">The span query.</param>
        private void ExtractWeightedSpanTerms(IDictionary <String, WeightedSpanTerm> terms, SpanQuery spanQuery)
        {
            HashSet <String> fieldNames;

            if (fieldName == null)
            {
                fieldNames = new HashSet <String>();
                CollectSpanQueryFields(spanQuery, fieldNames);
            }
            else
            {
                fieldNames = new HashSet <String>();
                fieldNames.Add(fieldName);
            }
            // To support the use of the default field name
            if (defaultField != null)
            {
                fieldNames.Add(defaultField);
            }

            IDictionary <String, SpanQuery> queries = new HashMap <String, SpanQuery>();

            var  nonWeightedTerms = new HashSet <Term>();
            bool mustRewriteQuery = MustRewriteQuery(spanQuery);

            if (mustRewriteQuery)
            {
                foreach (String field in fieldNames)
                {
                    SpanQuery rewrittenQuery = (SpanQuery)spanQuery.Rewrite(GetReaderForField(field));
                    queries[field] = rewrittenQuery;
                    rewrittenQuery.ExtractTerms(nonWeightedTerms);
                }
            }
            else
            {
                spanQuery.ExtractTerms(nonWeightedTerms);
            }

            List <PositionSpan> spanPositions = new List <PositionSpan>();

            foreach (String field in fieldNames)
            {
                IndexReader reader = GetReaderForField(field);
                Spans       spans;
                if (mustRewriteQuery)
                {
                    spans = queries[field].GetSpans(reader);
                }
                else
                {
                    spans = spanQuery.GetSpans(reader);
                }


                // collect span positions
                while (spans.Next())
                {
                    spanPositions.Add(new PositionSpan(spans.Start(), spans.End() - 1));
                }
            }

            if (spanPositions.Count == 0)
            {
                // no spans found
                return;
            }

            foreach (Term queryTerm in nonWeightedTerms)
            {
                if (FieldNameComparator(queryTerm.Field))
                {
                    WeightedSpanTerm weightedSpanTerm = terms[queryTerm.Text];

                    if (weightedSpanTerm == null)
                    {
                        weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
                        weightedSpanTerm.AddPositionSpans(spanPositions);
                        weightedSpanTerm.SetPositionSensitive(true);
                        terms[queryTerm.Text] = weightedSpanTerm;
                    }
                    else
                    {
                        if (spanPositions.Count > 0)
                        {
                            weightedSpanTerm.AddPositionSpans(spanPositions);
                        }
                    }
                }
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Formats the query info.
        /// </summary>
        /// <param name="query">The query.</param>
        /// <returns>String containing formatted QueryInfo</returns>
        protected override string FormatQueryInfo(BaseMultiIndexIdQuery <SpanQueryResult> query)
        {
            SpanQuery spanQuery = query as SpanQuery;

            return(spanQuery.ToString());
        }
Esempio n. 15
0
 public PayloadNearQuery(SpanQuery[] clauses, int slop, bool inOrder, PayloadFunction function)
     : base(clauses, slop, inOrder)
 {
     FieldName = clauses[0].Field; // all clauses must have same field
     this.Function = function;
 }
Esempio n. 16
0
 /// <summary>
 /// MACRO for SpanOrQuery containing two SpanQueries </summary>
 public virtual SpanOrQuery Sor(SpanQuery s, SpanQuery m, SpanQuery e)
 {
     return(new SpanOrQuery(s, m, e));
 }
Esempio n. 17
0
 /// <summary>
 /// MACRO for SpanNotQuery </summary>
 public virtual SpanNotQuery Snot(SpanQuery i, SpanQuery e)
 {
     return new SpanNotQuery(i, e);
 }
Esempio n. 18
0
 public virtual void TestPayloadNear()
 {
     SpanNearQuery q1, q2;
     PayloadNearQuery query;
     //SpanNearQuery(clauses, 10000, false)
     q1 = SpanNearQuery("field2", "twenty two");
     q2 = SpanNearQuery("field2", "twenty three");
     SpanQuery[] clauses = new SpanQuery[2];
     clauses[0] = q1;
     clauses[1] = q2;
     query = new PayloadNearQuery(clauses, 10, false);
     //System.out.println(query.toString());
     Assert.AreEqual(12, Searcher.Search(query, null, 100).TotalHits);
     /*
     System.out.println(hits.TotalHits);
     for (int j = 0; j < hits.ScoreDocs.Length; j++) {
       ScoreDoc doc = hits.ScoreDocs[j];
       System.out.println("doc: "+doc.Doc+", score: "+doc.Score);
     }
     */
 }
Esempio n. 19
0
 /// <summary>
 /// MACRO for SpanOrQuery containing two SpanQueries </summary>
 public virtual SpanOrQuery Sor(SpanQuery s, SpanQuery m, SpanQuery e)
 {
     return new SpanOrQuery(s, m, e);
 }
Esempio n. 20
0
        public virtual void TestComplexNested()
        {
            PayloadNearQuery query;
            TopDocs hits;

            // combine ordered and unordered spans with some nesting to make sure all payloads are counted

            SpanQuery q1 = NewPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
            SpanQuery q2 = NewPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
            SpanQuery q3 = NewPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
            SpanQuery q4 = NewPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
            SpanQuery[] clauses = new SpanQuery[] { new PayloadNearQuery(new SpanQuery[] { q1, q2 }, 0, true), new PayloadNearQuery(new SpanQuery[] { q3, q4 }, 0, false) };
            query = new PayloadNearQuery(clauses, 0, false);
            hits = Searcher.Search(query, null, 100);
            Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
            // should be only 1 hit - doc 999
            Assert.IsTrue(hits.ScoreDocs.Length == 1, "should only be one hit");
            // the score should be 3 - the average of all the underlying payloads
            ScoreDoc doc = hits.ScoreDocs[0];
            //    System.out.println("Doc: " + doc.toString());
            //    System.out.println("Explain: " + searcher.Explain(query, doc.Doc));
            Assert.IsTrue(doc.Score == 3, doc.Score + " does not equal: " + 3);
        }
Esempio n. 21
0
 /// <summary>
 /// MACRO for SpanNearQuery containing three SpanQueries </summary>
 public virtual SpanNearQuery Snear(SpanQuery s, SpanQuery m, SpanQuery e, int slop, bool inOrder)
 {
     return new SpanNearQuery(new SpanQuery[] { s, m, e }, slop, inOrder);
 }
        public virtual void TestPayloadsPos0()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockPayloadAnalyzer());
            Document doc = new Document();
            doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            writer.AddDocument(doc);

            IndexReader readerFromWriter = writer.Reader;
            AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter);

            DocsAndPositionsEnum tp = r.TermPositionsEnum(new Term("content", "a"));

            int count = 0;
            Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            // "a" occurs 4 times
            Assert.AreEqual(4, tp.Freq());
            Assert.AreEqual(0, tp.NextPosition());
            Assert.AreEqual(1, tp.NextPosition());
            Assert.AreEqual(3, tp.NextPosition());
            Assert.AreEqual(6, tp.NextPosition());

            // only one doc has "a"
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc());

            IndexSearcher @is = NewSearcher(readerFromWriter);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
            SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

            count = 0;
            bool sawZero = false;
            if (VERBOSE)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }
            Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            while (pspans.Next())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("doc " + pspans.Doc() + ": span " + pspans.Start() + " to " + pspans.End());
                }
                var payloads = pspans.Payload;
                sawZero |= pspans.Start() == 0;
                foreach (var bytes in payloads)
                {
                    count++;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString((byte[])(Array)bytes));
                    }
                }
            }
            Assert.IsTrue(sawZero);
            Assert.AreEqual(5, count);

            // System.out.println("\ngetSpans test");
            Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            count = 0;
            sawZero = false;
            while (spans.Next())
            {
                count++;
                sawZero |= spans.Start() == 0;
                // System.out.println(spans.Doc() + " - " + spans.Start() + " - " +
                // spans.End());
            }
            Assert.AreEqual(4, count);
            Assert.IsTrue(sawZero);

            // System.out.println("\nPayloadSpanUtil test");

            sawZero = false;
            PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext);
            var pls = psu.GetPayloadsForQuery(snq);
            count = pls.Count;
            foreach (var bytes in pls)
            {
                string s = Encoding.UTF8.GetString(bytes);
                //System.out.println(s);
                sawZero |= s.Equals("pos: 0");
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);
            writer.Dispose();
            @is.IndexReader.Dispose();
            dir.Dispose();
        }
 public VirtualSpanQuery(SpanQuery query)
     : base(query)
 {
 }
Esempio n. 24
0
        private void QueryToSpanQuery(Query query, ICollection<sbyte[]> payloads)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery)query).Clauses;

                for (int i = 0; i < queryClauses.Length; i++)
                {
                    if (!queryClauses[i].Prohibited)
                    {
                        QueryToSpanQuery(queryClauses[i].Query, payloads);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                Term[] phraseQueryTerms = ((PhraseQuery)query).Terms;
                SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }

                int slop = ((PhraseQuery)query).Slop;
                bool inorder = false;

                if (slop == 0)
                {
                    inorder = true;
                }

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                GetPayloads(payloads, sp);
            }
            else if (query is TermQuery)
            {
                SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).Term);
                stq.Boost = query.Boost;
                GetPayloads(payloads, stq);
            }
            else if (query is SpanQuery)
            {
                GetPayloads(payloads, (SpanQuery)query);
            }
            else if (query is FilteredQuery)
            {
                QueryToSpanQuery(((FilteredQuery)query).Query, payloads);
            }
            else if (query is DisjunctionMaxQuery)
            {
                IEnumerator<Query> enumerator = ((DisjunctionMaxQuery)query).GetEnumerator();
                while (enumerator.MoveNext())
                {
                    QueryToSpanQuery(enumerator.Current, payloads);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery mpq = (MultiPhraseQuery)query;
                IList<Term[]> termArrays = mpq.TermArrays;
                int[] positions = mpq.Positions;
                if (positions.Length > 0)
                {
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    IList<Query>[] disjunctLists = new List<Query>[maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[] termArray = termArrays[i];
                        IList<Query> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new List<Query>(termArray.Length));
                            ++distinctPositions;
                        }
                        foreach (Term term in termArray)
                        {
                            disjuncts.Add(new SpanTermQuery(term));
                        }
                    }

                    int positionGaps = 0;
                    int position = 0;
                    SpanQuery[] clauses = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        IList<Query> disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery(disjuncts.OfType<SpanQuery>().ToArray());
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int slop = mpq.Slop;
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    GetPayloads(payloads, sp);
                }
            }
        }
Esempio n. 25
0
        /// <summary>
        /// Fills a <see cref="T:IDictionary{string, WeightedSpanTerm}"/> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <see cref="SpanQuery"/>.
        /// </summary>
        /// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> to place created <see cref="WeightedSpanTerm"/>s in</param>
        /// <param name="spanQuery"><see cref="SpanQuery"/> to extract Terms from</param>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        protected virtual void ExtractWeightedSpanTerms(IDictionary <string, WeightedSpanTerm> terms, SpanQuery spanQuery)
        {
            ISet <string> fieldNames;

            if (fieldName is null)
            {
                fieldNames = new JCG.HashSet <string>();
                CollectSpanQueryFields(spanQuery, fieldNames);
            }
            else
            {
                fieldNames = new JCG.HashSet <string>
                {
                    fieldName
                };
            }
            // To support the use of the default field name
            if (defaultField != null)
            {
                fieldNames.Add(defaultField);
            }

            IDictionary <string, SpanQuery> queries = new JCG.Dictionary <string, SpanQuery>();

            var  nonWeightedTerms = new JCG.HashSet <Term>();
            bool mustRewriteQuery = MustRewriteQuery(spanQuery);

            if (mustRewriteQuery)
            {
                foreach (string field in fieldNames)
                {
                    SpanQuery rewrittenQuery = (SpanQuery)spanQuery.Rewrite(GetLeafContext().Reader);
                    queries[field] = rewrittenQuery;
                    rewrittenQuery.ExtractTerms(nonWeightedTerms);
                }
            }
            else
            {
                spanQuery.ExtractTerms(nonWeightedTerms);
            }

            IList <PositionSpan> spanPositions = new JCG.List <PositionSpan>();

            foreach (string field in fieldNames)
            {
                SpanQuery q;
                q = mustRewriteQuery ? queries[field] : spanQuery;

                AtomicReaderContext context = GetLeafContext();
                var         termContexts    = new JCG.Dictionary <Term, TermContext>();
                ISet <Term> extractedTerms  = new JCG.SortedSet <Term>();
                q.ExtractTerms(extractedTerms);
                foreach (Term term in extractedTerms)
                {
                    termContexts[term] = TermContext.Build(context, term);
                }
                IBits       acceptDocs = context.AtomicReader.LiveDocs;
                Spans.Spans spans      = q.GetSpans(context, acceptDocs, termContexts);

                // collect span positions
                while (spans.MoveNext())
                {
                    spanPositions.Add(new PositionSpan(spans.Start, spans.End - 1));
                }
            }

            if (spanPositions.Count == 0)
            {
                // no spans found
                return;
            }

            foreach (Term queryTerm in nonWeightedTerms)
            {
                if (FieldNameComparer(queryTerm.Field))
                {
                    if (!terms.TryGetValue(queryTerm.Text, out WeightedSpanTerm weightedSpanTerm) || weightedSpanTerm is null)
                    {
                        weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
                        weightedSpanTerm.AddPositionSpans(spanPositions);
                        weightedSpanTerm.IsPositionSensitive = true;
                        terms[queryTerm.Text] = weightedSpanTerm;
                    }
                    else
                    {
                        if (spanPositions.Count > 0)
                        {
                            weightedSpanTerm.AddPositionSpans(spanPositions);
                        }
                    }
                }
            }
        }