public void TestSpanNot()
        {
            // Verifies that a SpanNotQuery wrapping a wildcard span query is highlighted
            // on the terms matched by the "include" side.
            Directory directory = NewDirectory();
            // SIMPLE tokenization gives more natural tokens (otherwise "test." would be a single token)
            Analyzer mockAnalyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
            IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
            writerConfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory, writerConfig);

            // Stored text field that also indexes offsets.
            FieldType fieldTypeWithOffsets = new FieldType(TextField.TYPE_STORED)
            {
                IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
            };
            Field bodyField = new Field("body", "", fieldTypeWithOffsets);
            Document document = new Document();
            document.Add(bodyField);

            // The same Document/Field instances are reused for both indexed documents.
            string[] bodies = { "This is a test.", "Test a one sentence document." };
            for (int i = 0; i < bodies.Length; i++)
            {
                bodyField.SetStringValue(bodies[i]);
                indexWriter.AddDocument(document);
            }

            IndexReader reader = indexWriter.GetReader();
            indexWriter.Dispose();

            IndexSearcher indexSearcher = NewSearcher(reader);
            ICUPostingsHighlighter snippetHighlighter = new PostingsHighlighterAnalyzerHelper(mockAnalyzer);

            WildcardQuery wildcard = new WildcardQuery(new Term("body", "te*"));
            SpanQuery includeSpans = new SpanMultiTermQueryWrapper <WildcardQuery>(wildcard);
            SpanQuery excludeSpans = new SpanTermQuery(new Term("body", "bogus"));
            Query spanNot = new SpanNotQuery(includeSpans, excludeSpans);

            TopDocs results = indexSearcher.Search(spanNot, null, 10, Sort.INDEXORDER);
            assertEquals(2, results.TotalHits);

            String[] highlighted = snippetHighlighter.Highlight("body", spanNot, indexSearcher, results);
            assertEquals(2, highlighted.Length);
            assertEquals("This is a <b>test</b>.", highlighted[0]);
            assertEquals("<b>Test</b> a one sentence document.", highlighted[1]);

            reader.Dispose();
            directory.Dispose();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Indexes a single document and checks that a <see cref="BooleanQuery"/> made of two
        /// optional span term queries (one matching, one misspelled) returns exactly one hit.
        /// </summary>
        public virtual void TestBooleanSpanQuery()
        {
            const string FIELD = "content";

            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random);

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            Document d = new Document();

            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            BooleanQuery query = new BooleanQuery();
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));

            query.Add(sq1, Occur.SHOULD);
            query.Add(sq2, Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);

            searcher.Search(query, collector);

            // Fetch the collected results exactly once: the collector hands out its queue
            // contents on the first call, so a second call would yield no score docs.
            ScoreDoc[] scoreDocs = collector.GetTopDocs().ScoreDocs;
            int hits = scoreDocs.Length;
            foreach (ScoreDoc scoreDoc in scoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }

            // Release resources before asserting so a failed assertion does not leave them open.
            indexReader.Dispose();
            directory.Dispose();

            // Expected value goes first so a failure message reports the values the right way round.
            Assert.AreEqual(1, hits, "Bug in boolean query composed of span queries");
        }
Exemplo n.º 3
0
        public virtual void TestCrazySpans()
        {
            // Background: ordinary Lucene queries build one scorer per clause and return null
            // when a term does not exist, so a missing term is never scored.
            // Span queries instead use a single scorer for the whole hierarchy:
            // inner queries are not real queries, their boosts are ignored, etc.
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);
            Document document = new Document();
            FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
            document.Add(NewField("foo", "bar", fieldType));
            indexWriter.AddDocument(document);

            IndexReader reader = indexWriter.GetReader();
            indexWriter.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // Run the same "existing OR missing term" span query under every similarity.
            foreach (Similarity sim in sims)
            {
                searcher.Similarity = sim;

                SpanTermQuery presentTerm = new SpanTermQuery(new Term("foo", "bar"));
                SpanTermQuery absentTerm = new SpanTermQuery(new Term("foo", "baz"));
                Query eitherTerm = new SpanOrQuery(presentTerm, absentTerm);

                TopDocs results = searcher.Search(eitherTerm, 10);
                Assert.AreEqual(1, results.TotalHits);

                // The score must be non-negative and finite.
                float topScore = results.ScoreDocs[0].Score;
                Assert.IsTrue(topScore >= 0.0f);
                Assert.IsFalse(float.IsInfinity(topScore), "inf score for " + sim);
            }

            reader.Dispose();
            directory.Dispose();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a <see cref="SpanOrQuery"/> from the text of <paramref name="e"/>: the text is
        /// run through the analyzer and every produced token becomes one <see cref="SpanTermQuery"/> clause.
        /// </summary>
        /// <param name="e">Element supplying the "fieldName" attribute, the text to analyze and an optional "boost" attribute (default 1.0).</param>
        /// <returns>The OR of all analyzed terms, with the boost applied.</returns>
        /// <exception cref="ParserException">If reading the token stream throws an <see cref="IOException"/>.</exception>
        public override SpanQuery GetSpanQuery(XmlElement e)
        {
            string fieldName = DOMUtils.GetAttributeWithInheritanceOrFail(e, "fieldName");
            string value = DOMUtils.GetNonBlankTextOrFail(e);

            var termClauses = new List <SpanQuery>();
            TokenStream stream = null;

            try
            {
                stream = analyzer.GetTokenStream(fieldName, value);
                ITermToBytesRefAttribute termAttribute = stream.AddAttribute <ITermToBytesRefAttribute>();
                BytesRef sharedBytes = termAttribute.BytesRef;

                stream.Reset();
                while (stream.IncrementToken())
                {
                    termAttribute.FillBytesRef();
                    // Deep-copy the bytes so each Term gets its own copy instead of the shared reference.
                    Term term = new Term(fieldName, BytesRef.DeepCopyOf(sharedBytes));
                    termClauses.Add(new SpanTermQuery(term));
                }
                stream.End();

                SpanOrQuery orQuery = new SpanOrQuery(termClauses.ToArray());
                orQuery.Boost = DOMUtils.GetAttribute(e, "boost", 1.0f);
                return orQuery;
            }
            catch (IOException)
            {
                throw new ParserException("IOException parsing value:" + value);
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(stream);
            }
        }
Exemplo n.º 5
0
        public virtual void TestPayloadsPos0()
        {
            // Indexes one document with MockPayloadAnalyzer, then checks term positions,
            // span matches and payload counts for a "a" NEAR "k" span query.
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory, new MockPayloadAnalyzer());
            Document document = new Document();
            document.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            indexWriter.AddDocument(document);

            IndexReader topReader = indexWriter.GetReader();
            AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(topReader);
            DocsAndPositionsEnum positionsEnum = atomicReader.GetTermPositionsEnum(new Term("content", "a"));

            Assert.IsTrue(positionsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            // "a" occurs 4 times, at the positions listed below
            Assert.AreEqual(4, positionsEnum.Freq);
            foreach (int expectedPosition in new int[] { 0, 1, 3, 6 })
            {
                Assert.AreEqual(expectedPosition, positionsEnum.NextPosition());
            }

            // only one doc has "a"
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, positionsEnum.NextDoc());

            IndexSearcher searcher = NewSearcher(topReader);

            SpanQuery[] nearClauses = new SpanQuery[]
            {
                new SpanTermQuery(new Term("content", "a")),
                new SpanTermQuery(new Term("content", "k"))
            };
            SpanNearQuery nearQuery = new SpanNearQuery(nearClauses, 30, false);

            // Phase 1: walk the spans and count every payload they expose.
            if (Verbose)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }
            int payloadCount = 0;
            bool payloadSpansSawZero = false;
            Search.Spans.Spans payloadSpans = MultiSpansWrapper.Wrap(searcher.TopReaderContext, nearQuery);
            while (payloadSpans.MoveNext())
            {
                if (Verbose)
                {
                    Console.WriteLine("doc " + payloadSpans.Doc + ": span " + payloadSpans.Start + " to " + payloadSpans.End);
                }
                var spanPayloads = payloadSpans.GetPayload();
                if (payloadSpans.Start == 0)
                {
                    payloadSpansSawZero = true;
                }
                foreach (var payload in spanPayloads)
                {
                    payloadCount++;
                    if (Verbose)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString(payload));
                    }
                }
            }
            Assert.IsTrue(payloadSpansSawZero);
            Assert.AreEqual(5, payloadCount);

            // Phase 2: count the span matches themselves.
            int spanCount = 0;
            bool spansSawZero = false;
            Search.Spans.Spans plainSpans = MultiSpansWrapper.Wrap(searcher.TopReaderContext, nearQuery);
            while (plainSpans.MoveNext())
            {
                spanCount++;
                if (plainSpans.Start == 0)
                {
                    spansSawZero = true;
                }
            }
            Assert.AreEqual(4, spanCount);
            Assert.IsTrue(spansSawZero);

            // Phase 3: the same payloads must be reachable through PayloadSpanUtil.
            PayloadSpanUtil payloadUtil = new PayloadSpanUtil(searcher.TopReaderContext);
            var queryPayloads = payloadUtil.GetPayloadsForQuery(nearQuery);
            int utilPayloadCount = queryPayloads.Count;
            bool utilSawZero = false;
            foreach (var payload in queryPayloads)
            {
                string payloadText = Encoding.UTF8.GetString(payload);
                if (payloadText.Equals("pos: 0", StringComparison.Ordinal))
                {
                    utilSawZero = true;
                }
            }
            Assert.AreEqual(5, utilPayloadCount);
            Assert.IsTrue(utilSawZero);

            indexWriter.Dispose();
            searcher.IndexReader.Dispose();
            directory.Dispose();
        }
Exemplo n.º 6
0
        /// <summary>
        /// Walks <paramref name="query"/>, converts each supported query type into an equivalent
        /// span query, and passes that span query to <c>GetPayloads</c> together with
        /// <paramref name="payloads"/>. Composite queries are handled recursively; query types
        /// not listed below are silently ignored.
        /// </summary>
        /// <param name="query">Query to convert. Handled types: BooleanQuery (non-prohibited clauses),
        /// PhraseQuery, TermQuery, SpanQuery, FilteredQuery, DisjunctionMaxQuery, MultiPhraseQuery.</param>
        /// <param name="payloads">Collection handed through to <c>GetPayloads</c>.</param>
        private void QueryToSpanQuery(Query query, ICollection<sbyte[]> payloads)
        {
            if (query is BooleanQuery)
            {
                // Recurse into every clause that is not prohibited.
                foreach (BooleanClause clause in ((BooleanQuery)query).Clauses)
                {
                    if (!clause.Prohibited)
                    {
                        QueryToSpanQuery(clause.Query, payloads);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                PhraseQuery phraseQuery = (PhraseQuery)query;
                Term[] phraseQueryTerms = phraseQuery.Terms;
                SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }

                // An exact phrase (slop 0) is the only case that requires in-order matching.
                int slop = phraseQuery.Slop;
                bool inorder = (slop == 0);

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                GetPayloads(payloads, sp);
            }
            else if (query is TermQuery)
            {
                SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).Term);
                stq.Boost = query.Boost;
                GetPayloads(payloads, stq);
            }
            else if (query is SpanQuery)
            {
                GetPayloads(payloads, (SpanQuery)query);
            }
            else if (query is FilteredQuery)
            {
                QueryToSpanQuery(((FilteredQuery)query).Query, payloads);
            }
            else if (query is DisjunctionMaxQuery)
            {
                // foreach (rather than a hand-driven enumerator) guarantees the enumerator is disposed.
                foreach (Query disjunct in (DisjunctionMaxQuery)query)
                {
                    QueryToSpanQuery(disjunct, payloads);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery mpq = (MultiPhraseQuery)query;
                IList<Term[]> termArrays = mpq.TermArrays;
                int[] positions = mpq.Positions;
                if (positions.Length > 0)
                {
                    // Largest position value; sizes the per-position table below.
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    // One list of alternative terms per position. Typed as SpanQuery so the lists
                    // can be handed to SpanOrQuery without a filtering cast.
                    List<SpanQuery>[] disjunctLists = new List<SpanQuery>[maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[] termArray = termArrays[i];
                        List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new List<SpanQuery>(termArray.Length));
                            ++distinctPositions;
                        }
                        foreach (Term term in termArray)
                        {
                            disjuncts.Add(new SpanTermQuery(term));
                        }
                    }

                    // Build one SpanOrQuery per occupied position; count unoccupied positions as gaps.
                    int positionGaps = 0;
                    int position = 0;
                    SpanQuery[] clauses = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        List<SpanQuery> disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int slop = mpq.Slop;
                    bool inorder = (slop == 0);

                    // Gaps widen the allowed slop so the near query can still span the empty positions.
                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    GetPayloads(payloads, sp);
                }
            }
        }
Exemplo n.º 7
0
        public virtual void TestCrazySpans()
        {
            // Background: ordinary Lucene queries build one scorer per clause and return null
            // when a term does not exist, so a missing term is never scored.
            // Span queries instead use a single scorer for the whole hierarchy:
            // inner queries are not real queries, their boosts are ignored, etc.
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            Document document = new Document();
            FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
            document.Add(NewField("foo", "bar", fieldType));
            indexWriter.AddDocument(document);

            IndexReader reader = indexWriter.Reader;
            indexWriter.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // Run the same "existing OR missing term" span query under every similarity.
            foreach (Similarity sim in Sims)
            {
                searcher.Similarity = sim;

                SpanTermQuery presentTerm = new SpanTermQuery(new Term("foo", "bar"));
                SpanTermQuery absentTerm = new SpanTermQuery(new Term("foo", "baz"));
                Query eitherTerm = new SpanOrQuery(presentTerm, absentTerm);

                TopDocs results = searcher.Search(eitherTerm, 10);
                Assert.AreEqual(1, results.TotalHits);

                // The score must be non-negative and finite.
                float topScore = results.ScoreDocs[0].Score;
                Assert.IsTrue(topScore >= 0.0f);
                Assert.IsFalse(float.IsInfinity(topScore), "inf score for " + sim);
            }

            reader.Dispose();
            directory.Dispose();
        }
        /// <summary>
        /// Fills a dictionary with <see cref="WeightedSpanTerm"/>s using the terms from the supplied query.
        /// Composite queries (boolean, filtered, disjunction-max) are processed recursively; phrase and
        /// multi-phrase queries are first converted to an equivalent <c>SpanNearQuery</c>.
        /// Query types not handled by any branch below are ignored.
        /// </summary>
        /// <param name="query">Query to extract terms from</param>
        /// <param name="terms">Dictionary to place created WeightedSpanTerms in</param>
        private void Extract(Query query, IDictionary <String, WeightedSpanTerm> terms)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();

                // Recurse into every clause that is not prohibited.
                for (int i = 0; i < queryClauses.Length; i++)
                {
                    if (!queryClauses[i].IsProhibited)
                    {
                        Extract(queryClauses[i].Query, terms);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                // Convert the phrase into a SpanNearQuery over one SpanTermQuery per phrase term.
                PhraseQuery phraseQuery      = ((PhraseQuery)query);
                Term[]      phraseQueryTerms = phraseQuery.GetTerms();
                SpanQuery[] clauses          = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }
                int   slop      = phraseQuery.Slop;
                int[] positions = phraseQuery.GetPositions();
                // add largest position increment to slop
                if (positions.Length > 0)
                {
                    int lastPos    = positions[0];
                    int largestInc = 0;
                    int sz         = positions.Length;
                    // Find the biggest step between consecutive positions.
                    for (int i = 1; i < sz; i++)
                    {
                        int pos = positions[i];
                        int inc = pos - lastPos;
                        if (inc > largestInc)
                        {
                            largestInc = inc;
                        }
                        lastPos = pos;
                    }
                    // Only a step larger than 1 (a gap between terms) widens the slop.
                    if (largestInc > 1)
                    {
                        slop += largestInc;
                    }
                }

                // In-order matching is required only when the final slop is 0.
                bool inorder = slop == 0;

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                ExtractWeightedSpanTerms(terms, sp);
            }
            else if (query is TermQuery)
            {
                ExtractWeightedTerms(terms, query);
            }
            else if (query is SpanQuery)
            {
                ExtractWeightedSpanTerms(terms, (SpanQuery)query);
            }
            else if (query is FilteredQuery)
            {
                // Only the wrapped query is examined.
                Extract(((FilteredQuery)query).Query, terms);
            }
            else if (query is DisjunctionMaxQuery)
            {
                foreach (var q in ((DisjunctionMaxQuery)query))
                {
                    Extract(q, terms);
                }
            }
            else if (query is MultiTermQuery && expandMultiTermQuery)
            {
                MultiTermQuery mtq = ((MultiTermQuery)query);
                // Work on a clone when the rewrite method must be changed, so the caller's query is not modified.
                if (mtq.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
                {
                    mtq = (MultiTermQuery)mtq.Clone();
                    mtq.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                    query             = mtq;
                }
                // The result of this rewrite is discarded; the call is made for its effect on fReader.
                // NOTE(review): presumably FakeReader records which field the query touches - confirm against FakeReader.
                FakeReader fReader = new FakeReader();
                MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fReader, mtq);
                if (fReader.Field != null)
                {
                    // Rewrite against the reader for that field and extract from the rewritten query.
                    IndexReader ir = GetReaderForField(fReader.Field);
                    Extract(query.Rewrite(ir), terms);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery mpq        = (MultiPhraseQuery)query;
                IList <Term[]>   termArrays = mpq.GetTermArrays();
                int[]            positions  = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    // Largest position value; sizes the per-position table below.
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    // One list of alternative terms per position; entries stay null for unused positions.
                    var disjunctLists     = new List <SpanQuery> [maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[]           termArray = termArrays[i];
                        List <SpanQuery> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new List <SpanQuery>(termArray.Length));
                            ++distinctPositions;
                        }
                        for (int j = 0; j < termArray.Length; ++j)
                        {
                            disjuncts.Add(new SpanTermQuery(termArray[j]));
                        }
                    }

                    // Build one SpanOrQuery per occupied position and count the unoccupied ones as gaps.
                    int         positionGaps = 0;
                    int         position     = 0;
                    SpanQuery[] clauses      = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        List <SpanQuery> disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int  slop    = mpq.Slop;
                    bool inorder = (slop == 0);

                    // The gap count is added to the slop passed to the near query.
                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    ExtractWeightedSpanTerms(terms, sp);
                }
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Converts <paramref name="query"/> into span queries and passes each one to
        /// <c>GetPayloads</c> together with <paramref name="payloads"/>. Composite queries are
        /// handled recursively; query types without a case below are ignored.
        /// </summary>
        /// <param name="query">Query to convert.</param>
        /// <param name="payloads">Collection handed through to <c>GetPayloads</c>.</param>
        private void QueryToSpanQuery(Query query, ICollection <byte[]> payloads)
        {
            // Cases are tested top to bottom, in the same order as the type checks they replace.
            switch (query)
            {
                case BooleanQuery boolean:
                {
                    // Recurse into every clause that is not prohibited.
                    foreach (BooleanClause clause in boolean.GetClauses())
                    {
                        if (clause.IsProhibited)
                        {
                            continue;
                        }
                        QueryToSpanQuery(clause.Query, payloads);
                    }
                    break;
                }

                case PhraseQuery phrase:
                {
                    Term[] phraseTerms = phrase.GetTerms();
                    SpanQuery[] termSpans = new SpanQuery[phraseTerms.Length];
                    for (int t = 0; t < termSpans.Length; t++)
                    {
                        termSpans[t] = new SpanTermQuery(phraseTerms[t]);
                    }

                    // Exact phrases (slop 0) must match in order; sloppy ones need not.
                    int phraseSlop = phrase.Slop;
                    SpanNearQuery near = new SpanNearQuery(termSpans, phraseSlop, phraseSlop == 0);
                    near.Boost = query.Boost;
                    GetPayloads(payloads, near);
                    break;
                }

                case TermQuery single:
                {
                    SpanTermQuery termSpan = new SpanTermQuery(single.Term);
                    termSpan.Boost = query.Boost;
                    GetPayloads(payloads, termSpan);
                    break;
                }

                case SpanQuery span:
                {
                    GetPayloads(payloads, span);
                    break;
                }

                case FilteredQuery filtered:
                {
                    QueryToSpanQuery(filtered.Query, payloads);
                    break;
                }

                case DisjunctionMaxQuery disjunctionMax:
                {
                    foreach (var disjunct in disjunctionMax)
                    {
                        QueryToSpanQuery(disjunct, payloads);
                    }
                    break;
                }

                case MultiPhraseQuery multiPhrase:
                {
                    IList <Term[]> termsPerSlot = multiPhrase.GetTermArrays();
                    int[] slotPositions = multiPhrase.GetPositions();
                    if (slotPositions.Length == 0)
                    {
                        break;
                    }

                    // Largest position value; sizes the per-position table below.
                    int highest = slotPositions[slotPositions.Length - 1];
                    for (int p = slotPositions.Length - 2; p >= 0; --p)
                    {
                        if (slotPositions[p] > highest)
                        {
                            highest = slotPositions[p];
                        }
                    }

                    // LUCENENET: element type is SpanQuery (not Query) to eliminate the O(n) cast
                    // otherwise required to instantiate SpanOrQuery below
                    IList <SpanQuery>[] alternativesByPosition = new JCG.List <SpanQuery> [highest + 1];
                    int occupied = 0;

                    for (int slot = 0; slot < termsPerSlot.Count; ++slot)
                    {
                        Term[] slotTerms = termsPerSlot[slot];
                        int at = slotPositions[slot];
                        IList <SpanQuery> alternatives = alternativesByPosition[at];
                        if (alternatives == null)
                        {
                            alternatives = new JCG.List <SpanQuery>(slotTerms.Length);
                            alternativesByPosition[at] = alternatives;
                            ++occupied;
                        }
                        for (int k = 0; k < slotTerms.Length; ++k)
                        {
                            alternatives.Add(new SpanTermQuery(slotTerms[k]));
                        }
                    }

                    // One SpanOrQuery per occupied position; unoccupied positions count as gaps.
                    SpanQuery[] nearClauses = new SpanQuery[occupied];
                    int filled = 0;
                    int gaps = 0;
                    foreach (IList <SpanQuery> alternatives in alternativesByPosition)
                    {
                        if (alternatives is null)
                        {
                            ++gaps;
                        }
                        else
                        {
                            nearClauses[filled++] = new SpanOrQuery(alternatives);
                        }
                    }

                    int multiSlop = multiPhrase.Slop;
                    SpanNearQuery multiNear = new SpanNearQuery(nearClauses, multiSlop + gaps, multiSlop == 0)
                    {
                        Boost = query.Boost
                    };
                    GetPayloads(payloads, multiNear);
                    break;
                }
            }
        }
Exemplo n.º 10
0
 /// <summary>
 /// Writes a span term query by delegating its term to <c>AzureQueryLogger.VisitTerm</c>.
 /// </summary>
 /// <param name="query">Span term query whose term is written.</param>
 /// <param name="writer">Writer passed through to <c>VisitTerm</c>.</param>
 private static void VisitQuery(SpanTermQuery query, AzureQueryLogger.IndentedTextWriter writer)
 {
     var term = query.Term;
     AzureQueryLogger.VisitTerm(term, writer);
 }
        /// <summary>
        /// Indexes a single document and checks that a <see cref="DisjunctionMaxQuery"/> made of two
        /// span term queries (one matching, one misspelled) returns exactly one hit.
        /// </summary>
        public virtual void TestBooleanSpanQuery()
        {
            const string FIELD = "content";

            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            Document d = new Document();
            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.Add(sq1);
            query.Add(sq2);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, collector);

            // Fetch the collected results exactly once: the collector hands out its queue
            // contents on the first call, so a second call would yield no score docs.
            ScoreDoc[] scoreDocs = collector.TopDocs().ScoreDocs;
            int hits = scoreDocs.Length;
            foreach (ScoreDoc scoreDoc in scoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }

            // Release resources before asserting so a failed assertion does not leave them open.
            indexReader.Dispose();
            directory.Dispose();

            // Expected value goes first so a failure message reports the values the right way round.
            Assert.AreEqual(1, hits);
        }
Exemplo n.º 12
0
 /// <summary>
 /// Builds a zero-slop <see cref="PayloadNearQuery"/> with one <see cref="SpanTermQuery"/>
 /// per whitespace-separated word of <paramref name="phrase"/>.
 /// </summary>
 /// <param name="fieldName">Field every term is created for.</param>
 /// <param name="phrase">Words separated by one or more whitespace characters.</param>
 /// <param name="inOrder">Passed to the <see cref="PayloadNearQuery"/> constructor.</param>
 /// <param name="function">Payload function passed to the <see cref="PayloadNearQuery"/> constructor.</param>
 /// <returns>The near query over all words of the phrase.</returns>
 private PayloadNearQuery NewPhraseQuery(string fieldName, string phrase, bool inOrder, PayloadFunction function)
 {
     // A null separator array makes Split use whitespace as the delimiter. The previous
     // "[\\s]+".ToCharArray() treated the regex text as literal separator characters
     // ('[', '\', 's', ']', '+'), so it never split on spaces and broke words at every 's'.
     // RemoveEmptyEntries collapses runs of whitespace, matching the intent of the "+" quantifier.
     string[] words = phrase.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
     SpanQuery[] clauses = new SpanQuery[words.Length];
     for (int i = 0; i < clauses.Length; i++)
     {
         clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
     }
     return new PayloadNearQuery(clauses, 0, inOrder, function);
 }
Exemplo n.º 13
0
 /// <summary>
 /// Visitor hook for <see cref="SpanTermQuery"/>. This implementation has no handling for
 /// span term queries and always throws; being virtual, it can be overridden.
 /// </summary>
 /// <param name="spanTermq">The span term query being visited (unused here).</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public virtual Query VisitSpanTermQuery(SpanTermQuery spanTermq)
 {
     throw new NotImplementedException();
 }
Exemplo n.º 14
0
 /// <summary>
 /// Visitor hook for <see cref="SpanTermQuery"/>. This implementation has no handling for
 /// span term queries and always throws; being virtual, it can be overridden.
 /// </summary>
 /// <param name="spanTermq">The span term query being visited (unused here).</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="SnNotSupportedException">Always thrown.</exception>
 public virtual Query VisitSpanTermQuery(SpanTermQuery spanTermq)
 {
     throw new SnNotSupportedException();
 }
Exemplo n.º 15
0
        /// <summary>
        /// Fills a <see cref="T:IDictionary{string, WeightedSpanTerm}"/> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <paramref name="query"/>.
        /// <para/>
        /// The query is dispatched on its runtime type: composite queries are unwrapped and
        /// fed back into this method recursively, phrase-style queries are converted into an
        /// equivalent <see cref="SpanNearQuery"/>, and any other query is rewritten once against
        /// the leaf reader and, if the rewrite produced a different query, extracted again.
        /// Unless the method returns early (a <see cref="MultiTermQuery"/> while multi-term
        /// expansion is disabled), <c>ExtractUnknownQuery</c> is invoked last for the
        /// original <paramref name="query"/>.
        /// </summary>
        /// <param name="query"><see cref="Query"/> to extract Terms from</param>
        /// <param name="terms">Map to place created <see cref="WeightedSpanTerm"/>s in</param>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        protected virtual void Extract(Query query, IDictionary <string, WeightedSpanTerm> terms)
        {
            if (query is BooleanQuery booleanQuery)
            {
                // Recurse into every clause except prohibited (MUST_NOT) ones.
                IList <BooleanClause> queryClauses = booleanQuery.Clauses;

                for (int i = 0; i < queryClauses.Count; i++)
                {
                    if (!queryClauses[i].IsProhibited)
                    {
                        Extract(queryClauses[i].Query, terms);
                    }
                }
            }
            else if (query is PhraseQuery phraseQuery)
            {
                // Model the phrase as a SpanNearQuery over one SpanTermQuery per phrase term.
                Term[]      phraseQueryTerms = phraseQuery.GetTerms();
                SpanQuery[] clauses          = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }
                int   slop      = phraseQuery.Slop;
                int[] positions = phraseQuery.GetPositions();
                // add largest position increment to slop
                if (positions.Length > 0)
                {
                    // Find the largest difference between consecutive positions.
                    int lastPos    = positions[0];
                    int largestInc = 0;
                    int sz         = positions.Length;
                    for (int i = 1; i < sz; i++)
                    {
                        int pos = positions[i];
                        int inc = pos - lastPos;
                        if (inc > largestInc)
                        {
                            largestInc = inc;
                        }
                        lastPos = pos;
                    }
                    // An increment of 1 means adjacent terms; only larger increments widen the slop.
                    if (largestInc > 1)
                    {
                        slop += largestInc;
                    }
                }

                // Ordered matching is requested only when the (possibly widened) slop is zero.
                bool inorder = slop == 0;

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                ExtractWeightedSpanTerms(terms, sp);
            }
            else if (query is TermQuery)
            {
                ExtractWeightedTerms(terms, query);
            }
            else if (query is SpanQuery spanQuery)
            {
                ExtractWeightedSpanTerms(terms, spanQuery);
            }
            else if (query is FilteredQuery filteredQuery)
            {
                // Only the wrapped query is inspected here.
                Extract(filteredQuery.Query, terms);
            }
            else if (query is ConstantScoreQuery constantScoreQuery)
            {
                // The wrapped query may be null, in which case nothing is extracted here.
                Query q = constantScoreQuery.Query;
                if (q != null)
                {
                    Extract(q, terms);
                }
            }
            else if (query is CommonTermsQuery)
            {
                // specialized since rewriting would change the result query
                // this query is TermContext sensitive.
                ExtractWeightedTerms(terms, query);
            }
            else if (query is DisjunctionMaxQuery disjunctionMaxQuery)
            {
                // Recurse into each disjunct.
                foreach (var q in disjunctionMaxQuery)
                {
                    Extract(q, terms);
                }
            }
            else if (query is MultiPhraseQuery mpq)
            {
                IList <Term[]> termArrays = mpq.GetTermArrays();
                int[]          positions  = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    // Determine the highest position: start from the last entry and scan the rest.
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    // One slot per position value; slots that stay null are positions with no terms.
                    var disjunctLists     = new List <SpanQuery> [maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[]           termArray = termArrays[i];
                        List <SpanQuery> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            // First term array seen at this position: create its list and count the position.
                            disjuncts = (disjunctLists[positions[i]] = new List <SpanQuery>(termArray.Length));
                            ++distinctPositions;
                        }
                        foreach (var term in termArray)
                        {
                            disjuncts.Add(new SpanTermQuery(term));
                        }
                    }

                    // Build one SpanOrQuery per occupied position and count the empty positions (gaps).
                    int         positionGaps = 0;
                    int         position     = 0;
                    SpanQuery[] clauses      = new SpanQuery[distinctPositions];
                    foreach (var disjuncts in disjunctLists)
                    {
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    // Ordering is decided from the query's own slop; the gaps are added to the slop afterwards.
                    int  slop    = mpq.Slop;
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    ExtractWeightedSpanTerms(terms, sp);
                }
            }
            else
            {
                Query origQuery = query;
                if (query is MultiTermQuery)
                {
                    // Multi-term expansion disabled: return immediately (ExtractUnknownQuery is not called).
                    if (!expandMultiTermQuery)
                    {
                        return;
                    }
                    // Rewrite a clone so the caller's query keeps its own rewrite method.
                    MultiTermQuery copy = (MultiTermQuery)query.Clone();
                    copy.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                    origQuery = copy;
                }
                IndexReader reader    = GetLeafContext().Reader;
                Query       rewritten = origQuery.Rewrite(reader);
                if (rewritten != origQuery)
                {
                    // only rewrite once and then flatten again - the rewritten query could have a special treatment
                    // if this method is overridden in a subclass or above in the next recursion
                    Extract(rewritten, terms);
                }
            }
            // Reached by every path that did not return early, including the handled query types above.
            ExtractUnknownQuery(query, terms);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Indexes a single document through <see cref="MockPayloadAnalyzer"/> and verifies:
        /// the positions reported for the term "a" (0, 1, 3, 6), the number of payloads and
        /// spans produced by a <see cref="SpanNearQuery"/> over "a" and "k", and that
        /// <see cref="PayloadSpanUtil"/> returns five payloads including "pos: 0".
        /// </summary>
        public virtual void TestPayloadsPos0()
        {
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, new MockPayloadAnalyzer());
            Document document = new Document();
            document.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            indexWriter.AddDocument(document);

            IndexReader writerReader = indexWriter.Reader;
            AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(writerReader);

            DocsAndPositionsEnum positionsOfA = atomicReader.TermPositionsEnum(new Term("content", "a"));

            Assert.IsTrue(positionsOfA.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            // "a" occurs 4 times, at these positions
            Assert.AreEqual(4, positionsOfA.Freq());
            foreach (int expectedPosition in new int[] { 0, 1, 3, 6 })
            {
                Assert.AreEqual(expectedPosition, positionsOfA.NextPosition());
            }

            // only one doc has "a"
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, positionsOfA.NextDoc());

            IndexSearcher indexSearcher = NewSearcher(writerReader);

            SpanQuery[] nearClauses = new SpanQuery[]
            {
                new SpanTermQuery(new Term("content", "a")),
                new SpanTermQuery(new Term("content", "k"))
            };
            SpanNearQuery nearQuery = new SpanNearQuery(nearClauses, 30, false);

            // Pass 1: walk the spans and count every payload attached to them.
            int payloadCount = 0;
            bool payloadPassSawZero = false;
            if (VERBOSE)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }
            var payloadSpans = MultiSpansWrapper.Wrap(indexSearcher.TopReaderContext, nearQuery);
            while (payloadSpans.Next())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("doc " + payloadSpans.Doc() + ": span " + payloadSpans.Start() + " to " + payloadSpans.End());
                }
                var spanPayloads = payloadSpans.Payload;
                if (payloadSpans.Start() == 0)
                {
                    payloadPassSawZero = true;
                }
                foreach (var bytes in spanPayloads)
                {
                    payloadCount++;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString((byte[])(Array)bytes));
                    }
                }
            }
            Assert.IsTrue(payloadPassSawZero);
            Assert.AreEqual(5, payloadCount);

            // Pass 2: count the spans themselves.
            var plainSpans = MultiSpansWrapper.Wrap(indexSearcher.TopReaderContext, nearQuery);
            int spanCount = 0;
            bool spanPassSawZero = false;
            while (plainSpans.Next())
            {
                spanCount++;
                if (plainSpans.Start() == 0)
                {
                    spanPassSawZero = true;
                }
            }
            Assert.AreEqual(4, spanCount);
            Assert.IsTrue(spanPassSawZero);

            // Pass 3: ask PayloadSpanUtil for the payloads of the same query.
            bool utilSawZero = false;
            PayloadSpanUtil payloadUtil = new PayloadSpanUtil(indexSearcher.TopReaderContext);
            var utilPayloads = payloadUtil.GetPayloadsForQuery(nearQuery);
            int utilPayloadCount = utilPayloads.Count;
            foreach (var bytes in utilPayloads)
            {
                string decoded = Encoding.UTF8.GetString(bytes);
                if (decoded == "pos: 0")
                {
                    utilSawZero = true;
                }
            }
            Assert.AreEqual(5, utilPayloadCount);
            Assert.IsTrue(utilSawZero);

            indexWriter.Dispose();
            indexSearcher.IndexReader.Dispose();
            directory.Dispose();
        }
Exemplo n.º 17
0
        /// <summary>
        /// Indexes two documents, runs a plain <see cref="BooleanQuery"/> over "wizard" and "oz",
        /// then rescores the hits with a sloppy <see cref="PhraseQuery"/> and with an exact,
        /// ordered <see cref="SpanNearQuery"/>. Both rescoring passes are expected to swap the
        /// order of the two hits (id "1" before id "0").
        /// </summary>
        public virtual void TestBasic()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);

            Document first = new Document();
            first.Add(NewStringField("id", "0", Field.Store.YES));
            first.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
            writer.AddDocument(first);

            Document second = new Document();
            second.Add(NewStringField("id", "1", Field.Store.YES));
            // 1 extra token, but wizard and oz are close;
            second.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
            writer.AddDocument(second);

            IndexReader reader = writer.GetReader();
            writer.Dispose();

            // Baseline: ordinary BooleanQuery with two optional term clauses.
            BooleanQuery eitherTerm = new BooleanQuery();
            eitherTerm.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
            eitherTerm.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);

            IndexSearcher searcher = GetSearcher(reader);
            searcher.Similarity = new DefaultSimilarity();

            // Stored "id" of the hit at the given rank.
            string IdAt(TopDocs docs, int rank) => searcher.Doc(docs.ScoreDocs[rank].Doc).Get("id");

            TopDocs baseline = searcher.Search(eitherTerm, 10);

            Assert.AreEqual(2, baseline.TotalHits);
            Assert.AreEqual("0", IdAt(baseline, 0));
            Assert.AreEqual("1", IdAt(baseline, 1));

            // Rescore with a sloppy PhraseQuery.
            PhraseQuery sloppyPhrase = new PhraseQuery { Slop = 5 };
            sloppyPhrase.Add(new Term("field", "wizard"));
            sloppyPhrase.Add(new Term("field", "oz"));

            TopDocs byPhrase = QueryRescorer.Rescore(searcher, baseline, sloppyPhrase, 2.0, 10);

            // Rescoring changed the order:
            Assert.AreEqual(2, byPhrase.TotalHits);
            Assert.AreEqual("1", IdAt(byPhrase, 0));
            Assert.AreEqual("0", IdAt(byPhrase, 1));

            // Rescore with an exact, in-order SpanNearQuery.
            SpanTermQuery wizardSpan = new SpanTermQuery(new Term("field", "wizard"));
            SpanTermQuery ozSpan     = new SpanTermQuery(new Term("field", "oz"));
            SpanNearQuery adjacent   = new SpanNearQuery(new SpanQuery[] { wizardSpan, ozSpan }, 0, true);

            TopDocs bySpan = QueryRescorer.Rescore(searcher, baseline, adjacent, 2.0, 10);

            // Rescoring changed the order:
            Assert.AreEqual(2, bySpan.TotalHits);
            Assert.AreEqual("1", IdAt(bySpan, 0));
            Assert.AreEqual("0", IdAt(bySpan, 1));

            reader.Dispose();
            directory.Dispose();
        }
Exemplo n.º 18
0
        /// <summary>
        /// Translates <paramref name="query"/> into one or more <see cref="SpanQuery"/> instances
        /// and passes each of them, together with <paramref name="payloads"/>, to <c>GetPayloads</c>.
        /// <para/>
        /// Handled query types: <see cref="BooleanQuery"/> (non-prohibited clauses, recursively),
        /// <see cref="PhraseQuery"/>, <see cref="TermQuery"/>, <see cref="SpanQuery"/>,
        /// <see cref="FilteredQuery"/> (wrapped query, recursively), <see cref="DisjunctionMaxQuery"/>
        /// (each disjunct, recursively) and <see cref="MultiPhraseQuery"/>. Any other query type
        /// is silently ignored.
        /// </summary>
        /// <param name="query">The query to translate.</param>
        /// <param name="payloads">Collection handed to <c>GetPayloads</c> (presumably the
        /// destination for the payloads found; confirm against <c>GetPayloads</c>).</param>
        private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();

                for (int i = 0; i < queryClauses.Length; i++)
                {
                    // Prohibited (MUST_NOT) clauses are skipped.
                    if (!queryClauses[i].IsProhibited)
                    {
                        QueryToSpanQuery(queryClauses[i].Query, payloads);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                PhraseQuery phraseQuery = (PhraseQuery)query;
                Term[] phraseQueryTerms = phraseQuery.GetTerms();
                SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }

                // Ordered matching is requested only for an exact (slop 0) phrase.
                int slop = phraseQuery.Slop;
                bool inorder = (slop == 0);

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                GetPayloads(payloads, sp);
            }
            else if (query is TermQuery)
            {
                SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).Term);
                stq.Boost = query.Boost;
                GetPayloads(payloads, stq);
            }
            else if (query is SpanQuery)
            {
                GetPayloads(payloads, (SpanQuery)query);
            }
            else if (query is FilteredQuery)
            {
                QueryToSpanQuery(((FilteredQuery)query).Query, payloads);
            }
            else if (query is DisjunctionMaxQuery)
            {
                // foreach also disposes the enumerator, which the manual loop did not.
                foreach (Query disjunct in (DisjunctionMaxQuery)query)
                {
                    QueryToSpanQuery(disjunct, payloads);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery mpq = (MultiPhraseQuery)query;
                IList<Term[]> termArrays = mpq.GetTermArrays();
                int[] positions = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    // Determine the highest position: start from the last entry and scan the rest.
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    // One slot per position value; slots that stay null are positions with no terms.
                    // The lists are typed as List<SpanQuery> so that ToArray() yields a real
                    // SpanQuery[]. Casting a Query[] to SpanQuery[] compiles but throws
                    // InvalidCastException at run time.
                    List<SpanQuery>[] disjunctLists = new List<SpanQuery>[maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[] termArray = termArrays[i];
                        List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            // First term array seen at this position: create its list and count the position.
                            disjuncts = new List<SpanQuery>(termArray.Length);
                            disjunctLists[positions[i]] = disjuncts;
                            ++distinctPositions;
                        }
                        foreach (Term term in termArray)
                        {
                            disjuncts.Add(new SpanTermQuery(term));
                        }
                    }

                    // Build one SpanOrQuery per occupied position and count the empty positions (gaps).
                    int positionGaps = 0;
                    int position = 0;
                    SpanQuery[] clauses = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        List<SpanQuery> disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    // Ordering is decided from the query's own slop; the gaps are added to the slop afterwards.
                    int slop = mpq.Slop;
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    GetPayloads(payloads, sp);
                }
            }
        }
Exemplo n.º 19
0
            /// <summary>
            /// Rewrites the parsed phrase contents into span queries.
            /// <para/>
            /// A lone <see cref="TermQuery"/> is returned unchanged. Otherwise the contents must be a
            /// <see cref="BooleanQuery"/>; each of its clauses is rewritten against
            /// <paramref name="reader"/> and converted to a <see cref="SpanQuery"/>. With no
            /// MUST_NOT clauses the result is a <see cref="SpanNearQuery"/> over all clauses;
            /// with at least one, the result is a <see cref="SpanNotQuery"/> whose include part
            /// covers the positive clauses and whose exclude part covers all clauses.
            /// </summary>
            /// <param name="reader">Reader used to rewrite each clause.</param>
            /// <returns>The contents themselves, a <see cref="SpanNearQuery"/> or a <see cref="SpanNotQuery"/>.</returns>
            /// <exception cref="ArgumentException">
            /// The contents, or a rewritten clause, are of a query type this method does not handle.
            /// </exception>
            public override Query Rewrite(IndexReader reader)
            {
                if (contents is TermQuery)
                {
                    return contents;
                }

                if (!(contents is BooleanQuery phrase))
                {
                    throw new ArgumentException("Unknown query type \""
                                                + contents.GetType().Name
                                                + "\" found in phrase query string \"" + phrasedQueryStringContents
                                                + "\"");
                }

                // Build a sequence of span clauses arranged in a SpanNear - child
                // clauses can be complex booleans, e.g. nots and ors.
                BooleanClause[] parts     = phrase.GetClauses();
                SpanQuery[]     spanParts = new SpanQuery[parts.Length];
                int             negatives = 0;

                // For all clauses, e.g. one* two~
                for (int idx = 0; idx < parts.Length; idx++)
                {
                    // Rewrite this clause, e.g. one* becomes (one OR onerous)
                    Query rewrittenPart = parts[idx].Query.Rewrite(reader);
                    if (parts[idx].Occur.Equals(Occur.MUST_NOT))
                    {
                        negatives++;
                    }

                    switch (rewrittenPart)
                    {
                        case BooleanQuery expanded:
                        {
                            IList <SpanQuery> collected = new JCG.List <SpanQuery>();
                            AddComplexPhraseClause(collected, expanded);
                            // Insert a fake term when nothing was collected, e.g. phrase query
                            // was for "Fred Smithe*" and there were no "Smithe*" terms - need
                            // to prevent a match on just "Fred".
                            spanParts[idx] = collected.Count > 0
                                ? collected[0]
                                : new SpanTermQuery(new Term(field,
                                                             "Dummy clause because no terms found - must match nothing"));
                            break;
                        }

                        case TermQuery single:
                            spanParts[idx] = new SpanTermQuery(single.Term);
                            break;

                        default:
                            throw new ArgumentException("Unknown query type \""
                                                        + rewrittenPart.GetType().Name
                                                        + "\" found in phrase query string \""
                                                        + phrasedQueryStringContents + "\"");
                    }
                }

                if (negatives == 0)
                {
                    // The simple case - no negative elements in phrase
                    return new SpanNearQuery(spanParts, slopFactor, inOrder);
                }

                // Complex case - mixed positives and negatives in the sequence,
                // so a SpanNotQuery is needed. Collect the positive clauses first.
                JCG.List <SpanQuery> positives = new JCG.List <SpanQuery>();
                for (int idx = 0; idx < spanParts.Length; idx++)
                {
                    if (parts[idx].Occur.Equals(Occur.MUST_NOT))
                    {
                        continue;
                    }
                    positives.Add(spanParts[idx]);
                }

                SpanQuery[] includeParts = positives.ToArray();

                // A single positive clause is used as-is; otherwise the slop factor is
                // increased by the number of gaps introduced by the negatives.
                SpanQuery include = includeParts.Length == 1
                    ? includeParts[0]
                    : new SpanNearQuery(includeParts, slopFactor + negatives, inOrder);

                // Use the full sequence of positive and negative clauses as the exclude.
                SpanNearQuery exclude = new SpanNearQuery(spanParts, slopFactor, inOrder);

                return new SpanNotQuery(include, exclude);
            }
        /// <summary>
        /// Runs <c>QueryUtils.Check</c> on a <see cref="PayloadTermQuery"/>, verifies that
        /// its equality with a <see cref="SpanTermQuery"/> on the same term is symmetric, and
        /// checks that two payload term queries differing only in their payload function are unequal.
        /// </summary>
        public virtual void TestQuery()
        {
            // Every query gets its own Term instance for the same field/text.
            Term SeventyTerm() => new Term(PayloadHelper.MULTI_FIELD, "seventy");

            PayloadTermQuery maxPayloadQuery = new PayloadTermQuery(SeventyTerm(), new MaxPayloadFunction());
            QueryUtils.Check(maxPayloadQuery);

            SpanTermQuery plainSpanQuery = new SpanTermQuery(SeventyTerm());

            // Equals must give the same answer in both directions.
            bool payloadEqualsSpan = maxPayloadQuery.Equals(plainSpanQuery);
            bool spanEqualsPayload = plainSpanQuery.Equals(maxPayloadQuery);
            Assert.IsTrue(payloadEqualsSpan == spanEqualsPayload);

            PayloadTermQuery averagePayloadQuery = new PayloadTermQuery(SeventyTerm(), new AveragePayloadFunction());

            QueryUtils.CheckUnequal(maxPayloadQuery, averagePayloadQuery);
        }