Example #1
0
        // Runs the shared query through a DuplicateFilter configured with
        // KM_USE_LAST_OCCURRENCE and checks that every returned hit is the final
        // document enumerated for its KEY_FIELD value.
        public void TestKeepsLastFilter()
        {
            DuplicateFilter filter = new DuplicateFilter(KEY_FIELD)
            {
                KeepMode = KeepMode.KM_USE_LAST_OCCURRENCE
            };

            ScoreDoc[] results = searcher.Search(tq, filter, 1000).ScoreDocs;
            assertTrue("Filtered searching should have found some matches", results.Length > 0);

            for (int i = 0; i < results.Length; i++)
            {
                int    hitDocId = results[i].Doc;
                string key      = searcher.Doc(hitDocId).Get(KEY_FIELD);

                DocsEnum postings = TestUtil.Docs(Random(), reader, KEY_FIELD, new BytesRef(key),
                                                  MultiFields.GetLiveDocs(reader), null, 0);

                // Exhaust the postings for this key; whatever id was seen last
                // is the document the filter is expected to have kept.
                int finalDocId = 0;
                while (true)
                {
                    if (postings.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    finalDocId = postings.DocID;
                }

                assertEquals("Duplicate urls should return last doc", finalDocId, hitDocId);
            }
        }
Example #2
0
        // Indexes a document containing a term one character longer than
        // IndexWriter.MAX_TERM_LENGTH, checks that the remaining terms, their
        // positions and the document itself are still indexed, then indexes and
        // looks up a term of exactly the maximum length.
        public virtual void TestWickedLongTerm()
        {
            using RAMDirectory directory = new RAMDirectory();

            // A run of 'x' characters that is exactly MAX_TERM_LENGTH long.
            string maxLengthTerm = new string('x', IndexWriter.MAX_TERM_LENGTH);

            var defaultConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT));
            using (var firstWriter = new IndexWriter(directory, defaultConfig))
            {
                // The extra leading "x" makes the third token too long to index.
                var oversizedDoc = new Document();
                oversizedDoc.Add(new TextField("content", "abc xyz x" + maxLengthTerm + " another term", Field.Store.NO));
                firstWriter.AddDocument(oversizedDoc);

                // A normal document must still be accepted afterwards.
                var ordinaryDoc = new Document();
                ordinaryDoc.Add(new TextField("content", "abc bbb ccc", Field.Store.NO));
                firstWriter.AddDocument(ordinaryDoc);
            }
#pragma warning disable 612, 618
            using (IndexReader firstReader = IndexReader.Open(directory))
#pragma warning restore 612, 618
            {
                // Every term below the size limit must have been indexed.
                assertEquals(2, firstReader.DocFreq(new Term("content", "abc")));
                assertEquals(1, firstReader.DocFreq(new Term("content", "bbb")));
                assertEquals(1, firstReader.DocFreq(new Term("content", "term")));
                assertEquals(1, firstReader.DocFreq(new Term("content", "another")));

                // The skipped term must still consume a position, so "another"
                // is expected at position 3.
                DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(
                    firstReader, MultiFields.GetLiveDocs(firstReader), "content", new BytesRef("another"));
                assertTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                assertEquals(1, positions.Freq);
                assertEquals(3, positions.NextPosition());

                // The document that carried the oversized term must itself be present.
                assertEquals("document with wicked long term should is not in the index!", 2, firstReader.NumDocs);
            }

            // A term of exactly the maximum length must be indexable and searchable.
            var maxLengthDoc = new Document();
            maxLengthDoc.Add(new TextField("content", maxLengthTerm, Field.Store.NO));
            var permissiveAnalyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT)
            {
                MaxTokenLength = 100000
            };
            using (var secondWriter = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, permissiveAnalyzer)))
            {
                secondWriter.AddDocument(maxLengthDoc);
            }
#pragma warning disable 612, 618
            using (var secondReader = IndexReader.Open(directory))
#pragma warning restore 612, 618
            {
                assertEquals(1, secondReader.DocFreq(new Term("content", maxLengthTerm)));
            }
        }
Example #3
0
        /// <summary>
        /// Asserts that the writer holds <paramref name="count"/> minus
        /// <paramref name="maxDeletedExclusive"/> documents and, when
        /// <paramref name="maxDeletedExclusive"/> is zero, that each document id below
        /// <paramref name="count"/> maps back to the matching <c>TestObject</c>.
        /// </summary>
        /// <param name="count">Number of document ids to inspect.</param>
        /// <param name="maxDeletedExclusive">Exclusive upper bound of the ids expected to be deleted.</param>
        private void VerifyTestObjects(int count, int maxDeletedExclusive)
        {
            Assert.AreEqual(count - maxDeletedExclusive, writer.NumDocs);

            // The per-document checks are only performed when nothing is expected to be deleted.
            if (maxDeletedExclusive != 0)
            {
                return;
            }

            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                IBits live = MultiFields.GetLiveDocs(reader);
                if (live == null)
                {
                    // No live-docs bits were returned; nothing further is inspected.
                    Assert.AreEqual(0, maxDeletedExclusive);
                    return;
                }

                for (int docId = 0; docId < count; docId++)
                {
                    if (docId >= maxDeletedExclusive)
                    {
                        Assert.False(!live.Get(docId));
                        TestObject restored = reader.Document(docId).ToObject<TestObject>();
                        Assert.AreEqual(docId, restored.Number);
                        Assert.AreEqual(String.Format("Test Object {0}", docId), restored.String);
                    }
                    else
                    {
                        Assert.True(!live.Get(docId));
                    }
                }
            }
        }
Example #4
0
 /// <summary>
 /// Initializes the scorer from <paramref name="reader"/> and <paramref name="values"/>.
 /// Stores both arguments, caches the reader's <c>MaxDoc</c> and its live-docs bits,
 /// and sets <c>CheckDeletes</c> to <c>true</c>. The base class is constructed with <c>null</c>.
 /// </summary>
 /// <param name="reader">Reader whose <c>MaxDoc</c> and live docs are captured.</param>
 /// <param name="values">Function values stored on the scorer.</param>
 protected internal ValueSourceScorer(IndexReader reader, FunctionValues values)
     : base(null)
 {
     // Constructor arguments first.
     this.reader = reader;
     this.values = values;

     // State derived from the reader.
     maxDoc       = reader.MaxDoc;
     CheckDeletes = true;
     liveDocs     = MultiFields.GetLiveDocs(reader);
 }
Example #5
0
 /// <summary>
 /// Creates an iterator over term, weight and payload fields from the lucene
 /// index. Setting <paramref name="hasPayloads"/> to <c>false</c> implies an iterator
 /// over only term and weight.
 /// </summary>
 /// <param name="outerInstance">Dictionary supplying the index reader and the field names.</param>
 /// <param name="hasPayloads">Stored as the iterator's payload flag.</param>
 /// <param name="hasContexts">Stored as the iterator's contexts flag.</param>
 public DocumentInputIterator(DocumentDictionary outerInstance, bool hasPayloads, bool hasContexts)
 {
     this.outerInstance = outerInstance;
     this.hasPayloads   = hasPayloads;
     this.hasContexts   = hasContexts;

     var indexReader     = outerInstance.reader;
     var weightFieldName = outerInstance.weightField;

     // Highest document id: MaxDoc minus one.
     docCount = indexReader.MaxDoc - 1;

     // Numeric weights are only looked up when a weight field is configured.
     weightValues = (weightFieldName == null)
         ? null
         : MultiDocValues.GetNumericValues(indexReader, weightFieldName);

     // Live docs are only requested when the reader has at least one leaf.
     liveDocs = (indexReader.Leaves.Count > 0)
         ? MultiFields.GetLiveDocs(indexReader)
         : null;

     relevantFields = GetRelevantFields(new string[]
     {
         outerInstance.field,
         weightFieldName,
         outerInstance.payloadField,
         outerInstance.contextsField
     });
 }
Example #6
0
        /// <summary>
        /// Builds the bit set of documents expected to match a join for <paramref name="queryValue"/>.
        /// Documents carrying the query value are looked up on one side (chosen by
        /// <paramref name="from"/>), their link values are followed to the documents on the
        /// other side, and the first document returned for each such document's "id" term is set.
        /// </summary>
        /// <param name="queryValue">Random value whose documents seed the expected result.</param>
        /// <param name="from">Selects which pair of lookup tables of <paramref name="context"/> is used.</param>
        /// <param name="topLevelReader">Reader used to size the bit set and resolve "id" terms.</param>
        /// <param name="context">Holder of the value-to-documents lookup tables.</param>
        /// <returns>A bit set sized to the reader's <c>MaxDoc</c>; empty when the query value has no documents.</returns>
        private FixedBitSet CreateExpectedResult(string queryValue, bool from, IndexReader topLevelReader,
                                                 IndexIterationContext context)
        {
            IDictionary<string, IList<RandomDoc>> docsByRandomValue;
            IDictionary<string, IList<RandomDoc>> docsByLinkValue;

            if (!from)
            {
                docsByRandomValue = context.RandomValueToDocs;
                docsByLinkValue   = context.FromDocuments;
            }
            else
            {
                docsByRandomValue = context.RandomValueFromDocs;
                docsByLinkValue   = context.ToDocuments;
            }

            FixedBitSet expected = new FixedBitSet(topLevelReader.MaxDoc);

            bool hasSeeds = docsByRandomValue.TryGetValue(queryValue, out IList<RandomDoc> seedDocs);
            if (!hasSeeds || seedDocs == null)
            {
                return new FixedBitSet(topLevelReader.MaxDoc);
            }

            foreach (RandomDoc seed in seedDocs)
            {
                foreach (string link in seed.linkValues)
                {
                    // Link values with no documents on the other side contribute nothing.
                    if (docsByLinkValue.TryGetValue(link, out IList<RandomDoc> linkedDocs) && linkedDocs != null)
                    {
                        foreach (RandomDoc linked in linkedDocs)
                        {
                            DocsEnum idPostings = MultiFields.GetTermDocsEnum(
                                topLevelReader,
                                MultiFields.GetLiveDocs(topLevelReader),
                                "id",
                                new BytesRef(linked.id),
                                0);
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(idPostings != null);
                            }

                            // Only the first document returned for the id term is recorded.
                            int matchedDoc = idPostings.NextDoc();
                            expected.Set(matchedDoc);
                        }
                    }
                }
            }

            return expected;
        }
Example #7
0
        // Wraps a token stream in a CachingTokenFilter, consumes it twice, indexes
        // it as a pre-analyzed field, verifies the indexed frequencies and
        // positions of term1..term3, and finally consumes the cached stream again.
        public virtual void TestCaching()
        {
            Directory         dir    = new RAMDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            TokenStream stream = new CachingTokenFilter(new TokenStreamAnonymousInnerClassHelper(this));

            Document doc = new Document();
            doc.Add(new TextField("preanalyzed", stream));

            // 1) consume all tokens twice before the doc is added to the index
            CheckTokens(stream);
            stream.Reset();
            CheckTokens(stream);

            // 2) add the document to the index and verify that all tokens are indexed;
            //    the stream is deliberately not reset here, the DocumentWriter should do that implicitly
            writer.AddDocument(doc);

            IndexReader reader = writer.GetReader();

            // Asserts that the term occurs in a document with the given frequency
            // and that its positions come back in the given order.
            void AssertIndexedTerm(string term, int expectedFreq, params int[] expectedPositions)
            {
                DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(
                    reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef(term));

                Assert.IsTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.AreEqual(expectedFreq, postings.Freq);
                foreach (int expectedPosition in expectedPositions)
                {
                    Assert.AreEqual(expectedPosition, postings.NextPosition());
                }
            }

            AssertIndexedTerm("term1", 1, 0);
            AssertIndexedTerm("term2", 2, 1, 3);
            AssertIndexedTerm("term3", 1, 2);

            reader.Dispose();
            writer.Dispose();

            // 3) reset stream and consume tokens again
            stream.Reset();
            CheckTokens(stream);
            dir.Dispose();
        }
Example #8
0
        // Indexes two documents, each with a single "partnum" value, through a
        // KeywordAnalyzer and checks that postings exist for both values.
        // The directory, writer and reader are scoped with using blocks so they
        // are released even when an assertion or an indexing call throws.
        public virtual void TestMutipleDocument()
        {
            using (RAMDirectory dir = new RAMDirectory())
            {
                using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer())))
                {
                    Document doc = new Document();
                    doc.Add(new TextField("partnum", "Q36", Field.Store.YES));
                    writer.AddDocument(doc);

                    doc = new Document();
                    doc.Add(new TextField("partnum", "Q37", Field.Store.YES));
                    writer.AddDocument(doc);
                }

                using (IndexReader reader = DirectoryReader.Open(dir))
                {
                    DocsEnum td = TestUtil.Docs(Random(), reader, "partnum", new BytesRef("Q36"), MultiFields.GetLiveDocs(reader), null, 0);
                    assertTrue(td.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

                    td = TestUtil.Docs(Random(), reader, "partnum", new BytesRef("Q37"), MultiFields.GetLiveDocs(reader), null, 0);
                    assertTrue(td.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                }
            }
        }
Example #9
0
        // Indexes one document through a custom analyzer, checks the positions
        // recorded for terms "1" and "2", and then runs a series of phrase and
        // multi-phrase queries, asserting the hit count of each.
        public virtual void TestSetPosition()
        {
            Analyzer          analyzer = new AnalyzerAnonymousClass(this);
            Directory         store    = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(Random, store, analyzer);

            Document d = new Document();
            d.Add(NewTextField("field", "bogus", Field.Store.YES));
            writer.AddDocument(d);
            IndexReader reader = writer.GetReader();
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // Small local helpers keep the query cases below compact.
            Term FieldTerm(string text) => new Term("field", text);

            PhraseQuery Consecutive(string first, string second)
            {
                PhraseQuery query = new PhraseQuery();
                query.Add(FieldTerm(first));
                query.Add(FieldTerm(second));
                return query;
            }

            PhraseQuery Positioned(string first, int firstPosition, string second, int secondPosition)
            {
                PhraseQuery query = new PhraseQuery();
                query.Add(FieldTerm(first), firstPosition);
                query.Add(FieldTerm(second), secondPosition);
                return query;
            }

            int HitCount(Query query) => searcher.Search(query, null, 1000).ScoreDocs.Length;

            DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(
                searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));
            pos.NextDoc();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = MultiFields.GetTermPositionsEnum(
                searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
            pos.NextDoc();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            Assert.AreEqual(0, HitCount(Consecutive("1", "2")));

            // same as previous, just specify positions explicitly.
            Assert.AreEqual(0, HitCount(Positioned("1", 0, "2", 1)));

            // specifying correct positions should find the phrase.
            Assert.AreEqual(1, HitCount(Positioned("1", 0, "2", 2)));

            Assert.AreEqual(1, HitCount(Consecutive("2", "3")));

            Assert.AreEqual(0, HitCount(Consecutive("3", "4")));

            // phrase query would find it when correct positions are specified.
            Assert.AreEqual(1, HitCount(Positioned("3", 0, "4", 0)));

            // phrase query should fail for a non-existing searched term
            // even if other searched terms exist in the same searched position.
            Assert.AreEqual(0, HitCount(Positioned("3", 0, "9", 0)));

            // multi-phrase query should succeed for a non-existing searched term
            // because other searched terms exist in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();
            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            Assert.AreEqual(1, HitCount(mq));

            Assert.AreEqual(1, HitCount(Consecutive("2", "4")));

            Assert.AreEqual(1, HitCount(Consecutive("3", "5")));

            Assert.AreEqual(1, HitCount(Consecutive("4", "5")));

            Assert.AreEqual(0, HitCount(Consecutive("2", "5")));

            reader.Dispose();
            store.Dispose();
        }
Example #10
0
 /// <summary>
 /// Creates an enumerator over term, weight and payload fields from the lucene
 /// index. Setting <paramref name="hasPayloads"/> to <c>false</c> implies an enumerator
 /// over only term and weight.
 /// </summary>
 /// <param name="documentDictionary">Dictionary supplying the index reader and the field names.</param>
 /// <param name="hasPayloads">Stored as the enumerator's payload flag.</param>
 /// <param name="hasContexts">Stored as the enumerator's contexts flag.</param>
 public DocumentInputEnumerator(DocumentDictionary documentDictionary, bool hasPayloads, bool hasContexts)
 {
     this.outerInstance = documentDictionary;
     this.hasPayloads   = hasPayloads;
     this.hasContexts   = hasContexts;

     var indexReader     = documentDictionary.m_reader;
     var weightFieldName = documentDictionary.weightField;

     // Highest document id: MaxDoc minus one.
     docCount = indexReader.MaxDoc - 1;

     // Numeric weights are only looked up when a weight field is configured.
     weightValues = (weightFieldName == null)
         ? null
         : MultiDocValues.GetNumericValues(indexReader, weightFieldName);

     // Live docs are only requested when the reader has at least one leaf.
     liveDocs = (indexReader.Leaves.Count > 0)
         ? MultiFields.GetLiveDocs(indexReader)
         : null;

     relevantFields = GetRelevantFields(new string[]
     {
         documentDictionary.field,
         weightFieldName,
         documentDictionary.m_payloadField,
         documentDictionary.m_contextsField
     });
 }
Example #11
0
        /// <summary>
        /// Runs one iteration of the read task: optionally warms the reader by loading
        /// every live document, optionally runs a query, and optionally traverses,
        /// retrieves and highlights the hits.
        /// </summary>
        /// <remarks>
        /// The reader is released in a <c>finally</c> block (disposed when this method
        /// opened it, dec-ref'd when it came from the shared searcher), so it is not
        /// leaked if warming, searching or highlighting throws. Traversal is skipped when
        /// no <see cref="TopDocs"/> were produced, which is the case when a custom
        /// collector is used.
        /// </remarks>
        /// <returns>
        /// A running count of the work done: one per warmed document, one for the search,
        /// one per traversed hit, one per retrieved document, plus whatever the
        /// highlighter reports for each highlighted field.
        /// </returns>
        public override int DoLogic()
        {
            int res = 0;

            // open reader or use existing one
            IndexSearcher searcher = RunData.GetIndexSearcher();

            IndexReader reader;

            bool closeSearcher;

            if (searcher == null)
            {
                // open our own reader
                reader        = DirectoryReader.Open(RunData.Directory);
                closeSearcher = true;
            }
            else
            {
                // use existing one; this passes +1 ref to us
                reader        = searcher.IndexReader;
                closeSearcher = false;
            }

            try
            {
                if (searcher == null)
                {
                    searcher = new IndexSearcher(reader);
                }

                // optionally warm and add num docs traversed to count
                if (WithWarm)
                {
                    IBits liveDocs = MultiFields.GetLiveDocs(reader);
                    for (int m = 0; m < reader.MaxDoc; m++)
                    {
                        // a null liveDocs means there is nothing to filter out
                        if (null == liveDocs || liveDocs.Get(m))
                        {
                            Document warmed = reader.Document(m);
                            res += (warmed == null ? 0 : 1);
                        }
                    }
                }

                if (WithSearch)
                {
                    res++;
                    Query   q       = queryMaker.MakeQuery();
                    Sort    sort    = Sort;
                    TopDocs hits    = null;
                    int     numHits = NumHits;
                    if (numHits > 0)
                    {
                        if (WithCollector == false)
                        {
                            if (sort != null)
                            {
                                // TODO: instead of always passing false we
                                // should detect based on the query; if we make
                                // the IndexSearcher search methods that take
                                // Weight public again, we can go back to
                                // pulling the Weight ourselves:
                                TopFieldCollector collector = TopFieldCollector.Create(sort, numHits,
                                                                                       true, WithScore,
                                                                                       WithMaxScore,
                                                                                       false);
                                searcher.Search(q, null, collector);
                                hits = collector.GetTopDocs();
                            }
                            else
                            {
                                hits = searcher.Search(q, numHits);
                            }
                        }
                        else
                        {
                            // A custom collector does not hand back TopDocs, so hits stays null.
                            ICollector collector = CreateCollector();
                            searcher.Search(q, null, collector);
                            //hits = collector.topDocs();
                        }

                        string printHitsField = RunData.Config.Get("print.hits.field", null);
                        if (hits != null && printHitsField != null && printHitsField.Length > 0)
                        {
                            Console.WriteLine("totalHits = " + hits.TotalHits);
                            Console.WriteLine("maxDoc()  = " + reader.MaxDoc);
                            Console.WriteLine("numDocs() = " + reader.NumDocs);
                            for (int i = 0; i < hits.ScoreDocs.Length; i++)
                            {
                                int      docID = hits.ScoreDocs[i].Doc;
                                Document doc   = reader.Document(docID);
                                Console.WriteLine("  " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));
                            }
                        }

                        // Without TopDocs there is nothing to traverse; guarding here avoids
                        // a NullReferenceException when a custom collector was used.
                        if (WithTraverse && hits != null)
                        {
                            ScoreDoc[] scoreDocs     = hits.ScoreDocs;
                            int        traversalSize = Math.Min(scoreDocs.Length, TraversalSize);

                            if (traversalSize > 0)
                            {
                                bool                 retrieve     = WithRetrieve;
                                int                  numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
                                Analyzer             analyzer     = RunData.Analyzer;
                                BenchmarkHighlighter highlighter  = null;
                                if (numHighlight > 0)
                                {
                                    highlighter = GetBenchmarkHighlighter(q);
                                }
                                for (int m = 0; m < traversalSize; m++)
                                {
                                    int id = scoreDocs[m].Doc;
                                    res++;
                                    if (retrieve)
                                    {
                                        Document document = RetrieveDoc(reader, id);
                                        res += document != null ? 1 : 0;
                                        // only the first numHighlight hits are highlighted
                                        if (numHighlight > 0 && m < numHighlight)
                                        {
                                            ICollection<string> fieldsToHighlight = GetFieldsToHighlight(document);
                                            foreach (string field in fieldsToHighlight)
                                            {
                                                string text = document.Get(field);
                                                res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            finally
            {
                if (closeSearcher)
                {
                    reader.Dispose();
                }
                else
                {
                    // Release our +1 ref from above
                    reader.DecRef();
                }
            }

            return res;
        }