예제 #1
0
        public void TestKeepsLastFilter()
        {
            DuplicateFilter df = new DuplicateFilter(KEY_FIELD);

            df.KeepMode = (KeepMode.KM_USE_LAST_OCCURRENCE);
            ScoreDoc[] hits = searcher.Search(tq, df, 1000).ScoreDocs;
            assertTrue("Filtered searching should have found some matches", hits.Length > 0);
            foreach (ScoreDoc hit in hits)
            {
                Document d   = searcher.Doc(hit.Doc);
                string   url = d.Get(KEY_FIELD);
                DocsEnum td  = TestUtil.Docs(Random(), reader,
                                             KEY_FIELD,
                                             new BytesRef(url),
                                             MultiFields.GetLiveDocs(reader),
                                             null,
                                             0);

                int lastDoc = 0;
                while (td.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    lastDoc = td.DocID();
                }
                assertEquals("Duplicate urls should return last doc", lastDoc, hit.Doc);
            }
        }
예제 #2
0
        public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
        {
            AtomicReader reader = context.AtomicReader;
            FixedBitSet  result = null; // lazy init if needed - no need to create a big bitset ahead of time
            Fields       fields = reader.Fields;
            BytesRef     spare  = new BytesRef(this.termsBytes);

            if (fields == null)
            {
                return(result);
            }
            Terms     terms     = null;
            TermsEnum termsEnum = null;
            DocsEnum  docs      = null;

            foreach (TermsAndField termsAndField in this.termsAndFields)
            {
                if ((terms = fields.Terms(termsAndField.field)) != null)
                {
                    termsEnum = terms.Iterator(termsEnum); // this won't return null
                    for (int i = termsAndField.start; i < termsAndField.end; i++)
                    {
                        spare.Offset = offsets[i];
                        spare.Length = offsets[i + 1] - offsets[i];
                        if (termsEnum.SeekExact(spare))
                        {
                            docs = termsEnum.Docs(acceptDocs, docs, DocsEnum.FLAG_NONE); // no freq since we don't need them
                            if (result == null)
                            {
                                if (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    result = new FixedBitSet(reader.MaxDoc);
                                    // lazy init but don't do it in the hot loop since we could read many docs
                                    result.Set(docs.DocID());
                                }
                            }
                            while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                result.Set(docs.DocID());
                            }
                        }
                    }
                }
            }
            return(result);
        }
예제 #3
0
        public override int GetOrdinal(FacetLabel cp)
        {
            EnsureOpen();
            if (cp.Length == 0)
            {
                return(ROOT_ORDINAL);
            }

            // First try to find the answer in the LRU cache:

            // LUCENENET: Lock was removed here because the underlying cache is thread-safe,
            // and removing the lock seems to make the performance better.
            IntClass res = ordinalCache.Get(cp);

            if (res != null && res.IntItem != null)
            {
                if ((int)res.IntItem.Value < indexReader.MaxDoc)
                {
                    // Since the cache is shared with DTR instances allocated from
                    // doOpenIfChanged, we need to ensure that the ordinal is one that
                    // this DTR instance recognizes.
                    return((int)res.IntItem.Value);
                }
                else
                {
                    // if we get here, it means that the category was found in the cache,
                    // but is not recognized by this TR instance. Therefore there's no
                    // need to continue search for the path on disk, because we won't find
                    // it there too.
                    return(TaxonomyReader.INVALID_ORDINAL);
                }
            }

            // If we're still here, we have a cache miss. We need to fetch the
            // value from disk, and then also put it in the cache:
            int      ret  = TaxonomyReader.INVALID_ORDINAL;
            DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0);

            if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                ret = docs.DocID();

                // we only store the fact that a category exists, not its inexistence.
                // This is required because the caches are shared with new DTR instances
                // that are allocated from doOpenIfChanged. Therefore, if we only store
                // information about found categories, we cannot accidently tell a new
                // generation of DTR that a category does not exist.

                // LUCENENET: Lock was removed here because the underlying cache is thread-safe,
                // and removing the lock seems to make the performance better.
                ordinalCache.Put(cp, new IntClass {
                    IntItem = Convert.ToInt32(ret)
                });
            }

            return(ret);
        }
예제 #4
0
        public void TestDocsEnumStart()
        {
            Analyzer    analyzer = new MockAnalyzer(Random());
            MemoryIndex memory   = new MemoryIndex(Random().nextBoolean(), Random().nextInt(50) * 1024 * 1024);

            memory.AddField("foo", "bar", analyzer);
            AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
            DocsEnum     disi   = TestUtil.Docs(Random(), reader, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
            int          docid  = disi.DocID();

            assertEquals(-1, docid);
            assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            // now reuse and check again
            TermsEnum te = reader.Terms("foo").Iterator(null);

            assertTrue(te.SeekExact(new BytesRef("bar")));
            disi  = te.Docs(null, disi, DocsEnum.FLAG_NONE);
            docid = disi.DocID();
            assertEquals(-1, docid);
            assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            reader.Dispose();
        }
예제 #5
0
        /// <summary>
        /// checks docs + freqs, sequentially
        /// </summary>
        public virtual void AssertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs)
        {
            if (leftDocs == null)
            {
                Assert.IsNull(rightDocs);
                return;
            }
            Assert.AreEqual(-1, leftDocs.DocID());
            Assert.AreEqual(-1, rightDocs.DocID());
            int docid;

            while ((docid = leftDocs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.AreEqual(docid, rightDocs.NextDoc());
                // we don't assert freqs, they are allowed to be different
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc());
        }
예제 #6
0
        private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
        {
            AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
            Fields       memFields  = memIndexReader.Fields;

            foreach (string field in competitor.Fields)
            {
                Terms memTerms = memFields.Terms(field);
                Terms iwTerms  = memIndexReader.Terms(field);
                if (iwTerms == null)
                {
                    assertNull(memTerms);
                }
                else
                {
                    NumericDocValues normValues    = competitor.GetNormValues(field);
                    NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
                    if (normValues != null)
                    {
                        // mem idx always computes norms on the fly
                        assertNotNull(memNormValues);
                        assertEquals(normValues.Get(0), memNormValues.Get(0));
                    }

                    assertNotNull(memTerms);
                    assertEquals(iwTerms.DocCount, memTerms.DocCount);
                    assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
                    assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
                    TermsEnum iwTermsIter  = iwTerms.Iterator(null);
                    TermsEnum memTermsIter = memTerms.Iterator(null);
                    if (iwTerms.HasPositions())
                    {
                        bool offsets = iwTerms.HasOffsets() && memTerms.HasOffsets();

                        while (iwTermsIter.Next() != null)
                        {
                            assertNotNull(memTermsIter.Next());
                            assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                            DocsAndPositionsEnum iwDocsAndPos  = iwTermsIter.DocsAndPositions(null, null);
                            DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                                for (int i = 0; i < iwDocsAndPos.Freq(); i++)
                                {
                                    assertEquals("term: " + iwTermsIter.Term().Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                                    if (offsets)
                                    {
                                        assertEquals(iwDocsAndPos.StartOffset(), memDocsAndPos.StartOffset());
                                        assertEquals(iwDocsAndPos.EndOffset(), memDocsAndPos.EndOffset());
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        while (iwTermsIter.Next() != null)
                        {
                            assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                            DocsEnum iwDocsAndPos  = iwTermsIter.Docs(null, null);
                            DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                            }
                        }
                    }
                }
            }
        }
예제 #7
0
 public override int DocID()
 {
     return(TermDocsEnum.DocID());
 }