/// <summary>
/// Verifies that a DuplicateFilter in KM_USE_LAST_OCCURRENCE mode keeps only
/// the last indexed document for each duplicated key value.
/// </summary>
public void TestKeepsLastFilter()
{
    DuplicateFilter filter = new DuplicateFilter(KEY_FIELD);
    filter.KeepMode = KeepMode.KM_USE_LAST_OCCURRENCE;
    ScoreDoc[] matches = searcher.Search(tq, filter, 1000).ScoreDocs;
    assertTrue("Filtered searching should have found some matches", matches.Length > 0);
    foreach (ScoreDoc match in matches)
    {
        Document doc = searcher.Doc(match.Doc);
        string url = doc.Get(KEY_FIELD);
        // Walk every live posting for this url; the final doc id seen is,
        // by definition, the last occurrence of the key.
        DocsEnum postings = TestUtil.Docs(Random(), reader, KEY_FIELD, new BytesRef(url), MultiFields.GetLiveDocs(reader), null, 0);
        int lastSeen = 0;
        while (postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            lastSeen = postings.DocID();
        }
        assertEquals("Duplicate urls should return last doc", lastSeen, match.Doc);
    }
}
// Builds the set of documents matched by any of this filter's terms, honoring
// acceptDocs (live docs). The FixedBitSet is allocated lazily — only once the
// first matching document is found — so filters that match nothing never pay
// for a maxDoc-sized bitset ("no need to create a big bitset ahead of time").
// All term bytes live in one shared buffer (termsBytes); `spare` is re-pointed
// at each term's slice via Offset/Length, avoiding a per-term copy. The
// TermsEnum and DocsEnum instances are reused across terms for the same reason
// (Docs/Iterator take the previous enum as a reuse hint).
// Returns null when no term matched any document; callers treat a null
// DocIdSet as empty.
// NOTE(review): the first NextDoc() consumed inside the lazy-init branch is
// intentionally followed by the while loop on the same enum, so no doc is
// skipped and `result` is non-null whenever the loop body runs.
// NOTE(review): assumes the terms within each TermsAndField [start, end)
// range are sorted so a single reused TermsEnum can SeekExact them — confirm
// against the filter's constructor.
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs) { AtomicReader reader = context.AtomicReader; FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time Fields fields = reader.Fields; BytesRef spare = new BytesRef(this.termsBytes); if (fields == null) { return(result); } Terms terms = null; TermsEnum termsEnum = null; DocsEnum docs = null; foreach (TermsAndField termsAndField in this.termsAndFields) { if ((terms = fields.Terms(termsAndField.field)) != null) { termsEnum = terms.Iterator(termsEnum); // this won't return null for (int i = termsAndField.start; i < termsAndField.end; i++) { spare.Offset = offsets[i]; spare.Length = offsets[i + 1] - offsets[i]; if (termsEnum.SeekExact(spare)) { docs = termsEnum.Docs(acceptDocs, docs, DocsEnum.FLAG_NONE); // no freq since we don't need them if (result == null) { if (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { result = new FixedBitSet(reader.MaxDoc); // lazy init but don't do it in the hot loop since we could read many docs result.Set(docs.DocID()); } } while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { result.Set(docs.DocID()); } } } } } return(result); }
/// <summary>
/// Resolves a category path to its ordinal: the LRU cache is consulted first,
/// then the taxonomy index on disk. Returns <c>INVALID_ORDINAL</c> when the
/// category does not exist (for this reader's generation).
/// </summary>
public override int GetOrdinal(FacetLabel cp)
{
    EnsureOpen();
    if (cp.Length == 0)
    {
        return ROOT_ORDINAL;
    }

    // First try the shared LRU cache.
    // LUCENENET: Lock was removed here because the underlying cache is
    // thread-safe, and removing the lock seems to make the performance better.
    IntClass cached = ordinalCache.Get(cp);
    if (cached != null && cached.IntItem != null)
    {
        int ordinal = (int)cached.IntItem.Value;
        if (ordinal < indexReader.MaxDoc)
        {
            // The cache is shared with DTR instances allocated from
            // doOpenIfChanged, so make sure the ordinal is one that this
            // DTR instance recognizes before trusting it.
            return ordinal;
        }
        // The category is cached but not recognized by this TR instance, so
        // there is no point searching the disk — it won't be found there either.
        return TaxonomyReader.INVALID_ORDINAL;
    }

    // Cache miss: fetch the value from disk and, on success, populate the cache.
    int found = TaxonomyReader.INVALID_ORDINAL;
    DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0);
    if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        found = docs.DocID();
        // Only the existence of a category is cached, never its inexistence:
        // the cache is shared with new DTR instances allocated from
        // doOpenIfChanged, and a cached "missing" entry could wrongly tell a
        // newer generation that a category does not exist.
        // LUCENENET: Lock was removed here because the underlying cache is
        // thread-safe, and removing the lock seems to make the performance better.
        ordinalCache.Put(cp, new IntClass { IntItem = Convert.ToInt32(found) });
    }
    return found;
}
/// <summary>
/// Verifies that a MemoryIndex DocsEnum starts positioned before the first
/// document (DocID == -1), both when freshly obtained and when reused
/// through TermsEnum.Docs.
/// </summary>
public void TestDocsEnumStart()
{
    Analyzer analyzer = new MockAnalyzer(Random());
    MemoryIndex memory = new MemoryIndex(Random().nextBoolean(), Random().nextInt(50) * 1024 * 1024);
    memory.AddField("foo", "bar", analyzer);
    AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;

    // A fresh enum must report -1 before the first NextDoc() call.
    DocsEnum postings = TestUtil.Docs(Random(), reader, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
    assertEquals(-1, postings.DocID());
    assertTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // Reusing the enum via TermsEnum.Docs must reset its position to -1 too.
    TermsEnum terms = reader.Terms("foo").Iterator(null);
    assertTrue(terms.SeekExact(new BytesRef("bar")));
    postings = terms.Docs(null, postings, DocsEnum.FLAG_NONE);
    assertEquals(-1, postings.DocID());
    assertTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    reader.Dispose();
}
/// <summary>
/// Asserts that two DocsEnums enumerate exactly the same documents, in order.
/// Both enums must be freshly created (positioned at -1); a null left enum
/// requires a null right enum. Frequencies are intentionally not compared.
/// </summary>
public virtual void AssertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs)
{
    if (leftDocs == null)
    {
        Assert.IsNull(rightDocs);
        return;
    }

    // Both enums must start before the first document.
    Assert.AreEqual(-1, leftDocs.DocID());
    Assert.AreEqual(-1, rightDocs.DocID());

    for (int doc = leftDocs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = leftDocs.NextDoc())
    {
        // We don't assert freqs, they are allowed to be different.
        Assert.AreEqual(doc, rightDocs.NextDoc());
    }

    // The right enum must be exhausted at exactly the same point.
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc());
}
/// <summary>
/// Compares a MemoryIndex reader against a regular index reader built from the
/// same content: per-field statistics, norms, terms, postings, and (when
/// available) positions and offsets must all agree.
/// </summary>
/// <param name="other">The on-disk/RAM index to compare against.</param>
/// <param name="memIndexReader">The MemoryIndex-backed reader under test.</param>
private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
{
    AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
    Fields memFields = memIndexReader.Fields;
    foreach (string field in competitor.Fields)
    {
        Terms memTerms = memFields.Terms(field);
        Terms iwTerms = memIndexReader.Terms(field);
        if (iwTerms == null)
        {
            assertNull(memTerms);
        }
        else
        {
            NumericDocValues normValues = competitor.GetNormValues(field);
            NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
            if (normValues != null)
            {
                // mem idx always computes norms on the fly
                assertNotNull(memNormValues);
                assertEquals(normValues.Get(0), memNormValues.Get(0));
            }
            assertNotNull(memTerms);
            assertEquals(iwTerms.DocCount, memTerms.DocCount);
            assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
            assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
            TermsEnum iwTermsIter = iwTerms.Iterator(null);
            TermsEnum memTermsIter = memTerms.Iterator(null);
            if (iwTerms.HasPositions())
            {
                bool offsets = iwTerms.HasOffsets() && memTerms.HasOffsets();
                while (iwTermsIter.Next() != null)
                {
                    assertNotNull(memTermsIter.Next());
                    assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                    DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.DocsAndPositions(null, null);
                    DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                    while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                    {
                        assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                        assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                        for (int i = 0; i < iwDocsAndPos.Freq(); i++)
                        {
                            assertEquals("term: " + iwTermsIter.Term().Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                            if (offsets)
                            {
                                assertEquals(iwDocsAndPos.StartOffset(), memDocsAndPos.StartOffset());
                                assertEquals(iwDocsAndPos.EndOffset(), memDocsAndPos.EndOffset());
                            }
                        }
                    }
                }
            }
            else
            {
                while (iwTermsIter.Next() != null)
                {
                    // BUGFIX: advance the memory-index enum in lockstep, mirroring
                    // the positions branch above. The original never called Next()
                    // on memTermsIter here, so Term()/Docs() were invoked on an
                    // unpositioned TermsEnum (undefined behavior per the TermsEnum
                    // contract).
                    assertNotNull(memTermsIter.Next());
                    assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                    DocsEnum iwDocs = iwTermsIter.Docs(null, null);
                    DocsEnum memDocs = memTermsIter.Docs(null, null);
                    while (iwDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        assertEquals(iwDocs.DocID(), memDocs.NextDoc());
                        assertEquals(iwDocs.Freq(), memDocs.Freq());
                    }
                }
            }
        }
    }
}
/// <summary>
/// Returns the current document id by delegating to the wrapped
/// <c>TermDocsEnum</c>.
/// </summary>
public override int DocID() => TermDocsEnum.DocID();