/// <summary>
/// Checks that a positions enum handed out for a <see cref="MemoryIndex"/> starts
/// unpositioned (doc id -1), both when it is freshly created and when an existing
/// instance is passed back in for reuse, across several add/reset cycles.
/// </summary>
public void RestDocsAndPositionsEnumStart()
{
    Analyzer analyzer = new MockAnalyzer(Random());
    int iterations = AtLeast(3);
    MemoryIndex index = new MemoryIndex(true, Random().nextInt(50) * 1024 * 1024);

    // The same MemoryIndex instance is refilled on every pass to check reuse.
    for (int pass = 0; pass < iterations; pass++)
    {
        index.AddField("foo", "bar", analyzer);
        AtomicReader reader = (AtomicReader)index.CreateSearcher().IndexReader;
        assertEquals(1, reader.Terms("foo").SumTotalTermFreq);

        // Freshly created enum: unpositioned first, then the single "bar"
        // occurrence at position 0 covering offsets [0, 3).
        DocsAndPositionsEnum postings = reader.TermPositionsEnum(new Term("foo", "bar"));
        assertEquals(-1, postings.DocID());
        assertTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, postings.NextPosition());
        assertEquals(0, postings.StartOffset());
        assertEquals(3, postings.EndOffset());

        // Hand the used enum back for reuse; it must start unpositioned again.
        TermsEnum termsEnum = reader.Terms("foo").Iterator(null);
        assertTrue(termsEnum.SeekExact(new BytesRef("bar")));
        postings = termsEnum.DocsAndPositions(null, postings);
        assertEquals(-1, postings.DocID());
        assertTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

        reader.Dispose();
        index.Reset();
    }
}
/// <summary>
/// Walks two positions enums in lock-step and asserts that they report the same
/// documents, the same frequency per document and the same positions.
/// If either enum is <c>null</c>, both are required to be <c>null</c>.
/// Offsets and payloads are intentionally not compared.
/// </summary>
/// <param name="leftDocs">Enum that drives the iteration.</param>
/// <param name="rightDocs">Enum that must mirror <paramref name="leftDocs"/>.</param>
public virtual void AssertDocsAndPositionsEnum(DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs)
{
    bool eitherMissing = leftDocs == null || rightDocs == null;
    if (eitherMissing)
    {
        Assert.IsNull(leftDocs);
        Assert.IsNull(rightDocs);
        return;
    }

    // Both enums must start out unpositioned.
    Assert.AreEqual(-1, leftDocs.DocID());
    Assert.AreEqual(-1, rightDocs.DocID());

    for (int doc = leftDocs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = leftDocs.NextDoc())
    {
        Assert.AreEqual(doc, rightDocs.NextDoc());

        int freq = leftDocs.Freq();
        Assert.AreEqual(freq, rightDocs.Freq());

        int remaining = freq;
        while (remaining-- > 0)
        {
            // we don't assert offsets/payloads, they are allowed to be different
            int leftPosition = leftDocs.NextPosition();
            int rightPosition = rightDocs.NextPosition();
            Assert.AreEqual(leftPosition, rightPosition);
        }
    }

    // The left side is exhausted, so the right side must be as well.
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc());
}
/// <summary>
/// Fills <c>parents[i]</c> for every <c>i</c> from <paramref name="first"/> up to
/// (but excluding) <c>reader.MaxDoc</c>, taking each value from the first position
/// of the parent-payload term's posting for that document.
/// </summary>
/// <param name="reader">Reader whose parent postings are read.</param>
/// <param name="first">First entry to fill; nothing is done when it equals <c>reader.MaxDoc</c>.</param>
/// <exception cref="CorruptIndexException">
/// Thrown when the posting list is missing, or has no entry (or a zero frequency)
/// for any document in the requested range.
/// </exception>
private void InitParents(IndexReader reader, int first)
{
    if (reader.MaxDoc == first)
    {
        return;
    }

    // it's ok to use MultiFields because we only iterate on one posting list.
    // breaking it to loop over the leaves() only complicates code for no
    // apparent gain.
    DocsAndPositionsEnum parentPostings = MultiFields.GetTermPositionsEnum(
        reader,
        null,
        Consts.FIELD_PAYLOADS,
        Consts.PAYLOAD_PARENT_BYTES_REF,
        DocsAndPositionsEnum.FLAG_PAYLOADS);

    // shouldn't really happen, if it does, something's wrong
    if (parentPostings == null)
    {
        throw new CorruptIndexException("Missing parent data for category " + first);
    }
    if (parentPostings.Advance(first) == DocIdSetIterator.NO_MORE_DOCS)
    {
        throw new CorruptIndexException("Missing parent data for category " + first);
    }

    int limit = reader.MaxDoc;
    for (int ordinal = first; ordinal < limit; ordinal++)
    {
        // The posting list must be positioned exactly on this ordinal; a gap
        // shouldn't happen.
        if (parentPostings.DocID() != ordinal)
        {
            throw new CorruptIndexException("Missing parent data for category " + ordinal);
        }

        if (parentPostings.Freq() == 0) // shouldn't happen
        {
            throw new CorruptIndexException("Missing parent data for category " + ordinal);
        }

        parents[ordinal] = parentPostings.NextPosition();

        if (parentPostings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            continue;
        }

        // Postings ran out: only acceptable if this was the last ordinal.
        if (ordinal + 1 < limit)
        {
            throw new CorruptIndexException("Missing parent data for category " + (ordinal + 1));
        }
        break;
    }
}
/// <summary>
/// Compares <paramref name="memIndexReader"/> field by field against
/// <paramref name="other"/> (wrapped into a single atomic reader): norms of
/// document 0, term statistics, the term sequence, and the postings of every term.
/// Positions are compared when the competitor's field has them, and offsets only
/// when both sides report offsets; otherwise only doc ids and frequencies are checked.
/// </summary>
/// <param name="other">Reference reader that supplies the expected values.</param>
/// <param name="memIndexReader">Reader under test.</param>
private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
{
    AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
    Fields memFields = memIndexReader.Fields;
    foreach (string field in competitor.Fields)
    {
        Terms memTerms = memFields.Terms(field);

        // The expected terms must come from the competitor. Reading them from
        // memIndexReader (as this method used to) compares the reader under test
        // against itself, so none of the assertions below could ever fail.
        Terms iwTerms = competitor.Terms(field);
        if (iwTerms == null)
        {
            assertNull(memTerms);
            continue;
        }

        NumericDocValues normValues = competitor.GetNormValues(field);
        NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
        if (normValues != null)
        {
            // mem idx always computes norms on the fly
            assertNotNull(memNormValues);
            assertEquals(normValues.Get(0), memNormValues.Get(0));
        }

        assertNotNull(memTerms);
        assertEquals(iwTerms.DocCount, memTerms.DocCount);
        assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
        assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);

        TermsEnum iwTermsIter = iwTerms.Iterator(null);
        TermsEnum memTermsIter = memTerms.Iterator(null);
        if (iwTerms.HasPositions())
        {
            bool offsets = iwTerms.HasOffsets() && memTerms.HasOffsets();
            while (iwTermsIter.Next() != null)
            {
                assertNotNull(memTermsIter.Next());
                assertEquals(iwTermsIter.Term(), memTermsIter.Term());

                DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.DocsAndPositions(null, null);
                DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                {
                    assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());

                    // Read the frequency once per document instead of on every
                    // iteration of the position loop.
                    int freq = iwDocsAndPos.Freq();
                    assertEquals(freq, memDocsAndPos.Freq());
                    for (int i = 0; i < freq; i++)
                    {
                        assertEquals("term: " + iwTermsIter.Term().Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                        if (offsets)
                        {
                            assertEquals(iwDocsAndPos.StartOffset(), memDocsAndPos.StartOffset());
                            assertEquals(iwDocsAndPos.EndOffset(), memDocsAndPos.EndOffset());
                        }
                    }
                }
            }
        }
        else
        {
            while (iwTermsIter.Next() != null)
            {
                // Advance the memory-index enum in step with the competitor's;
                // without this, Term() and Docs() below would be called on an
                // enum that was never positioned on a term.
                assertNotNull(memTermsIter.Next());
                assertEquals(iwTermsIter.Term(), memTermsIter.Term());

                DocsEnum iwDocs = iwTermsIter.Docs(null, null);
                DocsEnum memDocs = memTermsIter.Docs(null, null);
                while (iwDocs.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                {
                    assertEquals(iwDocs.DocID(), memDocs.NextDoc());
                    assertEquals(iwDocs.Freq(), memDocs.Freq());
                }
            }
        }
    }
}