public virtual void TestReuseDocsEnumDifferentReader() {
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random);
    writer.Commit();

    DirectoryReader firstReader = DirectoryReader.Open(dir);
    DirectoryReader secondReader = DirectoryReader.Open(dir);
    IList<AtomicReaderContext> leaves = firstReader.Leaves;
    IList<AtomicReaderContext> leaves2 = secondReader.Leaves;
    foreach (AtomicReaderContext ctx in leaves) {
        Terms terms = ((AtomicReader)ctx.Reader).GetTerms("body");
        TermsEnum iterator = terms.GetEnumerator();
        IDictionary<DocsEnum, bool?> enums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
        MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
        iterator = terms.GetEnumerator();
        DocsEnum docs = null;
        BytesRef term = null;
        while (iterator.MoveNext()) {
            term = iterator.Term;
            docs = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);

        iterator = terms.GetEnumerator();
        enums.Clear();
        docs = null;
        while (iterator.MoveNext()) {
            term = iterator.Term;
            docs = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);
    }
    IOUtils.Dispose(writer, firstReader, secondReader, dir);
}
public virtual void TestDocsEnum() {
    IBits mappedLiveDocs = RandomLiveDocs(reader.MaxDoc);
    TermsEnum termsEnum = reader.GetTerms(DOCS_ENUM_FIELD).GetIterator(null);
    assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOCS_ENUM_TERM)));
    DocsEnum docs = termsEnum.Docs(mappedLiveDocs, null);

    int doc;
    int prev = -1;
    while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(doc));
        assertEquals("incorrect value; doc " + doc, sortedValues[doc], int.Parse(reader.Document(doc).Get(ID_FIELD)));
        while (++prev < doc) {
            assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
        }
    }
    while (++prev < reader.MaxDoc) {
        assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
    }

    DocsEnum reuse = docs;
    docs = termsEnum.Docs(mappedLiveDocs, reuse);
    if (docs is SortingAtomicReader.SortingDocsEnum) {
        assertTrue(((SortingAtomicReader.SortingDocsEnum)docs).Reused(reuse)); // make sure reuse worked
    }
    doc = -1;
    prev = -1;
    while ((doc = docs.Advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) {
        assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(doc));
        assertEquals("incorrect value; doc " + doc, sortedValues[doc], int.Parse(reader.Document(doc).Get(ID_FIELD)));
        while (++prev < doc) {
            assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
        }
    }
    while (++prev < reader.MaxDoc) {
        assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
    }
}
public virtual void TestReuseDocsEnumSameBitsOrNull() {
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    DirectoryReader open = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext ctx in open.Leaves) {
        Terms terms = ((AtomicReader)ctx.Reader).GetTerms("body");
        TermsEnum iterator = terms.GetIterator(null);
        IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits bits = new MatchNoBits(open.MaxDoc);
        DocsEnum docs = null;
        while ((iterator.Next()) != null) {
            docs = iterator.Docs(bits, docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(1, enums.Count);

        enums.Clear();
        iterator = terms.GetIterator(null);
        docs = null;
        while ((iterator.Next()) != null) {
            docs = iterator.Docs(new MatchNoBits(open.MaxDoc), docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);

        enums.Clear();
        iterator = terms.GetIterator(null);
        docs = null;
        while ((iterator.Next()) != null) {
            docs = iterator.Docs(null, docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(1, enums.Count);
    }
    IOUtils.Close(writer, open, dir);
}
public virtual void Reset() {
    // no one should call us for deleted docs?
    if (terms != null) {
        TermsEnum termsEnum = terms.GetEnumerator();
        if (termsEnum.SeekExact(outerInstance.m_indexedBytes)) {
            docs = termsEnum.Docs(null, null);
        } else {
            docs = null;
        }
    } else {
        docs = null;
    }

    if (docs == null) {
        docs = new DocsEnumAnonymousClass();
    }
    atDoc = -1;
}
private int NextDocOutOfOrder() {
    while (true) {
        if (docsEnum != null) {
            int docId = DocsEnumNextDoc();
            if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                docsEnum = null;
            } else {
                return _doc = docId;
            }
        }

        if (_upto == outerInstance._terms.Count) {
            return _doc = DocIdSetIterator.NO_MORE_DOCS;
        }

        _scoreUpto = _upto;
        if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare))) {
            docsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsFlags.NONE);
        }
    }
}
private FixedBitSet FastBits(AtomicReader reader, IBits acceptDocs) {
    FixedBitSet bits = new FixedBitSet(reader.MaxDoc);
    bits.Set(0, reader.MaxDoc); // assume all are valid
    Terms terms = reader.Fields.GetTerms(fieldName);
    if (terms == null) {
        return bits;
    }

    TermsEnum termsEnum = terms.GetIterator(null);
    DocsEnum docs = null;
    while (true) {
        BytesRef currTerm = termsEnum.Next();
        if (currTerm == null) {
            break;
        } else {
            if (termsEnum.DocFreq > 1) {
                // unset potential duplicates
                docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE);
                int doc = docs.NextDoc();
                if (doc != DocIdSetIterator.NO_MORE_DOCS) {
                    if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {
                        doc = docs.NextDoc();
                    }
                }

                int lastDoc = -1;
                while (true) {
                    lastDoc = doc;
                    bits.Clear(lastDoc);
                    doc = docs.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                        break;
                    }
                }

                if (keepMode == KeepMode.KM_USE_LAST_OCCURRENCE) {
                    // restore the last bit
                    bits.Set(lastDoc);
                }
            }
        }
    }
    return bits;
}
/// <summary>
/// Takes the categories from the given taxonomy directory, and adds the
/// missing ones to this taxonomy. Additionally, it fills the given
/// <see cref="IOrdinalMap"/> with a mapping from the original ordinal to the new
/// ordinal.
/// </summary>
public virtual void AddTaxonomy(Directory taxoDir, IOrdinalMap map) {
    EnsureOpen();
    DirectoryReader r = DirectoryReader.Open(taxoDir);
    try {
        int size = r.NumDocs;
        IOrdinalMap ordinalMap = map;
        ordinalMap.SetSize(size);
        int @base = 0;
        TermsEnum te = null;
        DocsEnum docs = null;
        foreach (AtomicReaderContext ctx in r.Leaves) {
            AtomicReader ar = ctx.AtomicReader;
            Terms terms = ar.GetTerms(Consts.FULL);
            te = terms.GetIterator(te);
            while (te.Next() != null) {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(te.Term.Utf8ToString()));
                int ordinal = AddCategory(cp);
                docs = te.Docs(null, docs, DocsFlags.NONE);
                ordinalMap.AddMapping(docs.NextDoc() + @base, ordinal);
            }
            @base += ar.MaxDoc; // no deletions, so we're ok
        }
        ordinalMap.AddDone();
    } finally {
        r.Dispose();
    }
}
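AddTaxonomy only fills the supplied IOrdinalMap; the caller still creates the map and reads the old-to-new mapping back out afterwards. A minimal sketch of that call site, assuming a DirectoryTaxonomyWriter named destTaxoWriter and its in-memory MemoryOrdinalMap as the IOrdinalMap implementation; the source path and variable names are illustrative, not taken from the listing above.

// Illustrative fragment (not part of the method above).
// destTaxoWriter is assumed to be an open DirectoryTaxonomyWriter.
Directory srcTaxoDir = FSDirectory.Open(new DirectoryInfo("/tmp/source-taxonomy")); // hypothetical path
var ordinalMap = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
destTaxoWriter.AddTaxonomy(srcTaxoDir, ordinalMap);
int[] oldToNew = ordinalMap.GetMap(); // oldToNew[oldOrdinal] is assumed to yield the merged ordinal
srcTaxoDir.Dispose();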
public virtual void TestReuseDocsEnumNoReuse() {
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    DirectoryReader open = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext ctx in open.Leaves) {
        AtomicReader indexReader = (AtomicReader)ctx.Reader;
        Terms terms = indexReader.Terms("body");
        TermsEnum iterator = terms.Iterator(null);
        IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc);
        while ((iterator.Next()) != null) {
            DocsEnum docs = iterator.Docs(Random().NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc), null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Size(), enums.Count);
    }
    IOUtils.Close(writer, open, dir);
}
public virtual void TestReuseDocsEnumNoReuse() {
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random);
    writer.Commit();

    DirectoryReader open = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext ctx in open.Leaves) {
        AtomicReader indexReader = (AtomicReader)ctx.Reader;
        Terms terms = indexReader.GetTerms("body");
        TermsEnum iterator = terms.GetEnumerator();
        IDictionary<DocsEnum, bool?> enums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
        MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc);
        while (iterator.MoveNext()) {
            DocsEnum docs = iterator.Docs(Random.NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc), null, Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);
    }
    IOUtils.Dispose(writer, open, dir);
}
protected override void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs, TermsEnum termsEnum) {
    BytesRef spare = new BytesRef();
    DocsEnum docsEnum = null;
    for (int i = 0; i < m_outerInstance._terms.Count; i++) {
        if (termsEnum.SeekExact(m_outerInstance._terms.Get(m_outerInstance._ords[i], spare))) {
            docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsFlags.NONE);
            float score = m_outerInstance._scores[m_outerInstance._ords[i]];
            for (int doc = docsEnum.NextDoc(); doc != NO_MORE_DOCS; doc = docsEnum.NextDoc()) {
                // I prefer this:
                /*if (scores[doc] < score) {
                    scores[doc] = score;
                    matchingDocs.set(doc);
                }*/
                // But this behaves the same as MVInnerScorer and only then the tests will pass:
                if (!matchingDocs.Get(doc)) {
                    scores[doc] = score;
                    matchingDocs.Set(doc);
                }
            }
        }
    }
}
public virtual void TestReuseDocsEnumDifferentReader() {
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    DirectoryReader firstReader = DirectoryReader.Open(dir);
    DirectoryReader secondReader = DirectoryReader.Open(dir);
    IList<AtomicReaderContext> leaves = firstReader.Leaves;
    IList<AtomicReaderContext> leaves2 = secondReader.Leaves;
    foreach (AtomicReaderContext ctx in leaves) {
        Terms terms = ((AtomicReader)ctx.Reader).Terms("body");
        TermsEnum iterator = terms.Iterator(null);
        IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
        iterator = terms.Iterator(null);
        DocsEnum docs = null;
        BytesRef term = null;
        while ((term = iterator.Next()) != null) {
            docs = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Size(), enums.Count);

        iterator = terms.Iterator(null);
        enums.Clear();
        docs = null;
        while ((term = iterator.Next()) != null) {
            docs = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Size(), enums.Count);
    }
    IOUtils.Close(writer, firstReader, secondReader, dir);
}
protected virtual void CollectDocs(FixedBitSet bitSet) {
    // WARN: keep this specialization in sync
    Debug.Assert(m_termsEnum != null);
    m_docsEnum = m_termsEnum.Docs(m_acceptDocs, m_docsEnum, DocsFlags.NONE);
    int docid;
    while ((docid = m_docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        bitSet.Set(docid);
    }
}
/// <exception cref="System.IO.IOException"></exception> protected internal virtual void CollectDocs(FixedBitSet bitSet) { //WARN: keep this specialization in sync Debug.Assert(termsEnum != null); docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE ); int docid; while ((docid = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { bitSet.Set(docid); } }
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs) {
    Debug.Assert(termStates.TopReaderContext == ReaderUtil.GetTopLevelContext(context), "The top-reader used to create Weight (" + termStates.TopReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.GetTopLevelContext(context));
    TermsEnum termsEnum = GetTermsEnum(context);
    if (termsEnum == null) {
        return null;
    }
    DocsEnum docs = termsEnum.Docs(acceptDocs, null);
    Debug.Assert(docs != null);
    return new TermScorer(this, docs, similarity.GetSimScorer(stats, context));
}
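The scorer above requests a fresh DocsEnum, but most of the other examples in this listing pass the previous enum back into Docs() so the codec can recycle its buffers. A minimal sketch of that reuse idiom, assuming an already open AtomicReader and a field that exists in the index; the helper name and field are illustrative and not part of the listing.

// Hypothetical helper: counts postings for every term of a field while
// reusing a single DocsEnum instance across all terms.
public static long CountPostings(AtomicReader reader, string field) {
    Terms terms = reader.GetTerms(field);
    if (terms == null) {
        return 0;
    }
    TermsEnum termsEnum = terms.GetEnumerator();
    DocsEnum docs = null; // reused across terms
    long count = 0;
    while (termsEnum.MoveNext()) {
        // Passing the previous enum back in allows (but does not guarantee) reuse.
        docs = termsEnum.Docs(null, docs, DocsFlags.NONE);
        while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            count++;
        }
    }
    return count;
}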
private FixedBitSet CorrectBits(AtomicReader reader, IBits acceptDocs) {
    FixedBitSet bits = new FixedBitSet(reader.MaxDoc); // assume all are INvalid
    Terms terms = reader.Fields.GetTerms(fieldName);
    if (terms == null) {
        return bits;
    }

    TermsEnum termsEnum = terms.GetIterator(null);
    DocsEnum docs = null;
    while (true) {
        BytesRef currTerm = termsEnum.Next();
        if (currTerm == null) {
            break;
        } else {
            docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE);
            int doc = docs.NextDoc();
            if (doc != DocIdSetIterator.NO_MORE_DOCS) {
                if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {
                    bits.Set(doc);
                } else {
                    int lastDoc = doc;
                    while (true) {
                        lastDoc = doc;
                        doc = docs.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                            break;
                        }
                    }
                    bits.Set(lastDoc);
                }
            }
        }
    }
    return bits;
}
/// <summary> /// Returns a <see cref="DocIdSet"/> with documents that should be permitted in search /// results. /// </summary> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) { AtomicReader reader = (context.AtomicReader); Fields fields = reader.Fields; if (fields is null) { // reader has no fields return(null); } Terms terms = fields.GetTerms(m_query.m_field); if (terms is null) { // field does not exist return(null); } TermsEnum termsEnum = m_query.GetTermsEnum(terms); if (Debugging.AssertsEnabled) { Debugging.Assert(termsEnum != null); } if (termsEnum.MoveNext()) { // fill into a FixedBitSet FixedBitSet bitSet = new FixedBitSet(context.AtomicReader.MaxDoc); DocsEnum docsEnum = null; do { // System.out.println(" iter termCount=" + termCount + " term=" + // enumerator.term().toBytesString()); docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsFlags.NONE); int docid; while ((docid = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { bitSet.Set(docid); } } while (termsEnum.MoveNext()); return(bitSet); } else { return(null); } }
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) {
    AtomicReader reader = context.AtomicReader;
    FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
    Fields fields = reader.Fields;
    BytesRef spare = new BytesRef(this.termsBytes);
    if (fields == null) {
        return result;
    }

    Terms terms; // LUCENENET: IDE0059: Remove unnecessary value assignment
    TermsEnum termsEnum = null;
    DocsEnum docs = null;
    foreach (TermsAndField termsAndField in this.termsAndFields) {
        if ((terms = fields.GetTerms(termsAndField.field)) != null) {
            termsEnum = terms.GetEnumerator(termsEnum); // this won't return null
            for (int i = termsAndField.start; i < termsAndField.end; i++) {
                spare.Offset = offsets[i];
                spare.Length = offsets[i + 1] - offsets[i];
                if (termsEnum.SeekExact(spare)) {
                    docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE); // no freq since we don't need them
                    if (result == null) {
                        if (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            result = new FixedBitSet(reader.MaxDoc);
                            // lazy init but don't do it in the hot loop since we could read many docs
                            result.Set(docs.DocID);
                        }
                    }
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        result.Set(docs.DocID);
                    }
                }
            }
        }
    }
    return result;
}
public virtual ShapeFieldCache<T> GetCache(AtomicReader reader) {
    lock (locker) {
        ShapeFieldCache<T> idx;
        if (sidx.TryGetValue(reader, out idx) && idx != null) {
            return idx;
        }
        /*long startTime = Runtime.CurrentTimeMillis();
        log.Fine("Building Cache [" + reader.MaxDoc() + "]");*/
        idx = new ShapeFieldCache<T>(reader.MaxDoc, m_defaultSize);
        int count = 0;
        DocsEnum docs = null;
        Terms terms = reader.GetTerms(m_shapeField);
        TermsEnum te = null;
        if (terms != null) {
            te = terms.GetIterator(te);
            BytesRef term = te.Next();
            while (term != null) {
                T shape = ReadShape(term);
                if (shape != null) {
                    docs = te.Docs(null, docs, DocsFlags.NONE);
                    int docid = docs.NextDoc();
                    while (docid != DocIdSetIterator.NO_MORE_DOCS) {
                        idx.Add(docid, shape);
                        docid = docs.NextDoc();
                        count++;
                    }
                }
                term = te.Next();
            }
        }
        sidx[reader] = idx;
        /*long elapsed = Runtime.CurrentTimeMillis() - startTime;
        log.Fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);*/
        return idx;
    }
}
public virtual void TestNestedPulsing() {
    // we always run this test with pulsing codec.
    Codec cp = TestUtil.AlwaysPostingsFormat(new NestedPulsingPostingsFormat());
    BaseDirectoryWrapper dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
    // note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
    // this is because we only track the 'last' enum we reused (not all).
    // but this seems 'good enough' for now.
    iw.AddDocument(doc);
    DirectoryReader ir = iw.GetReader();
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    DocsEnum reuse = null;
    IDictionary<DocsEnum, bool?> allEnums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
    TermsEnum te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext()) {
        reuse = te.Docs(null, reuse, DocsFlags.NONE);
        allEnums[reuse] = true;
    }
    assertEquals(4, allEnums.Count);

    allEnums.Clear();
    DocsAndPositionsEnum posReuse = null;
    te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext()) {
        posReuse = te.DocsAndPositions(null, posReuse);
        allEnums[posReuse] = true;
    }
    assertEquals(4, allEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
public virtual void TestSophisticatedReuse() {
    // we always run this test with pulsing codec.
    Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
    iw.AddDocument(doc);
    DirectoryReader ir = iw.GetReader();
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    DocsEnum reuse = null;
    IDictionary<DocsEnum, bool?> allEnums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
    TermsEnum te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext()) {
        reuse = te.Docs(null, reuse, DocsFlags.NONE);
        allEnums[reuse] = true;
    }
    assertEquals(2, allEnums.Count);

    allEnums.Clear();
    DocsAndPositionsEnum posReuse = null;
    te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext()) {
        posReuse = te.DocsAndPositions(null, posReuse);
        allEnums[posReuse] = true;
    }
    assertEquals(2, allEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
private FixedBitSet CorrectBits(AtomicReader reader, IBits acceptDocs) {
    FixedBitSet bits = new FixedBitSet(reader.MaxDoc); // assume all are INvalid
    Terms terms = reader.Fields.GetTerms(fieldName);
    if (terms is null) {
        return bits;
    }

    TermsEnum termsEnum = terms.GetEnumerator();
    DocsEnum docs = null;
    while (termsEnum.MoveNext()) {
        docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE);
        int doc = docs.NextDoc();
        if (doc != DocIdSetIterator.NO_MORE_DOCS) {
            if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {
                bits.Set(doc);
            } else {
                int lastDoc /* = doc*/; // LUCENENET: Removed unnecessary assignment
                while (true) {
                    lastDoc = doc;
                    doc = docs.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                        break;
                    }
                }
                bits.Set(lastDoc);
            }
        }
    }
    return bits;
}
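CorrectBits and FastBits above back the sandbox DuplicateFilter, which keeps either the first or the last document per key term. A hedged sketch of how such a filter might be applied, assuming the DuplicateFilter constructor that takes only the key field name; the field name and query are illustrative.

// Illustrative fragment: keep at most one hit per "id" value in the results.
Filter dedup = new DuplicateFilter("id"); // hypothetical key field
TopDocs hits = searcher.Search(new TermQuery(new Term("body", "lucene")), dedup, 20);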
public virtual DocsEnum RandomDocsEnum(string field, BytesRef term, IList<AtomicReaderContext> readers, IBits bits) {
    if (Random().Next(10) == 0) {
        return null;
    }
    AtomicReader indexReader = (AtomicReader)readers[Random().Next(readers.Count)].Reader;
    Terms terms = indexReader.GetTerms(field);
    if (terms == null) {
        return null;
    }
    TermsEnum iterator = terms.GetIterator(null);
    if (iterator.SeekExact(term)) {
        return iterator.Docs(bits, null, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
    }
    return null;
}
public virtual ShapeFieldCache<T> GetCache(AtomicReader reader) {
    // LUCENENET: ConditionalWeakTable allows us to simplify and remove locks
    return sidx.GetValue(reader, (key) => {
        /*long startTime = Runtime.CurrentTimeMillis();
        log.Fine("Building Cache [" + reader.MaxDoc() + "]");*/
        ShapeFieldCache<T> idx = new ShapeFieldCache<T>(key.MaxDoc, m_defaultSize);
        int count = 0;
        DocsEnum docs = null;
        Terms terms = ((AtomicReader)key).GetTerms(m_shapeField);
        TermsEnum te = null;
        if (terms != null) {
            te = terms.GetIterator(te);
            BytesRef term = te.Next();
            while (term != null) {
                T shape = ReadShape(term);
                if (shape != null) {
                    docs = te.Docs(null, docs, DocsFlags.NONE);
                    int docid = docs.NextDoc();
                    while (docid != DocIdSetIterator.NO_MORE_DOCS) {
                        idx.Add(docid, shape);
                        docid = docs.NextDoc();
                        count++;
                    }
                }
                term = te.Next();
            }
        }
        /*long elapsed = Runtime.CurrentTimeMillis() - startTime;
        log.Fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);*/
        return idx;
    });
}
public virtual ShapeFieldCache<T> GetCache(AtomicReader reader) {
    // LUCENENET: ConditionalWeakTable allows us to simplify and remove locks on the
    // read operation. For the create case, we use Lazy<T> to ensure atomicity.
    return sidx.GetValue(reader, (key) => new Lazy<ShapeFieldCache<T>>(() => {
        /*long startTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        log.Fine("Building Cache [" + reader.MaxDoc() + "]");*/
        ShapeFieldCache<T> idx = new ShapeFieldCache<T>(key.MaxDoc, m_defaultSize);
        int count = 0;
        DocsEnum docs = null;
        Terms terms = ((AtomicReader)key).GetTerms(m_shapeField);
        TermsEnum te = null;
        if (terms != null) {
            te = terms.GetEnumerator(te);
            while (te.MoveNext()) {
                T shape = ReadShape(te.Term);
                if (shape != null) {
                    docs = te.Docs(null, docs, DocsFlags.NONE);
                    int docid = docs.NextDoc();
                    while (docid != DocIdSetIterator.NO_MORE_DOCS) {
                        idx.Add(docid, shape);
                        docid = docs.NextDoc();
                        count++;
                    }
                }
            }
        }
        /*long elapsed = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond - startTime; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        log.Fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);*/
        return idx;
    })).Value;
}
public void TestDocsEnumStart() {
    Analyzer analyzer = new MockAnalyzer(Random);
    MemoryIndex memory = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    memory.AddField("foo", "bar", analyzer);
    AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
    DocsEnum disi = TestUtil.Docs(Random, reader, "foo", new BytesRef("bar"), null, null, DocsFlags.NONE);
    int docid = disi.DocID;
    assertEquals(-1, docid);
    assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // now reuse and check again
    TermsEnum te = reader.GetTerms("foo").GetEnumerator();
    assertTrue(te.SeekExact(new BytesRef("bar")));
    disi = te.Docs(null, disi, DocsFlags.NONE);
    docid = disi.DocID;
    assertEquals(-1, docid);
    assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    reader.Dispose();
}
/// <summary> /// Returns a <see cref="DocsEnum"/> from a positioned <see cref="TermsEnum"/>, but /// randomly sometimes uses a <see cref="MultiDocsEnum"/>, <see cref="DocsAndPositionsEnum"/>. /// </summary> public static DocsEnum Docs(Random random, TermsEnum termsEnum, IBits liveDocs, DocsEnum reuse, DocsFlags flags) { if (random.NextBoolean()) { if (random.NextBoolean()) { DocsAndPositionsFlags posFlags; switch (random.Next(4)) { case 0: posFlags = 0; break; case 1: posFlags = DocsAndPositionsFlags.OFFSETS; break; case 2: posFlags = DocsAndPositionsFlags.PAYLOADS; break; default: posFlags = DocsAndPositionsFlags.OFFSETS | DocsAndPositionsFlags.PAYLOADS; break; } // TODO: cast to DocsAndPositionsEnum? DocsAndPositionsEnum docsAndPositions = termsEnum.DocsAndPositions(liveDocs, null, posFlags); if (docsAndPositions != null) { return(docsAndPositions); } } flags |= DocsFlags.FREQS; } return(termsEnum.Docs(liveDocs, reuse, flags)); }
protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs, TermsEnum termsEnum) {
    BytesRef spare = new BytesRef();
    DocsEnum docsEnum = null;
    for (int i = 0; i < m_outerInstance._terms.Count; i++) {
        if (termsEnum.SeekExact(m_outerInstance._terms.Get(m_outerInstance._ords[i], spare))) {
            docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsFlags.NONE);
            float score = m_outerInstance._scores[m_outerInstance._ords[i]];
            for (int doc = docsEnum.NextDoc(); doc != NO_MORE_DOCS; doc = docsEnum.NextDoc()) {
                matchingDocs.Set(doc);
                // In the case the same doc is also related to a another doc, a score might be overwritten.
                // I think this can only happen in a many-to-many relation
                scores[doc] = score;
            }
        }
    }
}
/// <summary>
/// Default merge impl </summary>
public virtual void Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, TermsEnum termsEnum) {
    BytesRef term;
    Debug.Assert(termsEnum != null);
    long sumTotalTermFreq = 0;
    long sumDocFreq = 0;
    long sumDFsinceLastAbortCheck = 0;
    FixedBitSet visitedDocs = new FixedBitSet(mergeState.SegmentInfo.DocCount);

    if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY) {
        if (DocsEnum == null) {
            DocsEnum = new MappingMultiDocsEnum();
        }
        DocsEnum.MergeState = mergeState;
        MultiDocsEnum docsEnumIn = null;
        while ((term = termsEnum.Next()) != null) {
            // We can pass null for liveDocs, because the
            // mapping enum will skip the non-live docs:
            docsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsEnumIn, Index.DocsEnum.FLAG_NONE);
            if (docsEnumIn != null) {
                DocsEnum.Reset(docsEnumIn);
                PostingsConsumer postingsConsumer = StartTerm(term);
                TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, DocsEnum, visitedDocs);
                if (stats.DocFreq > 0) {
                    FinishTerm(term, stats);
                    sumTotalTermFreq += stats.DocFreq;
                    sumDFsinceLastAbortCheck += stats.DocFreq;
                    sumDocFreq += stats.DocFreq;
                    if (sumDFsinceLastAbortCheck > 60000) {
                        mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                        sumDFsinceLastAbortCheck = 0;
                    }
                }
            }
        }
    } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS) {
        if (DocsAndFreqsEnum == null) {
            DocsAndFreqsEnum = new MappingMultiDocsEnum();
        }
        DocsAndFreqsEnum.MergeState = mergeState;
        MultiDocsEnum docsAndFreqsEnumIn = null;
        while ((term = termsEnum.Next()) != null) {
            // We can pass null for liveDocs, because the
            // mapping enum will skip the non-live docs:
            docsAndFreqsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsAndFreqsEnumIn);
            Debug.Assert(docsAndFreqsEnumIn != null);
            DocsAndFreqsEnum.Reset(docsAndFreqsEnumIn);
            PostingsConsumer postingsConsumer = StartTerm(term);
            TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, DocsAndFreqsEnum, visitedDocs);
            if (stats.DocFreq > 0) {
                FinishTerm(term, stats);
                sumTotalTermFreq += stats.TotalTermFreq;
                sumDFsinceLastAbortCheck += stats.DocFreq;
                sumDocFreq += stats.DocFreq;
                if (sumDFsinceLastAbortCheck > 60000) {
                    mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                    sumDFsinceLastAbortCheck = 0;
                }
            }
        }
    } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
        if (PostingsEnum == null) {
            PostingsEnum = new MappingMultiDocsAndPositionsEnum();
        }
        PostingsEnum.MergeState = mergeState;
        MultiDocsAndPositionsEnum postingsEnumIn = null;
        while ((term = termsEnum.Next()) != null) {
            // We can pass null for liveDocs, because the
            // mapping enum will skip the non-live docs:
            postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn, DocsAndPositionsEnum.FLAG_PAYLOADS);
            Debug.Assert(postingsEnumIn != null);
            PostingsEnum.Reset(postingsEnumIn);
            PostingsConsumer postingsConsumer = StartTerm(term);
            TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, PostingsEnum, visitedDocs);
            if (stats.DocFreq > 0) {
                FinishTerm(term, stats);
                sumTotalTermFreq += stats.TotalTermFreq;
                sumDFsinceLastAbortCheck += stats.DocFreq;
                sumDocFreq += stats.DocFreq;
                if (sumDFsinceLastAbortCheck > 60000) {
                    mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                    sumDFsinceLastAbortCheck = 0;
                }
            }
        }
    } else {
        Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        if (PostingsEnum == null) {
            PostingsEnum = new MappingMultiDocsAndPositionsEnum();
        }
        PostingsEnum.MergeState = mergeState;
        MultiDocsAndPositionsEnum postingsEnumIn = null;
        while ((term = termsEnum.Next()) != null) {
            // We can pass null for liveDocs, because the
            // mapping enum will skip the non-live docs:
            postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn);
            Debug.Assert(postingsEnumIn != null);
            PostingsEnum.Reset(postingsEnumIn);
            PostingsConsumer postingsConsumer = StartTerm(term);
            TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, PostingsEnum, visitedDocs);
            if (stats.DocFreq > 0) {
                FinishTerm(term, stats);
                sumTotalTermFreq += stats.TotalTermFreq;
                sumDFsinceLastAbortCheck += stats.DocFreq;
                sumDocFreq += stats.DocFreq;
                if (sumDFsinceLastAbortCheck > 60000) {
                    mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                    sumDFsinceLastAbortCheck = 0;
                }
            }
        }
    }
    Finish(indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.Cardinality());
}
/// <summary>
/// checks the terms enum sequentially
/// if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
/// </summary>
public virtual void AssertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep) {
    BytesRef term;
    IBits randomBits = new RandomBits(MAXDOC, Random.NextDouble(), Random);
    DocsAndPositionsEnum leftPositions = null;
    DocsAndPositionsEnum rightPositions = null;
    DocsEnum leftDocs = null;
    DocsEnum rightDocs = null;

    while ((term = leftTermsEnum.Next()) != null) {
        Assert.AreEqual(term, rightTermsEnum.Next());
        AssertTermStats(leftTermsEnum, rightTermsEnum);
        if (deep) {
            // with payloads + off
            AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
            AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));
            AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
            AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

            // with payloads only
            AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.PAYLOADS));
            AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.PAYLOADS));
            AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.PAYLOADS));
            AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.PAYLOADS));

            // with offsets only
            AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.OFFSETS));
            AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.OFFSETS));
            AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.OFFSETS));
            AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.OFFSETS));

            // with positions only
            AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.NONE));
            AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.NONE));
            AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.NONE));
            AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.NONE));

            // with freqs:
            AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
            AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

            // w/o freqs:
            AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsFlags.NONE));
            AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsFlags.NONE));

            // with freqs:
            AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
            AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

            // w/o freqs:
            AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsFlags.NONE));
            AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsFlags.NONE));
        }
    }
    Assert.IsNull(rightTermsEnum.Next());
}
private void DuellReaders(CompositeReader other, AtomicReader memIndexReader) {
    AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
    Fields memFields = memIndexReader.Fields;
    foreach (string field in competitor.Fields) {
        Terms memTerms = memFields.GetTerms(field);
        Terms iwTerms = memIndexReader.GetTerms(field);
        if (iwTerms is null) {
            assertNull(memTerms);
        } else {
            NumericDocValues normValues = competitor.GetNormValues(field);
            NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
            if (normValues != null) {
                // mem idx always computes norms on the fly
                assertNotNull(memNormValues);
                assertEquals(normValues.Get(0), memNormValues.Get(0));
            }

            assertNotNull(memTerms);
            assertEquals(iwTerms.DocCount, memTerms.DocCount);
            assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
            assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
            TermsEnum iwTermsIter = iwTerms.GetEnumerator();
            TermsEnum memTermsIter = memTerms.GetEnumerator();
            if (iwTerms.HasPositions) {
                bool offsets = iwTerms.HasOffsets && memTerms.HasOffsets;
                while (iwTermsIter.MoveNext()) {
                    assertTrue(memTermsIter.MoveNext());
                    assertEquals(iwTermsIter.Term, memTermsIter.Term);
                    DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.DocsAndPositions(null, null);
                    DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                    while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
                        assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc());
                        assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq);
                        for (int i = 0; i < iwDocsAndPos.Freq; i++) {
                            assertEquals("term: " + iwTermsIter.Term.Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                            if (offsets) {
                                assertEquals(iwDocsAndPos.StartOffset, memDocsAndPos.StartOffset);
                                assertEquals(iwDocsAndPos.EndOffset, memDocsAndPos.EndOffset);
                            }
                        }
                    }
                }
            } else {
                while (iwTermsIter.MoveNext()) {
                    assertEquals(iwTermsIter.Term, memTermsIter.Term);
                    DocsEnum iwDocsAndPos = iwTermsIter.Docs(null, null);
                    DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                    while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
                        assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc());
                        assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq);
                    }
                }
            }
        }
    }
}
private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, bool multipleValuesPerDocument, bool scoreDocsInOrder) {
    IndexIterationContext context = new IndexIterationContext();
    int numRandomValues = nDocs / 2;
    context.RandomUniqueValues = new string[numRandomValues];
    ISet<string> trackSet = new JCG.HashSet<string>();
    context.RandomFrom = new bool[numRandomValues];
    for (int i = 0; i < numRandomValues; i++) {
        string uniqueRandomValue;
        do {
            uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random);
            // uniqueRandomValue = TestUtil.randomSimpleString(random);
        } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue));
        // Generate unique values and empty strings aren't allowed.
        trackSet.Add(uniqueRandomValue);
        context.RandomFrom[i] = Random.NextBoolean();
        context.RandomUniqueValues[i] = uniqueRandomValue;
    }

    RandomDoc[] docs = new RandomDoc[nDocs];
    for (int i = 0; i < nDocs; i++) {
        string id = Convert.ToString(i, CultureInfo.InvariantCulture);
        int randomI = Random.Next(context.RandomUniqueValues.Length);
        string value = context.RandomUniqueValues[randomI];
        Document document = new Document();
        document.Add(NewTextField(Random, "id", id, Field.Store.NO));
        document.Add(NewTextField(Random, "value", value, Field.Store.NO));
        bool from = context.RandomFrom[randomI];
        int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1;
        docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
        for (int j = 0; j < numberOfLinkValues; j++) {
            string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)];
            docs[i].linkValues.Add(linkValue);
            if (from) {
                if (!context.FromDocuments.TryGetValue(linkValue, out IList<RandomDoc> fromDocs)) {
                    context.FromDocuments[linkValue] = fromDocs = new JCG.List<RandomDoc>();
                }
                if (!context.RandomValueFromDocs.TryGetValue(value, out IList<RandomDoc> randomValueFromDocs)) {
                    context.RandomValueFromDocs[value] = randomValueFromDocs = new JCG.List<RandomDoc>();
                }
                fromDocs.Add(docs[i]);
                randomValueFromDocs.Add(docs[i]);
                document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO));
            } else {
                if (!context.ToDocuments.TryGetValue(linkValue, out IList<RandomDoc> toDocuments)) {
                    context.ToDocuments[linkValue] = toDocuments = new JCG.List<RandomDoc>();
                }
                if (!context.RandomValueToDocs.TryGetValue(value, out IList<RandomDoc> randomValueToDocs)) {
                    context.RandomValueToDocs[value] = randomValueToDocs = new JCG.List<RandomDoc>();
                }
                toDocuments.Add(docs[i]);
                randomValueToDocs.Add(docs[i]);
                document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO));
            }
        }

        RandomIndexWriter w;
        if (from) {
            w = fromWriter;
        } else {
            w = toWriter;
        }

        w.AddDocument(document);
        if (Random.Next(10) == 4) {
            w.Commit();
        }
        if (Verbose) {
            Console.WriteLine("Added document[" + docs[i].id + "]: " + document);
        }
    }

    // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
    // any ScoreMode.
    IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader());
    IndexSearcher toSearcher = NewSearcher(toWriter.GetReader());
    for (int i = 0; i < context.RandomUniqueValues.Length; i++) {
        string uniqueRandomValue = context.RandomUniqueValues[i];
        string fromField;
        string toField;
        IDictionary<string, IDictionary<int, JoinScore>> queryVals;
        if (context.RandomFrom[i]) {
            fromField = "from";
            toField = "to";
            queryVals = context.FromHitsToJoinScore;
        } else {
            fromField = "to";
            toField = "from";
            queryVals = context.ToHitsToJoinScore;
        }

        IDictionary<BytesRef, JoinScore> joinValueToJoinScores = new Dictionary<BytesRef, JoinScore>();
        if (multipleValuesPerDocument) {
            fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousClass3(fromField, joinValueToJoinScores));
        } else {
            fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousClass4(fromField, joinValueToJoinScores));
        }

        IDictionary<int, JoinScore> docToJoinScore = new Dictionary<int, JoinScore>();
        if (multipleValuesPerDocument) {
            if (scoreDocsInOrder) {
                AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                Terms terms = slowCompositeReader.GetTerms(toField);
                if (terms != null) {
                    DocsEnum docsEnum = null;
                    TermsEnum termsEnum = null;
                    JCG.SortedSet<BytesRef> joinValues = new JCG.SortedSet<BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                    joinValues.UnionWith(joinValueToJoinScores.Keys);
                    foreach (BytesRef joinValue in joinValues) {
                        termsEnum = terms.GetEnumerator(termsEnum);
                        if (termsEnum.SeekExact(joinValue)) {
                            docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE);
                            JoinScore joinScore = joinValueToJoinScores[joinValue];
                            for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc()) {
                                // First encountered join value determines the score.
                                // Something to keep in mind for many-to-many relations.
                                if (!docToJoinScore.ContainsKey(doc)) {
                                    docToJoinScore[doc] = joinScore;
                                }
                            }
                        }
                    }
                }
            } else {
                toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousClass5(toField, joinValueToJoinScores, docToJoinScore));
            }
        } else {
            toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousClass6(toField, joinValueToJoinScores, docToJoinScore));
        }
        queryVals[uniqueRandomValue] = docToJoinScore;
    }

    fromSearcher.IndexReader.Dispose();
    toSearcher.IndexReader.Dispose();
    return context;
}