/// <summary>
/// Returns a Bits for a reader's docsWithField (potentially merging on-the-fly).
/// <p>
/// this is a slow way to access this bitset. Instead, access them per-segment
/// with <seealso cref="AtomicReader#getDocsWithField(String)"/>
/// </p>
/// </summary>
public static Bits GetDocsWithField(IndexReader r, string field)
{
    IList<AtomicReaderContext> leaves = r.Leaves;
    int leafCount = leaves.Count;

    // Trivial cases: no segments at all, or a single segment we can delegate to.
    if (leafCount == 0)
    {
        return null;
    }
    if (leafCount == 1)
    {
        return leaves[0].AtomicReader.GetDocsWithField(field);
    }

    bool sawRealBits = false;    // at least one segment actually has the field
    bool sawMissingDoc = false;  // at least one document may lack a value
    Bits[] segmentBits = new Bits[leafCount];
    int[] docStarts = new int[leafCount + 1];

    for (int i = 0; i < leafCount; i++)
    {
        AtomicReaderContext context = leaves[i];
        Bits bits = context.AtomicReader.GetDocsWithField(field);
        if (bits == null)
        {
            // Field absent in this segment: substitute an all-false bitset.
            bits = new Lucene.Net.Util.Bits_MatchNoBits(context.Reader.MaxDoc);
            sawMissingDoc = true;
        }
        else
        {
            sawRealBits = true;
            if (!(bits is Lucene.Net.Util.Bits_MatchAllBits))
            {
                sawMissingDoc = true;
            }
        }
        segmentBits[i] = bits;
        docStarts[i] = context.DocBase;
    }
    docStarts[leafCount] = r.MaxDoc;

    if (!sawRealBits)
    {
        // No segment carries the field at all.
        return null;
    }
    if (!sawMissingDoc)
    {
        // Every document in every segment has a value.
        return new Lucene.Net.Util.Bits_MatchAllBits(r.MaxDoc);
    }
    return new MultiBits(segmentBits, docStarts, false);
}
/// <summary>
/// Merges the doc-values of all source readers into the target segment,
/// one field at a time, dispatching on the field's doc-values type.
/// </summary>
private void MergeDocValues(SegmentWriteState segmentWriteState)
{
    // A single consumer writes all doc-values fields; it must be closed on every path.
    DocValuesConsumer consumer = Codec.DocValuesFormat().FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in MergeState.FieldInfos)
        {
            DocValuesType_e? type = field.DocValuesType;
            if (type == null)
            {
                continue; // field carries no doc values
            }

            if (type == DocValuesType_e.NUMERIC)
            {
                IList<NumericDocValues> sources = new List<NumericDocValues>();
                //IList<Bits> docsWithField = new List<Bits>();
                foreach (AtomicReader reader in MergeState.Readers)
                {
                    NumericDocValues values = reader.GetNumericDocValues(field.Name);
                    Bits bits = reader.GetDocsWithField(field.Name);
                    if (values == null)
                    {
                        // Reader lacks the field: merge an empty source instead.
                        values = DocValues.EMPTY_NUMERIC;
                        bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                    }
                    sources.Add(values);
                    //docsWithField.Add(bits);
                }
                consumer.MergeNumericField(field, MergeState, sources/*, docsWithField*/);
            }
            else if (type == DocValuesType_e.BINARY)
            {
                IList<BinaryDocValues> sources = new List<BinaryDocValues>();
                //IList<Bits> docsWithField = new List<Bits>();
                foreach (AtomicReader reader in MergeState.Readers)
                {
                    BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                    Bits bits = reader.GetDocsWithField(field.Name);
                    if (values == null)
                    {
                        values = DocValues.EMPTY_BINARY;
                        bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                    }
                    sources.Add(values);
                    //docsWithField.Add(bits);
                }
                consumer.MergeBinaryField(field, MergeState, sources/*, docsWithField*/);
            }
            else if (type == DocValuesType_e.SORTED)
            {
                IList<SortedDocValues> sources = new List<SortedDocValues>();
                foreach (AtomicReader reader in MergeState.Readers)
                {
                    SortedDocValues values = reader.GetSortedDocValues(field.Name);
                    if (values == null)
                    {
                        values = DocValues.EMPTY_SORTED;
                    }
                    sources.Add(values);
                }
                consumer.MergeSortedField(field, MergeState, sources);
            }
            else if (type == DocValuesType_e.SORTED_SET)
            {
                IList<SortedSetDocValues> sources = new List<SortedSetDocValues>();
                foreach (AtomicReader reader in MergeState.Readers)
                {
                    SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                    if (values == null)
                    {
                        values = DocValues.EMPTY_SORTED_SET;
                    }
                    sources.Add(values);
                }
                consumer.MergeSortedSetField(field, MergeState, sources);
            }
            else
            {
                throw new InvalidOperationException("type=" + type);
            }
        }
        success = true;
    }
    finally
    {
        // On success close normally; on failure close without masking the in-flight exception.
        if (success)
        {
            IOUtils.Close(consumer);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(consumer);
        }
    }
}
/// <summary>
/// Verifies DocsEnum reuse semantics: passing the same Bits instance (or null)
/// across calls reuses one enum, while a fresh Bits per call yields a fresh enum.
/// </summary>
public virtual void TestReuseDocsEnumSameBitsOrNull()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(
        Random(), dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    DirectoryReader open = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext ctx in open.Leaves)
    {
        Terms terms = ((AtomicReader)ctx.Reader).Terms("body");
        TermsEnum termsEnum = terms.Iterator(null);
        IdentityHashMap<DocsEnum, bool?> distinctEnums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits sharedBits = new MatchNoBits(open.MaxDoc);

        // Same Bits instance on every call: the enum should be reused throughout.
        DocsEnum reuse = null;
        while ((termsEnum.Next()) != null)
        {
            reuse = termsEnum.Docs(sharedBits, reuse,
                Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            distinctEnums[reuse] = true;
        }
        Assert.AreEqual(1, distinctEnums.Count);

        // A brand-new Bits each call: no reuse, one enum per term.
        distinctEnums.Clear();
        termsEnum = terms.Iterator(null);
        reuse = null;
        while ((termsEnum.Next()) != null)
        {
            reuse = termsEnum.Docs(new MatchNoBits(open.MaxDoc), reuse,
                Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            distinctEnums[reuse] = true;
        }
        Assert.AreEqual(terms.Size(), distinctEnums.Count);

        // Null live-docs on every call: the enum should again be reused.
        distinctEnums.Clear();
        termsEnum = terms.Iterator(null);
        reuse = null;
        while ((termsEnum.Next()) != null)
        {
            reuse = termsEnum.Docs(null, reuse,
                Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            distinctEnums[reuse] = true;
        }
        Assert.AreEqual(1, distinctEnums.Count);
    }
    IOUtils.Close(writer, open, dir);
}
/// <summary>
/// Verifies that a DocsEnum obtained from one reader is never silently reused
/// when handed back through a different reader: each term gets a fresh enum.
/// </summary>
public virtual void TestReuseDocsEnumDifferentReader()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter writer = new RandomIndexWriter(
        Random(), dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    // Two independent readers over the same commit.
    DirectoryReader firstReader = DirectoryReader.Open(dir);
    DirectoryReader secondReader = DirectoryReader.Open(dir);
    IList<AtomicReaderContext> leaves = firstReader.Leaves;
    IList<AtomicReaderContext> leaves2 = secondReader.Leaves;

    foreach (AtomicReaderContext ctx in leaves)
    {
        Terms terms = ((AtomicReader)ctx.Reader).Terms("body");
        TermsEnum termsEnum = terms.Iterator(null);
        IdentityHashMap<DocsEnum, bool?> distinctEnums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits noBits = new MatchNoBits(firstReader.MaxDoc);
        termsEnum = terms.Iterator(null);

        // Offer an enum sourced from the *other* reader with null live-docs:
        // it must not be reused, so we see one enum per term.
        DocsEnum reuse = null;
        BytesRef term = null;
        while ((term = termsEnum.Next()) != null)
        {
            reuse = termsEnum.Docs(null, RandomDocsEnum("body", term, leaves2, noBits),
                Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            distinctEnums[reuse] = true;
        }
        Assert.AreEqual(terms.Size(), distinctEnums.Count);

        // Same, but with an explicit Bits instance: still no cross-reader reuse.
        termsEnum = terms.Iterator(null);
        distinctEnums.Clear();
        reuse = null;
        while ((term = termsEnum.Next()) != null)
        {
            reuse = termsEnum.Docs(noBits, RandomDocsEnum("body", term, leaves2, noBits),
                Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            distinctEnums[reuse] = true;
        }
        Assert.AreEqual(terms.Size(), distinctEnums.Count);
    }
    IOUtils.Close(writer, firstReader, secondReader, dir);
}
/// <summary>
/// Merges doc-values from all source readers into the target segment, handling
/// each field according to its doc-values type (NUMERIC, BINARY, SORTED, SORTED_SET).
/// The consumer is closed on every path; on failure it is closed without masking
/// the original exception.
/// </summary>
private void MergeDocValues(SegmentWriteState segmentWriteState)
{
    // Single consumer for all doc-values fields in the merged segment.
    DocValuesConsumer consumer = Codec.DocValuesFormat().FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in MergeState.FieldInfos)
        {
            DocValuesType_e? type = field.DocValuesType;
            // Null type means the field carries no doc values; skip it.
            if (type != null)
            {
                if (type == DocValuesType_e.NUMERIC)
                {
                    IList<NumericDocValues> toMerge = new List<NumericDocValues>();
                    //IList<Bits> docsWithField = new List<Bits>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        NumericDocValues values = reader.GetNumericDocValues(field.Name);
                        // NOTE(review): 'bits' is currently unused because the
                        // docsWithField plumbing below is commented out.
                        Bits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            // Reader lacks this field: merge an empty substitute.
                            values = DocValues.EMPTY_NUMERIC;
                            bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        //docsWithField.Add(bits);
                    }
                    consumer.MergeNumericField(field, MergeState, toMerge /*, docsWithField*/);
                }
                else if (type == DocValuesType_e.BINARY)
                {
                    IList<BinaryDocValues> toMerge = new List<BinaryDocValues>();
                    //IList<Bits> docsWithField = new List<Bits>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                        // NOTE(review): 'bits' is unused here as well (see above).
                        Bits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_BINARY;
                            bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        //docsWithField.Add(bits);
                    }
                    consumer.MergeBinaryField(field, MergeState, toMerge /*, docsWithField*/);
                }
                else if (type == DocValuesType_e.SORTED)
                {
                    IList<SortedDocValues> toMerge = new List<SortedDocValues>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        SortedDocValues values = reader.GetSortedDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedField(field, MergeState, toMerge);
                }
                else if (type == DocValuesType_e.SORTED_SET)
                {
                    IList<SortedSetDocValues> toMerge = new List<SortedSetDocValues>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED_SET;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedSetField(field, MergeState, toMerge);
                }
                else
                {
                    // Unknown doc-values type: fail fast.
                    throw new InvalidOperationException("type=" + type);
                }
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(consumer);
        }
        else
        {
            // Close without throwing, so the original failure propagates.
            IOUtils.CloseWhileHandlingException(consumer);
        }
    }
}
/// <summary>
/// Returns a Bits for a reader's docsWithField (potentially merging on-the-fly).
/// <p>
/// this is a slow way to access this bitset. Instead, access them per-segment
/// with <seealso cref="AtomicReader#getDocsWithField(String)"/>
/// </p>
/// </summary>
public static Bits GetDocsWithField(IndexReader r, string field)
{
    IList<AtomicReaderContext> leaves = r.Leaves;
    int size = leaves.Count;
    if (size == 0)
    {
        // No segments at all.
        return null;
    }
    else if (size == 1)
    {
        // Single segment: delegate directly, no merging needed.
        return leaves[0].AtomicReader.GetDocsWithField(field);
    }

    bool anyReal = false;    // true once any segment actually has the field
    bool anyMissing = false; // true once any document may lack a value
    Bits[] values = new Bits[size];
    int[] starts = new int[size + 1];
    for (int i = 0; i < size; i++)
    {
        AtomicReaderContext context = leaves[i];
        Bits v = context.AtomicReader.GetDocsWithField(field);
        if (v == null)
        {
            // Field absent in this segment: substitute an all-false bitset.
            v = new Lucene.Net.Util.Bits_MatchNoBits(context.Reader.MaxDoc);
            anyMissing = true;
        }
        else
        {
            anyReal = true;
            if (v is Lucene.Net.Util.Bits_MatchAllBits == false)
            {
                anyMissing = true;
            }
        }
        values[i] = v;
        starts[i] = context.DocBase;
    }
    starts[size] = r.MaxDoc;

    if (!anyReal)
    {
        // No segment has the field at all.
        return null;
    }
    else if (!anyMissing)
    {
        // Every document in every segment has a value.
        return new Lucene.Net.Util.Bits_MatchAllBits(r.MaxDoc);
    }
    else
    {
        return new MultiBits(values, starts, false);
    }
}