void doPrevSetBit(BitArray a, FixedBitSet b) { int aa = a.Length + rnd.Next(100); int bb = aa; do { // aa = a.prevSetBit(aa-1); aa--; while ((aa >= 0) && (!a.Get(aa))) { aa--; } if (b.Length() == 0) { bb = -1; } else if (bb > b.Length() - 1) { bb = b.PrevSetBit(b.Length() - 1); } else if (bb < 1) { bb = -1; } else { bb = bb >= 1 ? b.PrevSetBit(bb - 1) : -1; } Assert.AreEqual(aa, bb); } while (aa >= 0); }
internal Iterator(int size, PagedGrowableWriter values, FixedBitSet docsWithField, PagedMutable docs) { this.Size = size; this.Values = values; this.DocsWithField = docsWithField; this.Docs = docs; }
internal Iterator(int size, PagedGrowableWriter offsets, PagedGrowableWriter lengths, PagedMutable docs, BytesRef values, FixedBitSet docsWithField) { this.Offsets = offsets; this.Size = size; this.Lengths = lengths; this.Docs = docs; this.DocsWithField = docsWithField; Value_Renamed = (BytesRef)values.Clone(); }
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, bool trackDocsWithField) { Pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT); DocsWithField = trackDocsWithField ? new FixedBitSet(64) : null; BytesUsed = Pending.RamBytesUsed() + DocsWithFieldBytesUsed(); this.FieldInfo = fieldInfo; this.IwBytesUsed = iwBytesUsed; iwBytesUsed.AddAndGet(BytesUsed); }
void doNextSetBit(BitArray a, FixedBitSet b) { int aa = -1, bb = -1; do { aa = a.NextSetBit(aa + 1); bb = bb < b.Length() - 1 ? b.NextSetBit(bb + 1) : -1; Assert.AreEqual(aa, bb); } while (aa >= 0); }
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) { this.FieldInfo = fieldInfo; this.Bytes = new PagedBytes(BLOCK_BITS); this.BytesOut = Bytes.DataOutput; this.Lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT); this.IwBytesUsed = iwBytesUsed; this.DocsWithField = new FixedBitSet(64); this.BytesUsed = DocsWithFieldBytesUsed(); iwBytesUsed.AddAndGet(BytesUsed); }
void doGet(BitArray a, FixedBitSet b) { int max = b.Length(); for (int i = 0; i < max; i++) { if (a.Get(i) != b.Get(i)) { Assert.Fail("mismatch: BitSet=[" + i + "]=" + a.Get(i)); } } }
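// A minimal driver sketch (not part of the original tests) showing how the parity helpers above
// (doGet, doNextSetBit, doPrevSetBit) can be exercised: the same random bits are mirrored into a
// System.Collections.BitArray (the reference) and a FixedBitSet (the implementation under test),
// then the helpers compare the two. The rnd field and the FixedBitSet members (Set/Get) are
// assumed to be the same ones used in the surrounding snippets.
void doRandomParityChecks(int numBits)
{
    BitArray a = new BitArray(numBits);       // reference implementation
    FixedBitSet b = new FixedBitSet(numBits); // implementation under test
    for (int i = 0; i < numBits; i++)
    {
        if (rnd.Next(2) == 0)
        {
            a.Set(i, true);
            b.Set(i);
        }
    }
    doGet(a, b);
    doNextSetBit(a, b);
    doPrevSetBit(a, b);
}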
public void TestMissingTerms() { string fieldName = "field1"; Directory rd = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), rd); for (int i = 0; i < 100; i++) { Document doc = new Document(); int term = i * 10; //terms are units of 10; doc.Add(NewStringField(fieldName, "" + term, Field.Store.YES)); w.AddDocument(doc); } IndexReader reader = SlowCompositeReaderWrapper.Wrap(w.Reader); assertTrue(reader.Context is AtomicReaderContext); AtomicReaderContext context = (AtomicReaderContext)reader.Context; w.Dispose(); IList <Term> terms = new List <Term>(); terms.Add(new Term(fieldName, "19")); FixedBitSet bits = (FixedBitSet)TermsFilter(Random().NextBoolean(), terms).GetDocIdSet(context, context.AtomicReader.LiveDocs); assertNull("Must match nothing", bits); terms.Add(new Term(fieldName, "20")); bits = (FixedBitSet)TermsFilter(Random().NextBoolean(), terms).GetDocIdSet(context, context.AtomicReader.LiveDocs); assertEquals("Must match 1", 1, bits.Cardinality()); terms.Add(new Term(fieldName, "10")); bits = (FixedBitSet)TermsFilter(Random().NextBoolean(), terms).GetDocIdSet(context, context.AtomicReader.LiveDocs); assertEquals("Must match 2", 2, bits.Cardinality()); terms.Add(new Term(fieldName, "00")); bits = (FixedBitSet)TermsFilter(Random().NextBoolean(), terms).GetDocIdSet(context, context.AtomicReader.LiveDocs); assertEquals("Must match 2", 2, bits.Cardinality()); reader.Dispose(); rd.Dispose(); }
public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter) : base(context.AtomicReader) { int maxDoc = m_input.MaxDoc; FixedBitSet bits = new FixedBitSet(maxDoc); // ignore livedocs here, as we filter them later: DocIdSet docs = preserveFilter.GetDocIdSet(context, null); if (docs != null) { DocIdSetIterator it = docs.GetIterator(); if (it != null) { bits.Or(it); } } if (negateFilter) { bits.Flip(0, maxDoc); } if (m_input.HasDeletions) { IBits oldLiveDocs = m_input.LiveDocs; if (Debugging.AssertsEnabled) { Debugging.Assert(oldLiveDocs != null); } DocIdSetIterator it = bits.GetIterator(); for (int i = it.NextDoc(); i < maxDoc; i = it.NextDoc()) { if (!oldLiveDocs.Get(i)) { // we can safely modify the current bit, as the iterator already stepped over it: bits.Clear(i); } } } this.liveDocs = bits; this.numDocs = bits.Cardinality; }
protected virtual void CollectDocs(FixedBitSet bitSet) { // LUCENENET specific - use guard clause instead of assert if (m_termsEnum is null) { throw new InvalidOperationException($"{nameof(m_termsEnum)} must not be null."); } if (bitSet is null) { throw new ArgumentNullException(nameof(bitSet)); } //WARN: keep this specialization in sync m_docsEnum = m_termsEnum.Docs(m_acceptDocs, m_docsEnum, DocsFlags.NONE); int docid; while ((docid = m_docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { bitSet.Set(docid); } }
/// <summary> /// Useful from an assert. </summary> internal virtual bool IsConsistent(int maxDoc) { FixedBitSet targets = new FixedBitSet(maxDoc); for (int i = 0; i < maxDoc; ++i) { int target = Map(i); if (target < 0 || target >= maxDoc) { Debug.Assert(false, "out of range: " + target + " not in [0-" + maxDoc + "["); return(false); } else if (targets.Get(target)) { Debug.Assert(false, target + " is already taken (" + i + ")"); return(false); } } return(true); }
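// "Useful from an assert": a hedged usage sketch (the docMap and maxDoc names are hypothetical)
// showing the intended call site, so the O(maxDoc) consistency scan only runs when asserts are enabled:
// Debug.Assert(docMap.IsConsistent(maxDoc));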
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs) { int maxDoc = context.Reader.MaxDoc; FieldCache.Ints idSource = FieldCache.DEFAULT.GetInts(context.AtomicReader, "id", false); Assert.IsNotNull(idSource); FixedBitSet bits = new FixedBitSet(maxDoc); for (int docID = 0; docID < maxDoc; docID++) { if ((float)Random.NextDouble() <= Density && (acceptDocs == null || acceptDocs.Get(docID))) { bits.Set(docID); //System.out.println(" acc id=" + idSource.Get(docID) + " docID=" + docID + " id=" + idSource.Get(docID) + " v=" + docValues.Get(idSource.Get(docID)).Utf8ToString()); MatchValues.Add(DocValues[idSource.Get(docID)]); } } return(bits); }
private FixedBitSet CorrectBits(AtomicReader reader, IBits acceptDocs) { FixedBitSet bits = new FixedBitSet(reader.MaxDoc); //assume all are INvalid Terms terms = reader.Fields.GetTerms(fieldName); if (terms == null) { return(bits); } TermsEnum termsEnum = terms.GetEnumerator(); DocsEnum docs = null; while (termsEnum.MoveNext()) { docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE); int doc = docs.NextDoc(); if (doc != DocIdSetIterator.NO_MORE_DOCS) { if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) { bits.Set(doc); } else { int lastDoc /* = doc*/; // LUCENENET: Removed unnecessary assignment while (true) { lastDoc = doc; doc = docs.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } } bits.Set(lastDoc); } } } return(bits); }
/// <param name="targetMaxSaturation"> /// A number between 0 and 1 describing the % of bits that would ideally be set in the result. /// Lower values have better accuracy but require more space. /// </param> /// <return>A smaller FuzzySet or null if the current set is already over-saturated</return> public FuzzySet Downsize(float targetMaxSaturation) { var numBitsSet = _filter.Cardinality(); FixedBitSet rightSizedBitSet; var rightSizedBitSetSize = _bloomSize; //Hopefully find a smaller size bitset into which we can project accumulated values while maintaining desired saturation level foreach (var candidateBitsetSize in from candidateBitsetSize in UsableBitSetSizes let candidateSaturation = numBitsSet / (float)candidateBitsetSize where candidateSaturation <= targetMaxSaturation select candidateBitsetSize) { rightSizedBitSetSize = candidateBitsetSize; break; } // Re-project the numbers to a smaller space if necessary if (rightSizedBitSetSize < _bloomSize) { // Reset the choice of bitset to the smaller version rightSizedBitSet = new FixedBitSet(rightSizedBitSetSize + 1); // Map across the bits from the large set to the smaller one var bitIndex = 0; do { bitIndex = _filter.NextSetBit(bitIndex); if (bitIndex < 0) { continue; } // Project the larger number into a smaller one effectively // modulo-ing by using the target bitset size as a mask var downSizedBitIndex = bitIndex & rightSizedBitSetSize; rightSizedBitSet.Set(downSizedBitIndex); bitIndex++; } while ((bitIndex >= 0) && (bitIndex <= _bloomSize)); } else { return(null); } return(new FuzzySet(rightSizedBitSet, rightSizedBitSetSize, _hashFunction)); }
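// Sketch of the projection used in Downsize above. The candidate bitset sizes are assumed to be
// all-ones masks (2^n - 1), so the bitwise AND behaves like a cheap modulo into the smaller set.
// For example, with rightSizedBitSetSize = 1023 (binary 11_1111_1111):
//   1500 & 1023 == 476   // a bit set at index 1500 in the large filter lands on index 476
//   476  & 1023 == 476   // indexes already within the smaller range are unchanged
static int Project(int bitIndex, int rightSizedBitSetSize) => bitIndex & rightSizedBitSetSize;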
public void TestFieldNotPresent() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int num = AtLeast(3); int skip = Random.Next(num); var terms = new JCG.List <Term>(); for (int i = 0; i < num; i++) { terms.Add(new Term("field" + i, "content1")); Document doc = new Document(); if (skip == i) { continue; } doc.Add(NewStringField("field" + i, "content1", Field.Store.YES)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader reader = w.GetReader(); w.Dispose(); assertEquals(1, reader.Leaves.size()); AtomicReaderContext context = reader.Leaves[0]; TermsFilter tf = new TermsFilter(terms); FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs); assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality); reader.Dispose(); dir.Dispose(); }
public override void AddSortedField(FieldInfo field, IEnumerable <BytesRef> values, IEnumerable <long?> docToOrd) { int valueCount = 0; BytesRef lastValue = null; foreach (BytesRef b in values) { Debug.Assert(b != null); Debug.Assert(b.IsValid()); if (valueCount > 0) { Debug.Assert(b.CompareTo(lastValue) > 0); } lastValue = BytesRef.DeepCopyOf(b); valueCount++; } Debug.Assert(valueCount <= maxDoc); FixedBitSet seenOrds = new FixedBitSet(valueCount); int count = 0; foreach (long?v in docToOrd) { Debug.Assert(v != null); int ord = (int)v.Value; Debug.Assert(ord >= -1 && ord < valueCount); if (ord >= 0) { seenOrds.Set(ord); } count++; } Debug.Assert(count == maxDoc); Debug.Assert(seenOrds.Cardinality() == valueCount); CheckIterator(values.GetEnumerator(), valueCount, false); CheckIterator(docToOrd.GetEnumerator(), maxDoc, false); @in.AddSortedField(field, values, docToOrd); }
private BigSegmentedArray GetCollapsedCounts() { if (m_collapsedCounts == null) { m_collapsedCounts = new LazyBigInt32Array(m_bucketValues.Count); FacetDataCache dataCache = m_subCollector.DataCache; ITermValueList subList = dataCache.ValArray; BigSegmentedArray subcounts = m_subCollector.Count; FixedBitSet indexSet = new FixedBitSet(subcounts.Length); int c = 0; int i = 0; foreach (string val in m_bucketValues) { if (val.Length > 0) { string[] subVals = m_predefinedBuckets.Get(val); int count = 0; foreach (string subVal in subVals) { int index = subList.IndexOf(subVal); if (index > 0) { int subcount = subcounts.Get(index); count += subcount; if (!indexSet.Get(index)) { indexSet.Set(index); c += dataCache.Freqs[index]; } } } m_collapsedCounts.Add(i, count); } i++; } m_collapsedCounts.Add(0, (m_numdocs - c)); } return(m_collapsedCounts); }
public virtual void AddValue(int docID, long value) { if (docID < Pending.Size()) { throw new System.ArgumentException("DocValuesField \"" + FieldInfo.Name + "\" appears more than once in this document (only one value is allowed per field)"); } // Fill in any holes: for (int i = (int)Pending.Size(); i < docID; ++i) { Pending.Add(MISSING); } Pending.Add(value); if (DocsWithField != null) { DocsWithField = FixedBitSet.EnsureCapacity(DocsWithField, docID); DocsWithField.Set(docID); } UpdateBytesUsed(); }
public virtual void TestBuildDocMap() { int maxDoc = TestUtil.NextInt(Random(), 1, 128); int numDocs = TestUtil.NextInt(Random(), 0, maxDoc); int numDeletedDocs = maxDoc - numDocs; FixedBitSet liveDocs = new FixedBitSet(maxDoc); for (int i = 0; i < numDocs; ++i) { while (true) { int docID = Random().Next(maxDoc); if (!liveDocs.Get(docID)) { liveDocs.Set(docID); break; } } } MergeState.DocMap docMap = MergeState.DocMap.Build(maxDoc, liveDocs); Assert.AreEqual(maxDoc, docMap.MaxDoc); Assert.AreEqual(numDocs, docMap.NumDocs); Assert.AreEqual(numDeletedDocs, docMap.NumDeletedDocs); // assert the mapping is compact for (int i = 0, del = 0; i < maxDoc; ++i) { if (!liveDocs.Get(i)) { Assert.AreEqual(-1, docMap.Get(i)); ++del; } else { Assert.AreEqual(i - del, docMap.Get(i)); } } }
public virtual void AddValue(int docID, long value) { if (docID < pending.Count) { throw new System.ArgumentException("DocValuesField \"" + fieldInfo.Name + "\" appears more than once in this document (only one value is allowed per field)"); } // Fill in any holes: for (int i = (int)pending.Count; i < docID; ++i) { pending.Add(MISSING); } pending.Add(value); if (docsWithField != null) { docsWithField = FixedBitSet.EnsureCapacity(docsWithField, docID); docsWithField.Set(docID); } UpdateBytesUsed(); }
public static FuzzySet Deserialize(DataInput input) { var version = input.ReadInt32(); if (version == VERSION_SPI) { input.ReadString(); } var hashFunction = HashFunctionForVersion(version); var bloomSize = input.ReadInt32(); var numLongs = input.ReadInt32(); var longs = new long[numLongs]; for (var i = 0; i < numLongs; i++) { longs[i] = input.ReadInt64(); } var bits = new FixedBitSet(longs, bloomSize + 1); return(new FuzzySet(bits, bloomSize, hashFunction)); }
public void testMissingTerms() { String fieldName = "field1"; Directory rd = new RAMDirectory(); var w = new IndexWriter(rd, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); for (int i = 0; i < 100; i++) { var doc = new Document(); int term = i * 10; //terms are units of 10; doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.ANALYZED)); w.AddDocument(doc); } IndexReader reader = w.GetReader(); w.Close(); TermsFilter tf = new TermsFilter(); tf.AddTerm(new Term(fieldName, "19")); FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(reader); Assert.AreEqual(0, bits.Cardinality(), "Must match nothing"); tf.AddTerm(new Term(fieldName, "20")); bits = (FixedBitSet)tf.GetDocIdSet(reader); Assert.AreEqual(1, bits.Cardinality(), "Must match 1"); tf.AddTerm(new Term(fieldName, "10")); bits = (FixedBitSet)tf.GetDocIdSet(reader); Assert.AreEqual(2, bits.Cardinality(), "Must match 2"); tf.AddTerm(new Term(fieldName, "00")); bits = (FixedBitSet)tf.GetDocIdSet(reader); Assert.AreEqual(2, bits.Cardinality(), "Must match 2"); reader.Close(); rd.Close(); }
public void testHashCodeEquals() { // This test can't handle numBits==0: int numBits = rnd.Next(2000) + 1; FixedBitSet b1 = new FixedBitSet(numBits); FixedBitSet b2 = new FixedBitSet(numBits); Assert.IsTrue(b1.Equals(b2)); Assert.IsTrue(b2.Equals(b1)); for (int iter = 0; iter < 10 * rnd.Next(500); iter++) { int idx = rnd.Next(numBits); if (!b1.Get(idx)) { b1.Set(idx); Assert.IsFalse(b1.Equals(b2)); Assert.AreNotEqual(b1.GetHashCode(), b2.GetHashCode()); b2.Set(idx); Assert.AreEqual(b1, b2); Assert.AreEqual(b1.GetHashCode(), b2.GetHashCode()); } } }
public override FieldComparer SetNextReader(AtomicReaderContext context) { DocIdSet parents = outerInstance.parentsFilter.GetDocIdSet(context, null); if (parents == null) { throw new InvalidOperationException("AtomicReader " + context.AtomicReader + " contains no parents!"); } if (!(parents is FixedBitSet)) { throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents); } parentBits = (FixedBitSet)parents; for (int i = 0; i < parentComparers.Length; i++) { parentComparers[i] = parentComparers[i].SetNextReader(context); } for (int i = 0; i < childComparers.Length; i++) { childComparers[i] = childComparers[i].SetNextReader(context); } return(this); }
public void TestFieldNotPresent() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int num = AtLeast(3); int skip = Random().Next(num); var terms = new List <Term>(); for (int i = 0; i < num; i++) { terms.Add(new Term("field" + i, "content1")); Document doc = new Document(); if (skip == i) { continue; } doc.Add(NewStringField("field" + i, "content1", Field.Store.YES)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader reader = w.Reader; w.Dispose(); assertEquals(1, reader.Leaves.size()); AtomicReaderContext context = reader.Leaves.First(); TermsFilter tf = new TermsFilter(terms); FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs); assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality()); reader.Dispose(); dir.Dispose(); }
public override void Merge(DocValuesFieldUpdates other) { Debug.Assert(other is NumericDocValuesFieldUpdates); NumericDocValuesFieldUpdates otherUpdates = (NumericDocValuesFieldUpdates)other; if (size + otherUpdates.size > int.MaxValue) { throw new InvalidOperationException("cannot support more than System.Int32.MaxValue doc/value entries; size=" + size + " other.size=" + otherUpdates.size); } docs = docs.Grow(size + otherUpdates.size); values = values.Grow(size + otherUpdates.size); docsWithField = FixedBitSet.EnsureCapacity(docsWithField, (int)docs.Count); for (int i = 0; i < otherUpdates.size; i++) { int doc = (int)otherUpdates.docs.Get(i); if (otherUpdates.docsWithField.Get(i)) { docsWithField.Set(size); } docs.Set(size, doc); values.Set(size, otherUpdates.values.Get(i)); ++size; } }
public override FieldComparer SetNextReader(AtomicReaderContext context) { DocIdSet innerDocuments = _childFilter.GetDocIdSet(context, null); if (IsEmpty(innerDocuments)) { _childDocuments = null; } else if (innerDocuments is FixedBitSet fixedBitSet) { _childDocuments = fixedBitSet; } else { DocIdSetIterator iterator = innerDocuments.GetIterator(); _childDocuments = iterator != null?ToFixedBitSet(iterator, context.AtomicReader.MaxDoc) : null; } DocIdSet rootDocuments = _parentFilter.GetDocIdSet(context, null); if (IsEmpty(rootDocuments)) { _parentDocuments = null; } else if (rootDocuments is FixedBitSet fixedBitSet) { _parentDocuments = fixedBitSet; } else { DocIdSetIterator iterator = rootDocuments.GetIterator(); _parentDocuments = iterator != null?ToFixedBitSet(iterator, context.AtomicReader.MaxDoc) : null; } _wrappedComparer = _wrappedComparer.SetNextReader(context); return(this); }
protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs, TermsEnum termsEnum) { BytesRef spare = new BytesRef(); DocsEnum docsEnum = null; for (int i = 0; i < m_outerInstance._terms.Count; i++) { if (termsEnum.SeekExact(m_outerInstance._terms.Get(m_outerInstance._ords[i], spare))) { docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsFlags.NONE); float score = m_outerInstance._scores[m_outerInstance._ords[i]]; for (int doc = docsEnum.NextDoc(); doc != NO_MORE_DOCS; doc = docsEnum.NextDoc()) { matchingDocs.Set(doc); // In the case the same doc is also related to another doc, a score might be overwritten. I think this // can only happen in a many-to-many relation scores[doc] = score; } } } }
public override void Merge(DocValuesFieldUpdates other) { Debug.Assert(other is NumericDocValuesFieldUpdates); NumericDocValuesFieldUpdates otherUpdates = (NumericDocValuesFieldUpdates)other; if (Size + otherUpdates.Size > int.MaxValue) { throw new InvalidOperationException("cannot support more than Integer.MAX_VALUE doc/value entries; size=" + Size + " other.size=" + otherUpdates.Size); } Docs = Docs.Grow(Size + otherUpdates.Size); Values = Values.Grow(Size + otherUpdates.Size); DocsWithField = FixedBitSet.EnsureCapacity(DocsWithField, (int)Docs.Size()); for (int i = 0; i < otherUpdates.Size; i++) { int doc = (int)otherUpdates.Docs.Get(i); if (otherUpdates.DocsWithField.Get(i)) { DocsWithField.Set(Size); } Docs.Set(Size, doc); Values.Set(Size, otherUpdates.Values.Get(i)); ++Size; } }
public virtual void AddValue(int docID, BytesRef value) { if (docID < addedValues) { throw new ArgumentOutOfRangeException(nameof(docID), "DocValuesField \"" + fieldInfo.Name + "\" appears more than once in this document (only one value is allowed per field)"); // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention) } if (value is null) { throw new ArgumentNullException("field=\"" + fieldInfo.Name + "\": null value not allowed"); // LUCENENET specific - changed from IllegalArgumentException to ArgumentNullException (.NET convention) } if (value.Length > MAX_LENGTH) { throw new ArgumentException("DocValuesField \"" + fieldInfo.Name + "\" is too large, must be <= " + MAX_LENGTH); } // Fill in any holes: while (addedValues < docID) { addedValues++; lengths.Add(0); } addedValues++; lengths.Add(value.Length); try { bytesOut.WriteBytes(value.Bytes, value.Offset, value.Length); } catch (Exception ioe) when(ioe.IsIOException()) { // Should never happen! throw RuntimeException.Create(ioe); } docsWithField = FixedBitSet.EnsureCapacity(docsWithField, docID); docsWithField.Set(docID); UpdateBytesUsed(); }
private IBits GetMissingBits(int fieldNumber, long offset, long length) { if (offset == -1) { return(new Bits.MatchAllBits(maxDoc)); } else { IBits instance; UninterruptableMonitor.Enter(this); try { if (!docsWithFieldInstances.TryGetValue(fieldNumber, out instance)) { var data = (IndexInput)this.data.Clone(); data.Seek(offset); if (Debugging.AssertsEnabled) { Debugging.Assert(length % 8 == 0); } var bits = new long[(int)length >> 3]; for (var i = 0; i < bits.Length; i++) { bits[i] = data.ReadInt64(); } instance = new FixedBitSet(bits, maxDoc); docsWithFieldInstances[fieldNumber] = instance; } } finally { UninterruptableMonitor.Exit(this); } return(instance); } }
public void UndeleteAll() { int maxDoc = m_input.MaxDoc; liveDocs = new FixedBitSet(m_input.MaxDoc); if (m_input.HasDeletions) { IBits oldLiveDocs = m_input.LiveDocs; Debug.Assert(oldLiveDocs != null); // this loop is a little bit ineffective, as Bits has no nextSetBit(): for (int i = 0; i < maxDoc; i++) { if (oldLiveDocs.Get(i)) { liveDocs.Set(i); } } } else { // mark all docs as valid liveDocs.Set(0, maxDoc); } }
private void Add(int doc, BytesRef value) // LUCENENET specific: Marked private instead of public and changed the value parameter type { // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation if (size == int.MaxValue) { throw IllegalStateException.Create("cannot support more than System.Int32.MaxValue doc/value entries"); } BytesRef val = value; if (val is null) { val = BinaryDocValuesUpdate.MISSING; } // grow the structures to have room for more elements if (docs.Count == size) { docs = docs.Grow(size + 1); offsets = offsets.Grow(size + 1); lengths = lengths.Grow(size + 1); docsWithField = FixedBitSet.EnsureCapacity(docsWithField, (int)docs.Count); } if (val != BinaryDocValuesUpdate.MISSING) { // only mark the document as having a value in that field if the value wasn't set to null (MISSING) docsWithField.Set(size); } docs.Set(size, doc); offsets.Set(size, values.Length); lengths.Set(size, val.Length); values.Append(val); ++size; }
public virtual void AddValue(int docID, BytesRef value) { if (docID < addedValues) { throw new System.ArgumentException("DocValuesField \"" + fieldInfo.Name + "\" appears more than once in this document (only one value is allowed per field)"); } if (value == null) { throw new System.ArgumentException("field=\"" + fieldInfo.Name + "\": null value not allowed"); } if (value.Length > MAX_LENGTH) { throw new System.ArgumentException("DocValuesField \"" + fieldInfo.Name + "\" is too large, must be <= " + MAX_LENGTH); } // Fill in any holes: while (addedValues < docID) { addedValues++; lengths.Add(0); } addedValues++; lengths.Add(value.Length); try { bytesOut.WriteBytes(value.Bytes, value.Offset, value.Length); } catch (System.IO.IOException ioe) { // Should never happen! throw new Exception(ioe.ToString(), ioe); } docsWithField = FixedBitSet.EnsureCapacity(docsWithField, docID); docsWithField.Set(docID); UpdateBytesUsed(); }
public override void Add(int doc, object value) { // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation if (size == int.MaxValue) { throw new InvalidOperationException("cannot support more than System.Int32.MaxValue doc/value entries"); } BytesRef val = (BytesRef)value; if (val == null) { val = BinaryDocValuesUpdate.MISSING; } // grow the structures to have room for more elements if (docs.Count == size) { docs = docs.Grow(size + 1); offsets = offsets.Grow(size + 1); lengths = lengths.Grow(size + 1); docsWithField = FixedBitSet.EnsureCapacity(docsWithField, (int)docs.Count); } if (val != BinaryDocValuesUpdate.MISSING) { // only mark the document as having a value in that field if the value wasn't set to null (MISSING) docsWithField.Set(size); } docs.Set(size, doc); offsets.Set(size, values.Length); lengths.Set(size, val.Length); values.Append(val); ++size; }
public override void Add(int doc, object value) { // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation if (Size == int.MaxValue) { throw new InvalidOperationException("cannot support more than Integer.MAX_VALUE doc/value entries"); } BytesRef val = (BytesRef)value; if (val == null) { val = BinaryDocValuesUpdate.MISSING; } // grow the structures to have room for more elements if (Docs.Size() == Size) { Docs = Docs.Grow(Size + 1); Offsets = Offsets.Grow(Size + 1); Lengths = Lengths.Grow(Size + 1); DocsWithField = FixedBitSet.EnsureCapacity(DocsWithField, (int)Docs.Size()); } if (val != BinaryDocValuesUpdate.MISSING) { // only mark the document as having a value in that field if the value wasn't set to null (MISSING) DocsWithField.Set(Size); } Docs.Set(Size, doc); Offsets.Set(Size, Values.Length); Lengths.Set(Size, val.Length); Values.Append(val); ++Size; }
/// <summary> /// Create a sampled copy of the given hits. </summary> private MatchingDocs CreateSample(MatchingDocs docs) { int maxdoc = docs.context.Reader.MaxDoc; // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse FixedBitSet sampleDocs = new FixedBitSet(maxdoc); int binSize = (int)(1.0 / samplingRate); try { int counter = 0; int limit, randomIndex; if (leftoverBin != NOT_CALCULATED) { limit = leftoverBin; // either NOT_CALCULATED, which means we already sampled from that bin, // or the next document to sample randomIndex = leftoverIndex; } else { limit = binSize; randomIndex = random.NextInt(binSize); } DocIdSetIterator it = docs.bits.GetIterator(); for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc()) { if (counter == randomIndex) { sampleDocs.Set(doc); } counter++; if (counter >= limit) { counter = 0; limit = binSize; randomIndex = random.NextInt(binSize); } } if (counter == 0) { // we either exhausted the bin and the iterator at the same time, or // this segment had no results. in the latter case we might want to // carry leftover to the next segment as is, but that complicates the // code and doesn't seem so important. leftoverBin = leftoverIndex = NOT_CALCULATED; } else { leftoverBin = limit - counter; if (randomIndex > counter) { // the document to sample is in the next bin leftoverIndex = randomIndex - counter; } else if (randomIndex < counter) { // we sampled a document from the bin, so just skip over remaining // documents in the bin in the next segment. leftoverIndex = NOT_CALCULATED; } } return new MatchingDocs(docs.context, sampleDocs, docs.totalHits, null); } catch (IOException ioe) { throw new Exception(ioe.ToString(), ioe); } }
/* Does in-place AND NOT of the bits provided by the * iterator. */ //public void AndNot(DocIdSetIterator iter) //{ // var obs = iter as OpenBitSetIterator; // if (obs != null && iter.DocID() == -1) // { // AndNot(obs.arr, obs.words); // // advance after last doc that would be accepted if standard // // iteration is used (to exhaust it): // obs.Advance(bits.Length); // } // else // { // int doc; // while ((doc = iter.NextDoc()) < bits.Length) // { // Clear(doc); // } // } //} /* this = this AND NOT other */ public void AndNot(FixedBitSet other) { AndNot(other.bits, other.bits.Length); }
public FixedBitSetIterator(FixedBitSet bitset) { enumerator = bitset.bits.GetEnumerator(); }
internal RandomBits(int maxDoc, double pctLive, Random random) { Bits = new FixedBitSet(maxDoc); for (int i = 0; i < maxDoc; i++) { if (random.NextDouble() <= pctLive) { Bits.Set(i); } } }
public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter) : base(context.AtomicReader) { int maxDoc = @in.MaxDoc; FixedBitSet bits = new FixedBitSet(maxDoc); // ignore livedocs here, as we filter them later: DocIdSet docs = preserveFilter.GetDocIdSet(context, null); if (docs != null) { DocIdSetIterator it = docs.GetIterator(); if (it != null) { bits.Or(it); } } if (negateFilter) { bits.Flip(0, maxDoc); } if (@in.HasDeletions) { Bits oldLiveDocs = @in.LiveDocs; Debug.Assert(oldLiveDocs != null); DocIdSetIterator it = bits.GetIterator(); for (int i = it.NextDoc(); i < maxDoc; i = it.NextDoc()) { if (!oldLiveDocs.Get(i)) { // we can safely modify the current bit, as the iterator already stepped over it: bits.Clear(i); } } } this.liveDocs = bits; this.numDocs_Renamed = bits.Cardinality(); }
// maxAllowed = the "highest" we can index, but we will still // randomly index at lower IndexOption private FieldsProducer BuildIndex(Directory dir, FieldInfo.IndexOptions maxAllowed, bool allowPayloads, bool alwaysTestMax) { Codec codec = Codec; SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", MaxDoc, false, codec, null); int maxIndexOption = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(maxAllowed); if (VERBOSE) { Console.WriteLine("\nTEST: now build index"); } int maxIndexOptionNoOffsets = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); // TODO use allowPayloads var newFieldInfoArray = new FieldInfo[Fields.Count]; for (int fieldUpto = 0; fieldUpto < Fields.Count; fieldUpto++) { FieldInfo oldFieldInfo = FieldInfos.FieldInfo(fieldUpto); string pf = TestUtil.GetPostingsFormat(codec, oldFieldInfo.Name); int fieldMaxIndexOption; if (DoesntSupportOffsets.Contains(pf)) { fieldMaxIndexOption = Math.Min(maxIndexOptionNoOffsets, maxIndexOption); } else { fieldMaxIndexOption = maxIndexOption; } // Randomly picked the IndexOptions to index this // field with: FieldInfo.IndexOptions indexOptions = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToArray()[alwaysTestMax ? fieldMaxIndexOption : Random().Next(1 + fieldMaxIndexOption)]; bool doPayloads = indexOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads; newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.Name, true, fieldUpto, false, false, doPayloads, indexOptions, null, DocValuesType.NUMERIC, null); } FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray); // Estimate that flushed segment size will be 25% of // what we use in RAM: long bytes = TotalPostings * 8 + TotalPayloadBytes; SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, newFieldInfos, 32, null, new IOContext(new FlushInfo(MaxDoc, bytes))); FieldsConsumer fieldsConsumer = codec.PostingsFormat().FieldsConsumer(writeState); foreach (KeyValuePair<string, SortedDictionary<BytesRef, long>> fieldEnt in Fields) { string field = fieldEnt.Key; IDictionary<BytesRef, long> terms = fieldEnt.Value; FieldInfo fieldInfo = newFieldInfos.FieldInfo(field); FieldInfo.IndexOptions? 
indexOptions = fieldInfo.FieldIndexOptions; if (VERBOSE) { Console.WriteLine("field=" + field + " indexOtions=" + indexOptions); } bool doFreq = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS; bool doPos = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; bool doPayloads = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && allowPayloads; bool doOffsets = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; TermsConsumer termsConsumer = fieldsConsumer.AddField(fieldInfo); long sumTotalTF = 0; long sumDF = 0; FixedBitSet seenDocs = new FixedBitSet(MaxDoc); foreach (KeyValuePair<BytesRef, long> termEnt in terms) { BytesRef term = termEnt.Key; SeedPostings postings = GetSeedPostings(term.Utf8ToString(), termEnt.Value, false, maxAllowed); if (VERBOSE) { Console.WriteLine(" term=" + field + ":" + term.Utf8ToString() + " docFreq=" + postings.DocFreq + " seed=" + termEnt.Value); } PostingsConsumer postingsConsumer = termsConsumer.StartTerm(term); long totalTF = 0; int docID = 0; while ((docID = postings.NextDoc()) != DocsEnum.NO_MORE_DOCS) { int freq = postings.Freq(); if (VERBOSE) { Console.WriteLine(" " + postings.Upto + ": docID=" + docID + " freq=" + postings.Freq_Renamed); } postingsConsumer.StartDoc(docID, doFreq ? postings.Freq_Renamed : -1); seenDocs.Set(docID); if (doPos) { totalTF += postings.Freq_Renamed; for (int posUpto = 0; posUpto < freq; posUpto++) { int pos = postings.NextPosition(); BytesRef payload = postings.Payload; if (VERBOSE) { if (doPayloads) { Console.WriteLine(" pos=" + pos + " payload=" + (payload == null ? "null" : payload.Length + " bytes")); } else { Console.WriteLine(" pos=" + pos); } } postingsConsumer.AddPosition(pos, doPayloads ? payload : null, doOffsets ? postings.StartOffset() : -1, doOffsets ? postings.EndOffset() : -1); } } else if (doFreq) { totalTF += freq; } else { totalTF++; } postingsConsumer.FinishDoc(); } termsConsumer.FinishTerm(term, new TermStats(postings.DocFreq, doFreq ? totalTF : -1)); sumTotalTF += totalTF; sumDF += postings.DocFreq; } termsConsumer.Finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.Cardinality()); } fieldsConsumer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: after indexing: files="); foreach (string file in dir.ListAll()) { Console.WriteLine(" " + file + ": " + dir.FileLength(file) + " bytes"); } } CurrentFieldInfos = newFieldInfos; SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1); return codec.PostingsFormat().FieldsProducer(readState); }
public static void AfterClass() { AllTerms = null; FieldInfos = null; Fields = null; GlobalLiveDocs = null; }
public static void CreatePostings() { TotalPostings = 0; TotalPayloadBytes = 0; Fields = new SortedDictionary<string, SortedDictionary<BytesRef, long>>(); int numFields = TestUtil.NextInt(Random(), 1, 5); if (VERBOSE) { Console.WriteLine("TEST: " + numFields + " fields"); } MaxDoc = 0; FieldInfo[] fieldInfoArray = new FieldInfo[numFields]; int fieldUpto = 0; while (fieldUpto < numFields) { string field = TestUtil.RandomSimpleString(Random()); if (Fields.ContainsKey(field)) { continue; } fieldInfoArray[fieldUpto] = new FieldInfo(field, true, fieldUpto, false, false, true, FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, null, DocValuesType.NUMERIC, null); fieldUpto++; SortedDictionary<BytesRef, long> postings = new SortedDictionary<BytesRef, long>(); Fields[field] = postings; HashSet<string> seenTerms = new HashSet<string>(); int numTerms; if (Random().Next(10) == 7) { numTerms = AtLeast(50); } else { numTerms = TestUtil.NextInt(Random(), 2, 20); } for (int termUpto = 0; termUpto < numTerms; termUpto++) { string term = TestUtil.RandomSimpleString(Random()); if (seenTerms.Contains(term)) { continue; } seenTerms.Add(term); if (TEST_NIGHTLY && termUpto == 0 && fieldUpto == 1) { // Make 1 big term: term = "big_" + term; } else if (termUpto == 1 && fieldUpto == 1) { // Make 1 medium term: term = "medium_" + term; } else if (Random().NextBoolean()) { // Low freq term: term = "low_" + term; } else { // Very low freq term (don't multiply by RANDOM_MULTIPLIER): term = "verylow_" + term; } long termSeed = Random().NextLong(); postings[new BytesRef(term)] = termSeed; // NOTE: sort of silly: we enum all the docs just to // get the maxDoc DocsEnum docsEnum = GetSeedPostings(term, termSeed, false, FieldInfo.IndexOptions.DOCS_ONLY); int doc; int lastDoc = 0; while ((doc = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS) { lastDoc = doc; } MaxDoc = Math.Max(lastDoc, MaxDoc); } } FieldInfos = new FieldInfos(fieldInfoArray); // It's the count, not the last docID: MaxDoc++; GlobalLiveDocs = new FixedBitSet(MaxDoc); double liveRatio = Random().NextDouble(); for (int i = 0; i < MaxDoc; i++) { if (Random().NextDouble() <= liveRatio) { GlobalLiveDocs.Set(i); } } AllTerms = new List<FieldAndTerm>(); foreach (KeyValuePair<string, SortedDictionary<BytesRef, long>> fieldEnt in Fields) { string field = fieldEnt.Key; foreach (KeyValuePair<BytesRef, long> termEnt in fieldEnt.Value.EntrySet()) { AllTerms.Add(new FieldAndTerm(field, termEnt.Key)); } } if (VERBOSE) { Console.WriteLine("TEST: done init postings; " + AllTerms.Count + " total terms, across " + FieldInfos.Size() + " fields"); } }
public BinaryDocValuesFieldUpdates(string field, int maxDoc) : base(field, Type_e.BINARY) { DocsWithField = new FixedBitSet(64); Docs = new PagedMutable(1, 1024, PackedInts.BitsRequired(maxDoc - 1), PackedInts.COMPACT); Offsets = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST); Lengths = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST); Values = new BytesRef(16); // start small Size = 0; }
private FixedBitSet makeFixedBitSet(int[] a, int numBits) { FixedBitSet bs = new FixedBitSet(numBits); foreach (int e in a) { bs.Set(e); } return bs; }
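// Hypothetical usage of the helper above: build a FixedBitSet from explicit indexes and verify it
// round-trips. Member spelling (Cardinality() as a method, Get(int)) follows the surrounding test snippets.
int[] indexes = { 1, 5, 63, 64, 100 };
FixedBitSet bs = makeFixedBitSet(indexes, 128);
Assert.AreEqual(indexes.Length, bs.Cardinality());
foreach (int e in indexes)
{
    Assert.IsTrue(bs.Get(e));
}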
public void testEquals() { // This test can't handle numBits==0: int numBits = rnd.Next(2000) + 1; FixedBitSet b1 = new FixedBitSet(numBits); FixedBitSet b2 = new FixedBitSet(numBits); Assert.IsTrue(b1.Equals(b2)); Assert.IsTrue(b2.Equals(b1)); for (int iter = 0; iter < 10 * rnd.Next(500); iter++) { int idx = rnd.Next(numBits); if (!b1.Get(idx)) { b1.Set(idx); Assert.IsFalse(b1.Equals(b2)); Assert.IsFalse(b2.Equals(b1)); b2.Set(idx); Assert.IsTrue(b1.Equals(b2)); Assert.IsTrue(b2.Equals(b1)); } } // try different type of object Assert.IsFalse(b1.Equals(new Object())); }
protected internal override void Start() { inside = new FixedBitSet(m_maxDoc); outside = new FixedBitSet(m_maxDoc); }
protected internal virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms) { Assert.AreEqual(1, terms.DocCount); int termCount = (new HashSet<string>(Arrays.AsList(tk.Terms))).Count; Assert.AreEqual(termCount, terms.Size()); Assert.AreEqual(termCount, terms.SumDocFreq); Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions()); Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets()); Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads()); HashSet<BytesRef> uniqueTerms = new HashSet<BytesRef>(); foreach (string term in tk.Freqs.Keys) { uniqueTerms.Add(new BytesRef(term)); } BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/); Array.Sort(sortedTerms, terms.Comparator); TermsEnum termsEnum = terms.Iterator(Random().NextBoolean() ? null : this.termsEnum.Value); this.termsEnum.Value = termsEnum; for (int i = 0; i < sortedTerms.Length; ++i) { BytesRef nextTerm = termsEnum.Next(); Assert.AreEqual(sortedTerms[i], nextTerm); Assert.AreEqual(sortedTerms[i], termsEnum.Term()); Assert.AreEqual(1, termsEnum.DocFreq()); FixedBitSet bits = new FixedBitSet(1); DocsEnum docsEnum = termsEnum.Docs(bits, Random().NextBoolean() ? null : this.docsEnum.Value); Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc()); bits.Set(0); docsEnum = termsEnum.Docs(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsEnum); Assert.IsNotNull(docsEnum); Assert.AreEqual(0, docsEnum.NextDoc()); Assert.AreEqual(0, docsEnum.DocID()); Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)docsEnum.Freq()); Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc()); this.docsEnum.Value = docsEnum; bits.Clear(0); DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random().NextBoolean() ? null : this.docsAndPositionsEnum.Value); Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null); if (docsAndPositionsEnum != null) { Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc()); } bits.Set(0); docsAndPositionsEnum = termsEnum.DocsAndPositions(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? 
null : docsAndPositionsEnum); Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null); if (terms.HasPositions() || terms.HasOffsets()) { Assert.AreEqual(0, docsAndPositionsEnum.NextDoc()); int freq = docsAndPositionsEnum.Freq(); Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)freq); if (docsAndPositionsEnum != null) { for (int k = 0; k < freq; ++k) { int position = docsAndPositionsEnum.NextPosition(); ISet<int?> indexes; if (terms.HasPositions()) { indexes = tk.PositionToTerms[position]; Assert.IsNotNull(indexes); } else { indexes = tk.StartOffsetToTerms[docsAndPositionsEnum.StartOffset()]; Assert.IsNotNull(indexes); } if (terms.HasPositions()) { bool foundPosition = false; foreach (int index in indexes) { if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.Positions[index] == position) { foundPosition = true; break; } } Assert.IsTrue(foundPosition); } if (terms.HasOffsets()) { bool foundOffset = false; foreach (int index in indexes) { if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.StartOffsets[index] == docsAndPositionsEnum.StartOffset() && tk.EndOffsets[index] == docsAndPositionsEnum.EndOffset()) { foundOffset = true; break; } } Assert.IsTrue(foundOffset); } if (terms.HasPayloads()) { bool foundPayload = false; foreach (int index in indexes) { if (tk.TermBytes[index].Equals(termsEnum.Term()) && Equals(tk.Payloads[index], docsAndPositionsEnum.Payload)) { foundPayload = true; break; } } Assert.IsTrue(foundPayload); } } try { docsAndPositionsEnum.NextPosition(); Assert.Fail(); } catch (Exception e) { // ok } } Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc()); } this.docsAndPositionsEnum.Value = docsAndPositionsEnum; } Assert.IsNull(termsEnum.Next()); for (int i = 0; i < 5; ++i) { if (Random().NextBoolean()) { Assert.IsTrue(termsEnum.SeekExact(RandomInts.RandomFrom(Random(), tk.TermBytes))); } else { Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomInts.RandomFrom(Random(), tk.TermBytes))); } } }
public InPlaceMergeSorterAnonymousInnerClassHelper(BinaryDocValuesFieldUpdates outerInstance, PagedMutable docs, PagedGrowableWriter offsets, PagedGrowableWriter lengths, FixedBitSet docsWithField) { this.OuterInstance = outerInstance; this.Docs = docs; this.Offsets = offsets; this.Lengths = lengths; this.DocsWithField = docsWithField; }
public InPlaceMergeSorterAnonymousInnerClassHelper(NumericDocValuesFieldUpdates outerInstance, PagedMutable docs, PagedGrowableWriter values, FixedBitSet docsWithField) { this.outerInstance = outerInstance; this.docs = docs; this.values = values; this.docsWithField = docsWithField; }
public override void Merge(DocValuesFieldUpdates other) { BinaryDocValuesFieldUpdates otherUpdates = (BinaryDocValuesFieldUpdates)other; int newSize = Size + otherUpdates.Size; if (newSize > int.MaxValue) { throw new InvalidOperationException("cannot support more than Integer.MAX_VALUE doc/value entries; size=" + Size + " other.size=" + otherUpdates.Size); } Docs = Docs.Grow(newSize); Offsets = Offsets.Grow(newSize); Lengths = Lengths.Grow(newSize); DocsWithField = FixedBitSet.EnsureCapacity(DocsWithField, (int)Docs.Size()); for (int i = 0; i < otherUpdates.Size; i++) { int doc = (int)otherUpdates.Docs.Get(i); if (otherUpdates.DocsWithField.Get(i)) { DocsWithField.Set(Size); } Docs.Set(Size, doc); Offsets.Set(Size, Values.Length + otherUpdates.Offsets.Get(i)); // correct relative offset Lengths.Set(Size, otherUpdates.Lengths.Get(i)); ++Size; } Values.Append(otherUpdates.Values); }
public void testSmallBitSets() { // Make sure size 0-10 bit sets are OK: for (int numBits = 0; numBits < 10; numBits++) { FixedBitSet b1 = new FixedBitSet(numBits); FixedBitSet b2 = new FixedBitSet(numBits); Assert.IsTrue(b1.Equals(b2)); Assert.AreEqual(b1.GetHashCode(), b2.GetHashCode()); Assert.AreEqual(0, b1.Cardinality()); if (numBits > 0) { b1.Set(0, numBits); Assert.AreEqual(numBits, b1.Cardinality()); //b1.Flip(0, numBits); //Assert.AreEqual(0, b1.Cardinality()); } } }
/* Does in-place OR of the bits provided by the * iterator. */ //public void Or(DocIdSetIterator iter) //{ // if (iter is OpenBitSetIterator && iter.DocID() == -1) // { // var obs = (OpenBitSetIterator)iter; // Or(obs.arr, obs.words); // // advance after last doc that would be accepted if standard // // iteration is used (to exhaust it): // obs.Advance(bits.Length); // } // else // { // int doc; // while ((doc = iter.NextDoc()) < bits.Length) // { // Set(doc); // } // } //} /* this = this OR other */ public void Or(FixedBitSet other) { Or(other.bits, other.bits.Length); }
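// Minimal usage sketch for the in-place Or above (this = this OR other), assuming both sets are
// created with the same numBits as in the other snippets:
FixedBitSet acc = new FixedBitSet(128);
FixedBitSet other = new FixedBitSet(128);
acc.Set(3);
other.Set(3);
other.Set(70);
acc.Or(other); // union in place; bit 3 stays set, bit 70 is added
Assert.IsTrue(acc.Get(3));
Assert.IsTrue(acc.Get(70));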
/// <summary> /// Default merge impl </summary> public virtual void Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, TermsEnum termsEnum) { BytesRef term; Debug.Assert(termsEnum != null); long sumTotalTermFreq = 0; long sumDocFreq = 0; long sumDFsinceLastAbortCheck = 0; FixedBitSet visitedDocs = new FixedBitSet(mergeState.SegmentInfo.DocCount); if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY) { if (DocsEnum == null) { DocsEnum = new MappingMultiDocsEnum(); } DocsEnum.MergeState = mergeState; MultiDocsEnum docsEnumIn = null; while ((term = termsEnum.Next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: docsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsEnumIn, Index.DocsEnum.FLAG_NONE); if (docsEnumIn != null) { DocsEnum.Reset(docsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, DocsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.DocFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS) { if (DocsAndFreqsEnum == null) { DocsAndFreqsEnum = new MappingMultiDocsEnum(); } DocsAndFreqsEnum.MergeState = mergeState; MultiDocsEnum docsAndFreqsEnumIn = null; while ((term = termsEnum.Next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: docsAndFreqsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsAndFreqsEnumIn); Debug.Assert(docsAndFreqsEnumIn != null); DocsAndFreqsEnum.Reset(docsAndFreqsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, DocsAndFreqsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { if (PostingsEnum == null) { PostingsEnum = new MappingMultiDocsAndPositionsEnum(); } PostingsEnum.MergeState = mergeState; MultiDocsAndPositionsEnum postingsEnumIn = null; while ((term = termsEnum.Next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn, DocsAndPositionsEnum.FLAG_PAYLOADS); Debug.Assert(postingsEnumIn != null); PostingsEnum.Reset(postingsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, PostingsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } else { Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); if (PostingsEnum == null) { PostingsEnum = new MappingMultiDocsAndPositionsEnum(); } PostingsEnum.MergeState = mergeState; MultiDocsAndPositionsEnum 
postingsEnumIn = null; while ((term = termsEnum.Next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn); Debug.Assert(postingsEnumIn != null); PostingsEnum.Reset(postingsEnumIn); PostingsConsumer postingsConsumer = StartTerm(term); TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, PostingsEnum, visitedDocs); if (stats.DocFreq > 0) { FinishTerm(term, stats); sumTotalTermFreq += stats.TotalTermFreq; sumDFsinceLastAbortCheck += stats.DocFreq; sumDocFreq += stats.DocFreq; if (sumDFsinceLastAbortCheck > 60000) { mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0); sumDFsinceLastAbortCheck = 0; } } } } Finish(indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.Cardinality()); }
/// <summary> /// Default merge impl: append documents, mapping around /// deletes /// </summary> public virtual TermStats Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, DocsEnum postings, FixedBitSet visitedDocs) { int df = 0; long totTF = 0; if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY) { while (true) { int doc = postings.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); this.StartDoc(doc, -1); this.FinishDoc(); df++; } totTF = -1; } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS) { while (true) { int doc = postings.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postings.Freq(); this.StartDoc(doc, freq); this.FinishDoc(); df++; totTF += freq; } } else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { var postingsEnum = (DocsAndPositionsEnum)postings; while (true) { int doc = postingsEnum.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postingsEnum.Freq(); this.StartDoc(doc, freq); totTF += freq; for (int i = 0; i < freq; i++) { int position = postingsEnum.NextPosition(); BytesRef payload = postingsEnum.Payload; this.AddPosition(position, payload, -1, -1); } this.FinishDoc(); df++; } } else { Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); var postingsEnum = (DocsAndPositionsEnum)postings; while (true) { int doc = postingsEnum.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postingsEnum.Freq(); this.StartDoc(doc, freq); totTF += freq; for (int i = 0; i < freq; i++) { int position = postingsEnum.NextPosition(); BytesRef payload = postingsEnum.Payload; this.AddPosition(position, payload, postingsEnum.StartOffset(), postingsEnum.EndOffset()); } this.FinishDoc(); df++; } } return new TermStats(df, indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : totTF); }
/// <summary> /// Makes a full copy. /// </summary> /// <param name="other">The <see cref="FixedBitSet"/> whose bits are copied.</param> public FixedBitSet(FixedBitSet other) { bits = new BitArray(other.bits); }
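// Minimal usage sketch for the copy constructor above: the copy owns its own BitArray, so later
// mutations do not leak between the two sets. The numBits constructor and Set/Get members are
// assumed to match the other snippets in this collection.
FixedBitSet original = new FixedBitSet(64);
original.Set(10);
FixedBitSet copy = new FixedBitSet(original); // full, independent copy of the underlying bits
copy.Set(20);
Assert.IsTrue(copy.Get(10));      // copied bit is present
Assert.IsFalse(original.Get(20)); // the original is unaffected by changes to the copy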