/// <summary>
/// Create a random set which has <paramref name="numBitsSet"/> of its
/// <paramref name="numBits"/> bits set.
/// </summary>
protected static OpenBitSet RandomOpenSet(int numBits, int numBitsSet)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(numBitsSet <= numBits);
    }

    OpenBitSet result = new OpenBitSet(numBits);
    Random rnd = Random;

    if (numBitsSet == numBits)
    {
        // Fast path: every bit is requested, no need to draw random positions.
        result.Set(0, numBits);
        return result;
    }

    // Draw distinct random positions until exactly numBitsSet bits are set;
    // re-draw whenever a candidate position is already occupied.
    for (int remaining = numBitsSet; remaining > 0; --remaining)
    {
        int candidate;
        do
        {
            candidate = rnd.Next(numBits);
        } while (result.Get(candidate));
        result.Set(candidate);
    }

    return result;
}
/// <summary>
/// Asserts that the documents returned by <paramref name="q1"/>
/// are a subset of those returned by <paramref name="q2"/>.
/// <para/>
/// Both queries will be filtered by <paramref name="filter"/>.
/// </summary>
protected virtual void AssertSubsetOf(Query q1, Query q2, Filter filter)
{
    // TRUNK ONLY: test both filter code paths
    if (filter != null && Random.NextBoolean())
    {
        q1 = new FilteredQuery(q1, filter, TestUtil.RandomFilterStrategy(Random));
        q2 = new FilteredQuery(q2, filter, TestUtil.RandomFilterStrategy(Random));
        filter = null;
    }

    // not efficient, but simple!
    TopDocs subset = m_s1.Search(q1, filter, m_reader.MaxDoc);
    TopDocs superset = m_s2.Search(q2, filter, m_reader.MaxDoc);
    Assert.IsTrue(subset.TotalHits <= superset.TotalHits);

    // fill the superset into a bitset
    var seen = new BitSet(superset.ScoreDocs.Length);
    foreach (ScoreDoc hit in superset.ScoreDocs)
    {
        seen.Set(hit.Doc);
    }

    // check in the subset, that every bit was set by the super
    foreach (ScoreDoc hit in subset.ScoreDocs)
    {
        Assert.IsTrue(seen.Get(hit.Doc));
    }
}
/// <summary>
/// Two OpenBitSets holding the same bits must compare equal and produce the
/// same hash code, regardless of the capacity they were constructed with.
/// </summary>
public void TestHashCodeEquals()
{
    OpenBitSet larger = new OpenBitSet(200);
    OpenBitSet smaller = new OpenBitSet(64);
    larger.Set(3);
    smaller.Set(3);
    Assert.AreEqual(larger, smaller);
    Assert.AreEqual(larger.GetHashCode(), smaller.GetHashCode());
}
/// <summary>
/// Builds the set of documents whose "TenantId" term matches <c>_tenantId</c>.
/// </summary>
/// <param name="reader">Reader supplying the term postings.</param>
/// <returns>A bit set with one bit per matching document id.</returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    // Size by MaxDoc(), not NumDocs(): document ids range up to MaxDoc() - 1,
    // and NumDocs() undercounts when the index contains deleted documents.
    OpenBitSet bitSet = new OpenBitSet(reader.MaxDoc());
    TermDocs termDocs = reader.TermDocs(new Term("TenantId", _tenantId));
    try
    {
        // Next() only returns documents that actually contain the term, so
        // every returned doc has Freq >= 1 — no separate frequency check needed.
        while (termDocs.Next())
        {
            bitSet.Set(termDocs.Doc);
        }
    }
    finally
    {
        // The postings enumerator was previously leaked; close it deterministically.
        termDocs.Close();
    }
    return bitSet;
}
/// <summary>
/// Equality between OpenBitSets must be symmetric and independent of the
/// capacity each set was constructed with; it must also reject foreign types.
/// </summary>
public virtual void TestEquals()
{
    rand = NewRandom();
    OpenBitSet smaller = new OpenBitSet(1111);
    OpenBitSet larger = new OpenBitSet(2222);

    // Empty sets of differing capacity compare equal.
    Assert.IsTrue(smaller.Equals(larger));
    Assert.IsTrue(larger.Equals(smaller));

    // A bit within the shared range breaks equality until mirrored.
    smaller.Set(10);
    Assert.IsFalse(smaller.Equals(larger));
    Assert.IsFalse(larger.Equals(smaller));
    larger.Set(10);
    Assert.IsTrue(smaller.Equals(larger));
    Assert.IsTrue(larger.Equals(smaller));

    // A bit beyond the smaller set's capacity also breaks equality...
    larger.Set(2221);
    Assert.IsFalse(smaller.Equals(larger));
    Assert.IsFalse(larger.Equals(smaller));

    // ...until the smaller set mirrors it (growing as needed).
    smaller.Set(2221);
    Assert.IsTrue(smaller.Equals(larger));
    Assert.IsTrue(larger.Equals(smaller));

    // try different type of object
    Assert.IsFalse(smaller.Equals(new System.Object()));
}
/// <summary>
/// Get the DocIdSet.
/// </summary>
/// <param name="reader">Applicable reader.</param>
/// <returns>The set.</returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    OpenBitSet matches = new OpenBitSet(reader.MaxDoc());
    TermDocs postings = reader.TermDocs();
    try
    {
        // Union the postings of every requested term into a single bit set.
        foreach (Term term in this.terms)
        {
            postings.Seek(term);
            while (postings.Next())
            {
                matches.Set(postings.Doc());
            }
        }
    }
    finally
    {
        postings.Close();
    }
    return matches;
}
/// <summary>
/// Verifies that <c>WAH8DocIdSet.Intersect</c> over several compressed sets matches
/// a brute-force intersection computed on plain <c>OpenBitSet</c>s.
/// </summary>
public virtual void TestIntersection()
{
    int numBits = TestUtil.NextInt32(Random, 100, 1 << 20);
    int numDocIdSets = TestUtil.NextInt32(Random, 1, 4);
    IList<OpenBitSet> fixedSets = new List<OpenBitSet>(numDocIdSets);
    for (int i = 0; i < numDocIdSets; ++i)
    {
        // NOTE(review): passes a float here; presumably resolves to an overload of
        // RandomOpenSet taking a load factor, distinct from the (int, int) one — confirm.
        fixedSets.Add(RandomOpenSet(numBits, Random.NextSingle()));
    }
    IList<WAH8DocIdSet> compressedSets = new List<WAH8DocIdSet>(numDocIdSets);
    foreach (OpenBitSet set in fixedSets)
    {
        compressedSets.Add(CopyOf(set, numBits));
    }

    WAH8DocIdSet union = WAH8DocIdSet.Intersect(compressedSets);

    // Compute the expected intersection: start with all bits set, then for each
    // input set clear every gap between its consecutive set bits (and the tail
    // past its last set bit), leaving only bits present in every set.
    OpenBitSet expected = new OpenBitSet(numBits);
    expected.Set(0, expected.Length);
    foreach (OpenBitSet set in fixedSets)
    {
        for (int previousDoc = -1, doc = set.NextSetBit(0); ; previousDoc = doc, doc = set.NextSetBit(doc + 1))
        {
            if (doc == -1)
            {
                // No more set bits: clear from just past the last one to the end.
                expected.Clear(previousDoc + 1, set.Length);
                break;
            }
            else
            {
                // Clear the gap between the previous set bit and this one.
                expected.Clear(previousDoc + 1, doc);
            }
        }
    }

    AssertEquals(numBits, expected, union);
}
/// <summary>
/// Breadth-first traversal over the reachable nodes of <paramref name="fst"/>,
/// reading each node's real arcs exactly once.
/// </summary>
/// <param name="fst">The FST to walk.</param>
private static void Walk<T>(FST<T> fst) // LUCENENET NOTE: Not referenced anywhere
{
    // Use a real FIFO queue; the previous List<T> + RemoveAt(0) approach paid
    // O(n) per dequeue while preserving exactly the same visit order.
    var queue = new Queue<FST.Arc<T>>();

    // Java version was BitSet(), but in .NET we don't have a zero contructor BitSet.
    // Couldn't find the default size in BitSet, so went with zero here.
    var seen = new BitSet();
    var reader = fst.GetBytesReader();
    var startArc = fst.GetFirstArc(new FST.Arc<T>());
    queue.Enqueue(startArc);
    while (queue.Count > 0)
    {
        var arc = queue.Dequeue();
        long node = arc.Target;
        if (FST<T>.TargetHasArcs(arc) && !seen.Get((int)node))
        {
            seen.Set((int)node);
            fst.ReadFirstRealTargetArc(node, arc, reader);
            while (true)
            {
                // Copy the arc before advancing: ReadNextRealArc mutates it in place.
                queue.Enqueue((new FST.Arc<T>()).CopyFrom(arc));
                if (arc.IsLast)
                {
                    break;
                }
                fst.ReadNextRealArc(arc, reader);
            }
        }
    }
}
/// <summary>
/// Randomized differential test: drives an <c>OpenBitSet</c> and a
/// <c>System.Collections.BitArray</c> through the same random sequence of
/// single-bit operations, ranged flip/clear/set, and binary combinations,
/// asserting at every step that the two implementations agree.
/// </summary>
/// <param name="maxSize">Exclusive upper bound for the random size of each set.</param>
/// <param name="iter">Number of random iterations to run.</param>
/// <param name="mode">Iteration mode forwarded to <c>DoIterate</c>.</param>
internal virtual void DoRandomSets(int maxSize, int iter, int mode)
{
    // Previous iteration's pair; used to exercise the binary operations
    // (And/Or/Xor/AndNot) against sets of possibly different sizes.
    System.Collections.BitArray a0 = null;
    OpenBitSet b0 = null;
    for (int i = 0; i < iter; i++)
    {
        int sz = rand.Next(maxSize);
        System.Collections.BitArray a = new System.Collections.BitArray(sz);
        OpenBitSet b = new OpenBitSet(sz);

        // test the various ways of setting bits
        if (sz > 0)
        {
            int nOper = rand.Next(sz);
            for (int j = 0; j < nOper; j++)
            {
                int idx;

                idx = rand.Next(sz);
                a.Set(idx, true);
                b.FastSet(idx);
                idx = rand.Next(sz);
                a.Set(idx, false);
                b.FastClear(idx);
                idx = rand.Next(sz);
                a.Set(idx, !a.Get(idx));
                b.FastFlip(idx);

                // Flipping the same bit twice must restore its original value.
                bool val = b.FlipAndGet(idx);
                bool val2 = b.FlipAndGet(idx);
                Assert.IsTrue(val != val2);

                // GetAndSet returns the prior value and leaves the bit set.
                val = b.GetAndSet(idx);
                Assert.IsTrue(val2 == val);
                Assert.IsTrue(b.Get(idx));

                // Restore the bit to its pre-GetAndSet state so 'a' and 'b' stay in sync.
                if (!val)
                {
                    b.FastClear(idx);
                }
                Assert.IsTrue(b.Get(idx) == val);
            }
        }

        // test that the various ways of accessing the bits are equivalent
        DoGet(a, b);

        // {{dougsale-2.4.0}}
        //
        // Java's java.util.BitSet automatically grows as needed - i.e., when a bit is referenced beyond
        // the size of the BitSet, an exception isn't thrown - rather, the set grows to the size of the
        // referenced bit.
        //
        // System.Collections.BitArray does not have this feature, and thus I've faked it here by
        // "growing" the array explicitly when necessary (creating a new instance of the appropriate size
        // and setting the appropriate bits).
        //

        // test ranges, including possible extension
        int fromIndex, toIndex;
        fromIndex = rand.Next(sz + 80);
        toIndex = fromIndex + rand.Next((sz >> 1) + 1);

        // {{dougsale-2.4.0}}:
        // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSets 'a'
        // and 'aa' to the same cardinality as 'j+1' when 'a.Count < j+1' and 'fromIndex < toIndex':
        //BitArray aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, !a.Get(j));
        // So, if necessary, lets explicitly grow 'a' now; then 'a' and its clone, 'aa', will be of the required size.
        if (a.Length < toIndex && fromIndex < toIndex)
        {
            System.Collections.BitArray tmp = new System.Collections.BitArray(toIndex, false);
            for (int k = 0; k < a.Length; k++)
            {
                tmp.Set(k, a.Get(k));
            }
            a = tmp;
        }
        // {{dougsale-2.4.0}}: now we can invoke this statement without going 'out-of-bounds'
        System.Collections.BitArray aa = (System.Collections.BitArray)a.Clone();
        for (int j = fromIndex; j < toIndex; j++)
        {
            aa.Set(j, !a.Get(j));
        }
        OpenBitSet bb = (OpenBitSet)b.Clone();
        bb.Flip(fromIndex, toIndex);

        DoIterate(aa, bb, mode); // a problem here is from flip or doIterate

        fromIndex = rand.Next(sz + 80);
        toIndex = fromIndex + rand.Next((sz >> 1) + 1);
        // {{dougsale-2.4.0}}:
        // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSet 'aa'
        // when 'a.Count < j+1' and 'fromIndex < toIndex'
        //aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, false);
        // So, if necessary, lets explicitly grow 'aa' now
        if (a.Length < toIndex && fromIndex < toIndex)
        {
            aa = new System.Collections.BitArray(toIndex);
            for (int k = 0; k < a.Length; k++)
            {
                aa.Set(k, a.Get(k));
            }
        }
        else
        {
            aa = (System.Collections.BitArray)a.Clone();
        }
        for (int j = fromIndex; j < toIndex; j++)
        {
            aa.Set(j, false);
        }
        bb = (OpenBitSet)b.Clone();
        bb.Clear(fromIndex, toIndex);

        DoNextSetBit(aa, bb); // a problem here is from clear() or nextSetBit

        fromIndex = rand.Next(sz + 80);
        toIndex = fromIndex + rand.Next((sz >> 1) + 1);
        // {{dougsale-2.4.0}}:
        // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSet 'aa'
        // when 'a.Count < j+1' and 'fromIndex < toIndex'
        //aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, false);
        // So, if necessary, lets explicitly grow 'aa' now
        if (a.Length < toIndex && fromIndex < toIndex)
        {
            aa = new System.Collections.BitArray(toIndex);
            for (int k = 0; k < a.Length; k++)
            {
                aa.Set(k, a.Get(k));
            }
        }
        else
        {
            aa = (System.Collections.BitArray)a.Clone();
        }
        for (int j = fromIndex; j < toIndex; j++)
        {
            aa.Set(j, true);
        }
        bb = (OpenBitSet)b.Clone();
        bb.Set(fromIndex, toIndex);

        DoNextSetBit(aa, bb); // a problem here is from set() or nextSetBit

        if (a0 != null)
        {
            Assert.AreEqual(a.Equals(a0), b.Equals(b0));
            Assert.AreEqual(BitSetSupport.Cardinality(a), b.Cardinality());

            // {{dougsale-2.4.0}}
            //
            // The Java code used java.util.BitSet, which grows as needed.
            // When a bit, outside the dimension of the set is referenced,
            // the set automatically grows to the necessary size. The
            // new entries default to false.
            //
            // BitArray does not grow automatically and is not growable.
            // Thus when BitArray instances of mismatched cardinality
            // interact, we must first explicitly "grow" the smaller one.
            //
            // This growth is acheived by creating a new instance of the
            // required size and copying the appropriate values.
            //
            //BitArray a_and = (BitArray)a.Clone(); a_and.And(a0);
            //BitArray a_or = (BitArray)a.Clone(); a_or.Or(a0);
            //BitArray a_xor = (BitArray)a.Clone(); a_xor.Xor(a0);
            //BitArray a_andn = (BitArray)a.Clone(); for (int j = 0; j < a_andn.Count; j++) if (a0.Get(j)) a_andn.Set(j, false);

            System.Collections.BitArray a_and;
            System.Collections.BitArray a_or;
            System.Collections.BitArray a_xor;
            System.Collections.BitArray a_andn;

            if (a.Length < a0.Length)
            {
                // the Java code would have implicitly resized 'a_and', 'a_or', 'a_xor', and 'a_andn'
                // in this case, so we explicitly create a resized stand-in for 'a' here, allowing for
                // a to keep its original size while 'a_and', 'a_or', 'a_xor', and 'a_andn' are resized
                System.Collections.BitArray tmp = new System.Collections.BitArray(a0.Length, false);
                for (int z = 0; z < a.Length; z++)
                {
                    tmp.Set(z, a.Get(z));
                }

                a_and = (System.Collections.BitArray)tmp.Clone();
                a_and.And(a0);
                a_or = (System.Collections.BitArray)tmp.Clone();
                a_or.Or(a0);
                a_xor = (System.Collections.BitArray)tmp.Clone();
                a_xor.Xor(a0);
                // AndNot: clear every bit that is set in a0.
                a_andn = (System.Collections.BitArray)tmp.Clone();
                for (int j = 0; j < a_andn.Length; j++)
                {
                    if (a0.Get(j))
                    {
                        a_andn.Set(j, false);
                    }
                }
            }
            else if (a.Length > a0.Length)
            {
                // the Java code would have implicitly resized 'a0' in this case, so
                // we explicitly do so here:
                System.Collections.BitArray tmp = new System.Collections.BitArray(a.Length, false);
                for (int z = 0; z < a0.Length; z++)
                {
                    tmp.Set(z, a0.Get(z));
                }
                a0 = tmp;

                a_and = (System.Collections.BitArray)a.Clone();
                a_and.And(a0);
                a_or = (System.Collections.BitArray)a.Clone();
                a_or.Or(a0);
                a_xor = (System.Collections.BitArray)a.Clone();
                a_xor.Xor(a0);
                a_andn = (System.Collections.BitArray)a.Clone();
                for (int j = 0; j < a_andn.Length; j++)
                {
                    if (a0.Get(j))
                    {
                        a_andn.Set(j, false);
                    }
                }
            }
            else
            {
                // 'a' and 'a0' are the same size, no explicit growing necessary
                a_and = (System.Collections.BitArray)a.Clone();
                a_and.And(a0);
                a_or = (System.Collections.BitArray)a.Clone();
                a_or.Or(a0);
                a_xor = (System.Collections.BitArray)a.Clone();
                a_xor.Xor(a0);
                a_andn = (System.Collections.BitArray)a.Clone();
                for (int j = 0; j < a_andn.Length; j++)
                {
                    if (a0.Get(j))
                    {
                        a_andn.Set(j, false);
                    }
                }
            }

            // Same four binary operations on the OpenBitSet side.
            OpenBitSet b_and = (OpenBitSet)b.Clone();
            Assert.AreEqual(b, b_and);
            b_and.And(b0);
            OpenBitSet b_or = (OpenBitSet)b.Clone();
            b_or.Or(b0);
            OpenBitSet b_xor = (OpenBitSet)b.Clone();
            b_xor.Xor(b0);
            OpenBitSet b_andn = (OpenBitSet)b.Clone();
            b_andn.AndNot(b0);

            DoIterate(a_and, b_and, mode);
            DoIterate(a_or, b_or, mode);
            DoIterate(a_xor, b_xor, mode);
            DoIterate(a_andn, b_andn, mode);

            Assert.AreEqual(BitSetSupport.Cardinality(a_and), b_and.Cardinality());
            Assert.AreEqual(BitSetSupport.Cardinality(a_or), b_or.Cardinality());
            Assert.AreEqual(BitSetSupport.Cardinality(a_xor), b_xor.Cardinality());
            Assert.AreEqual(BitSetSupport.Cardinality(a_andn), b_andn.Cardinality());

            // test non-mutating popcounts
            Assert.AreEqual(b_and.Cardinality(), OpenBitSet.IntersectionCount(b, b0));
            Assert.AreEqual(b_or.Cardinality(), OpenBitSet.UnionCount(b, b0));
            Assert.AreEqual(b_xor.Cardinality(), OpenBitSet.XorCount(b, b0));
            Assert.AreEqual(b_andn.Cardinality(), OpenBitSet.AndNotCount(b, b0));
        }

        a0 = a;
        b0 = b;
    }
}
/// <summary>
/// Builds a bit set where only the documents to keep are marked: all bits
/// start unset, and for each distinct term value exactly one document — the
/// first or last occurrence, depending on <c>keepMode</c> — is set.
/// </summary>
/// <param name="reader">Reader supplying the terms and postings for <c>fieldName</c>.</param>
/// <returns>Bits set for the documents that are not duplicates.</returns>
private OpenBitSet CorrectBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); //assume all are INvalid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te == null)
    {
        return bits;
    }
    try
    {
        Term currTerm = te.Term();
        while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
        {
            int lastDoc = -1;
            //set non duplicates
            TermDocs td = reader.TermDocs(currTerm);
            try
            {
                if (td.Next())
                {
                    if (keepMode == KM_USE_FIRST_OCCURRENCE)
                    {
                        bits.Set(td.Doc());
                    }
                    else
                    {
                        // Walk to the last posting for this term and keep that doc.
                        do
                        {
                            lastDoc = td.Doc();
                        } while (td.Next());
                        bits.Set(lastDoc);
                    }
                }
            }
            finally
            {
                // Close the per-term postings enumerator (was leaked before).
                td.Close();
            }
            if (!te.Next())
            {
                break;
            }
            currTerm = te.Term();
        }
    }
    finally
    {
        // Close the term enumerator (was leaked before).
        te.Close();
    }
    return bits;
}
/// <summary>
/// Builds a bit set where all documents start marked valid, then for every
/// term that occurs in more than one document clears the duplicates, keeping
/// either the first or the last occurrence depending on <c>keepMode</c>.
/// </summary>
/// <param name="reader">Reader supplying the terms and postings for <c>fieldName</c>.</param>
/// <returns>Bits set for the documents that are not duplicates.</returns>
private OpenBitSet FastBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc());
    bits.Set(0, reader.MaxDoc()); //assume all are valid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te == null)
    {
        return bits;
    }
    try
    {
        Term currTerm = te.Term();
        while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
        {
            if (te.DocFreq() > 1)
            {
                int lastDoc = -1;
                //unset potential duplicates
                TermDocs td = reader.TermDocs(currTerm);
                try
                {
                    td.Next();
                    if (keepMode == KM_USE_FIRST_OCCURRENCE)
                    {
                        // Skip past the first occurrence so it stays marked valid.
                        td.Next();
                    }
                    do
                    {
                        lastDoc = td.Doc();
                        bits.Clear(lastDoc);
                    } while (td.Next());
                    if (keepMode == KM_USE_LAST_OCCURRENCE)
                    {
                        //restore the last bit
                        bits.Set(lastDoc);
                    }
                }
                finally
                {
                    // Close the per-term postings enumerator (was leaked before).
                    td.Close();
                }
            }
            if (!te.Next())
            {
                break;
            }
            currTerm = te.Term();
        }
    }
    finally
    {
        // Close the term enumerator (was leaked before).
        te.Close();
    }
    return bits;
}
/// <summary>
/// loads multi-value facet data. This method uses a workarea to prepare loading.
/// </summary>
/// <param name="fieldName">Name of the indexed field whose terms become facet values.</param>
/// <param name="reader">Index reader supplying terms and postings.</param>
/// <param name="listFactory">Factory for the term value list; when null a plain string list is used.</param>
/// <param name="workArea">Scratch buffers reused across loads.</param>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
{
    long t0 = Environment.TickCount; // NOTE(review): start tick is captured but never read in this method
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

    TermEnum tenum = null;
    TermDocs tdoc = null;
    ITermValueList list = (listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();

    // Marks every document that has at least one value; used at the end to
    // compute the frequency of the "no value" sentinel slot.
    OpenBitSet bitset = new OpenBitSet();
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    int t = 0; // current term number

    // Slot 0 is reserved as the "no value" sentinel entry.
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    t++;

    _overflow = false;
    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, ""));
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                // Stop once we run past this field's terms.
                if (term == null || !fieldName.Equals(term.Field))
                    break;

                string val = term.Text;
                if (val != null)
                {
                    list.Add(val);

                    tdoc.Seek(tenum);
                    //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
                    int df = 0;
                    int minID = -1;
                    int maxID = -1;
                    // NOTE(review): terms whose ordinal falls below negativeValueCount get a
                    // mirrored id — presumably negative values sort first in the term dictionary;
                    // confirm against GetNegativeValueCount.
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    if (tdoc.Next())
                    {
                        // First posting establishes minID; df counts live docs per term.
                        df++;
                        int docid = tdoc.Doc;
                        if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                        minID = docid;
                        bitset.Set(docid);
                        while (tdoc.Next())
                        {
                            df++;
                            docid = tdoc.Doc;
                            if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                            bitset.Set(docid);
                        }
                        // Postings arrive in ascending doc order, so the last one is the max.
                        maxID = docid;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }

                t++;
            } while (tenum.Next());
        }
    }
    finally
    {
        // Dispose postings first, then the term enumerator, even if one throws.
        try
        {
            if (tdoc != null)
            {
                tdoc.Dispose();
            }
        }
        finally
        {
            if (tenum != null)
            {
                tenum.Dispose();
            }
        }
    }

    list.Seal();

    try
    {
        // NOTE(review): sized maxdoc + 1 — one slot beyond the last doc id; confirm
        // this matches BigNestedIntArray's expected capacity convention.
        _nestedArray.Load(maxdoc + 1, loader);
    }
    catch (System.IO.IOException e)
    {
        // NOTE(review): 'throw e;' resets the stack trace; plain 'throw;' would preserve it.
        throw e;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }

    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();

    // Fill in the min/max doc ids for the sentinel slot 0 (docs with no value):
    // scan forward for the first such doc, then backward for the last.
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
    {
        ++doc;
    }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
        {
            --doc;
        }
        if (doc > 0)
        {
            this.maxIDs[0] = doc;
        }
    }
    // Sentinel frequency = docs with no value at all (total minus docs seen above).
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}