/// <summary>
/// Asserts that the documents returned by <code>q1</code> are a subset of those
/// returned by <code>q2</code>. Both queries will be filtered by <code>filter</code>.
/// </summary>
protected internal virtual void AssertSubsetOf(Query q1, Query q2, Filter filter)
{
    // TRUNK ONLY: test both filter code paths
    if (filter != null && Random().NextBoolean())
    {
        q1 = new FilteredQuery(q1, filter, TestUtil.RandomFilterStrategy(Random()));
        q2 = new FilteredQuery(q2, filter, TestUtil.RandomFilterStrategy(Random()));
        filter = null;
    }

    // not efficient, but simple!
    TopDocs subset = S1.Search(q1, filter, Reader.MaxDoc);
    TopDocs superset = S2.Search(q2, filter, Reader.MaxDoc);
    Assert.IsTrue(subset.TotalHits <= superset.TotalHits);

    // record every doc id produced by the superset query
    // (SafeSet presumably grows the set when a doc id exceeds the initial size — confirm)
    var seen = new BitArray(superset.ScoreDocs.Length);
    foreach (ScoreDoc sd in superset.ScoreDocs)
    {
        seen.SafeSet(sd.Doc, true);
    }

    // check in the subset, that every bit was set by the super
    foreach (ScoreDoc sd in subset.ScoreDocs)
    {
        Assert.IsTrue(seen.SafeGet(sd.Doc));
    }
}
/// <summary>
/// Builds a <see cref="BitArray"/> with one bit set for each value in <paramref name="a"/>.
/// </summary>
private BitArray MakeBitSet(int[] a)
{
    var result = new BitArray(a.Length);
    for (int i = 0; i < a.Length; i++)
    {
        result.SafeSet(a[i], true);
    }
    return result;
}
/// <summary>
/// Randomized cross-check of <see cref="BitArray"/> (via the Safe* extensions)
/// against <see cref="LongBitSet"/>: applies the same random set/clear/flip
/// operations to both and asserts that the two implementations agree.
/// NOTE(review): the <paramref name="mode"/> parameter is unused in this body.
/// </summary>
internal virtual void DoRandomSets(int maxSize, int iter, int mode)
{
    // previous iteration's pair, used below for the binary-op (and/or/xor/andnot) checks
    BitArray a0 = null;
    LongBitSet b0 = null;
    for (int i = 0; i < iter; i++)
    {
        int sz = TestUtil.NextInt(Random(), 2, maxSize);
        BitArray a = new BitArray(sz);
        LongBitSet b = new LongBitSet(sz);

        // test the various ways of setting bits
        if (sz > 0)
        {
            int nOper = Random().Next(sz);
            for (int j = 0; j < nOper; j++)
            {
                int idx;

                idx = Random().Next(sz);
                a.SafeSet(idx, true);
                b.Set(idx);

                idx = Random().Next(sz);
                a.SafeSet(idx, false);
                b.Clear(idx);

                idx = Random().Next(sz);
                a.SafeSet(idx, !a.SafeGet(idx));
                b.Flip(idx, idx + 1);

                idx = Random().Next(sz);
                a.SafeSet(idx, !a.SafeGet(idx));
                b.Flip(idx, idx + 1);

                // GetAndSet must report the prior value and leave the bit set afterwards
                bool val2 = b.Get(idx);
                bool val = b.GetAndSet(idx);
                Assert.IsTrue(val2 == val);
                Assert.IsTrue(b.Get(idx));

                // restore the original value if GetAndSet changed it
                if (!val)
                {
                    b.Clear(idx);
                }
                Assert.IsTrue(b.Get(idx) == val);
            }
        }

        // test that the various ways of accessing the bits are equivalent
        DoGet(a, b);

        // test ranges, including possible extension
        int fromIndex, toIndex;
        fromIndex = Random().Next(sz / 2);
        toIndex = fromIndex + Random().Next(sz - fromIndex);
        BitArray aa = (BitArray)a.Clone();
        aa.Flip(fromIndex, toIndex);
        LongBitSet bb = b.Clone();
        bb.Flip(fromIndex, toIndex);

        fromIndex = Random().Next(sz / 2);
        toIndex = fromIndex + Random().Next(sz - fromIndex);
        aa = (BitArray)a.Clone();
        aa.Clear(fromIndex, toIndex);
        bb = b.Clone();
        bb.Clear(fromIndex, toIndex);

        DoNextSetBit(aa, bb); // a problem here is from clear() or nextSetBit
        DoPrevSetBit(aa, bb);

        fromIndex = Random().Next(sz / 2);
        toIndex = fromIndex + Random().Next(sz - fromIndex);
        aa = (BitArray)a.Clone();
        aa.Set(fromIndex, toIndex);
        bb = b.Clone();
        bb.Set(fromIndex, toIndex);

        DoNextSetBit(aa, bb); // a problem here is from set() or nextSetBit
        DoPrevSetBit(aa, bb);

        if (b0 != null && b0.Length() <= b.Length())
        {
            Assert.AreEqual(a.Cardinality(), b.Cardinality());

            // cross-check the bulk boolean operations against the previous iteration's pair
            BitArray a_and = (BitArray)a.Clone();
            a_and = a_and.And_UnequalLengths(a0);
            BitArray a_or = (BitArray)a.Clone();
            a_or = a_or.Or_UnequalLengths(a0);
            BitArray a_xor = (BitArray)a.Clone();
            a_xor = a_xor.Xor_UnequalLengths(a0);
            BitArray a_andn = (BitArray)a.Clone();
            a_andn.AndNot(a0);

            LongBitSet b_and = b.Clone();
            Assert.AreEqual(b, b_and);
            b_and.And(b0);
            LongBitSet b_or = b.Clone();
            b_or.Or(b0);
            LongBitSet b_xor = b.Clone();
            b_xor.Xor(b0);
            LongBitSet b_andn = b.Clone();
            b_andn.AndNot(b0);

            Assert.AreEqual(a0.Cardinality(), b0.Cardinality());
            Assert.AreEqual(a_or.Cardinality(), b_or.Cardinality());
            Assert.AreEqual(a_and.Cardinality(), b_and.Cardinality());
            // NOTE(review): duplicate of the a_or/b_or check two lines above — possibly
            // intended to compare a different pair; confirm against upstream Lucene
            Assert.AreEqual(a_or.Cardinality(), b_or.Cardinality());
            Assert.AreEqual(a_xor.Cardinality(), b_xor.Cardinality());
            Assert.AreEqual(a_andn.Cardinality(), b_andn.Cardinality());
        }

        a0 = a;
        b0 = b;
    }
}
/// <summary>
/// Builds a doc-id set over the documents matching the term <c>field:0</c>.
/// Returns null when the term has no docs. The randomly chosen
/// <c>nullBitset</c> flag is forwarded to the anonymous DocIdSet
/// (presumably to exercise a null-bitset code path — confirm against the helper).
/// </summary>
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    bool nullBitset = Random().Next(10) == 5;
    AtomicReader reader = context.AtomicReader;
    DocsEnum termDocsEnum = reader.TermDocsEnum(new Term("field", "0"));
    if (termDocsEnum == null)
    {
        return null; // no docs -- return null
    }

    var bitSet = new BitArray(reader.MaxDoc);
    for (int doc = termDocsEnum.NextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = termDocsEnum.NextDoc())
    {
        bitSet.SafeSet(doc, true);
    }
    return new DocIdSetAnonymousInnerClassHelper(this, nullBitset, reader, bitSet);
}
/// <summary>
/// Reads the live-docs file for <paramref name="info"/> in the SimpleText format:
/// a SIZE header line, one DOC line per recorded document id, then an END marker
/// followed by a checksum footer.
/// </summary>
public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
{
    Debug.Assert(info.HasDeletions());
    var scratch = new BytesRef();
    var scratchUtf16 = new CharsRef();
    var fileName = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);
    ChecksumIndexInput input = null;
    var success = false;
    try
    {
        input = dir.OpenChecksumInput(fileName, context);

        // header: the size of the bit set
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SIZE));
        var size = ParseIntAt(scratch, SIZE.Length, scratchUtf16);

        var bits = new BitArray(size);

        // one DOC line per document id until the END marker
        SimpleTextUtil.ReadLine(input, scratch);
        while (!scratch.Equals(END))
        {
            Debug.Assert(StringHelper.StartsWith(scratch, DOC));
            var docid = ParseIntAt(scratch, DOC.Length, scratchUtf16);
            bits.SafeSet(docid, true);
            SimpleTextUtil.ReadLine(input, scratch);
        }

        SimpleTextUtil.CheckFooter(input);
        success = true;
        return new SimpleTextBits(bits, size);
    }
    finally
    {
        // on success close normally (propagating close errors); on failure close
        // quietly so the original exception is not masked
        if (success)
        {
            IOUtils.Close(input);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Adds random non-empty unicode strings to the hash, mirroring the assigned
/// ids in a bit set, then verifies that Compact() yields exactly the live ids
/// and that Clear()/Reinit() empty the hash.
/// </summary>
public virtual void TestCompact()
{
    var scratch = new BytesRef();
    int rounds = AtLeast(2);
    for (int round = 0; round < rounds; round++)
    {
        int entryCount = 0;
        const int size = 797;
        var seen = new BitArray(size);
        for (int i = 0; i < size; i++)
        {
            string s;
            do
            {
                s = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
            } while (s.Length == 0);
            scratch.CopyChars(s);
            int key = Hash.Add(scratch);
            if (key < 0)
            {
                // negative key: the string was already present at id (-key) - 1
                Assert.IsTrue(seen.SafeGet((-key) - 1));
            }
            else
            {
                Assert.IsFalse(seen.SafeGet(key));
                seen.SafeSet(key, true);
                entryCount++;
            }
        }

        Assert.AreEqual(Hash.Size(), seen.Cardinality());
        Assert.AreEqual(entryCount, seen.Cardinality());
        Assert.AreEqual(entryCount, Hash.Size());

        int[] compact = Hash.Compact();
        Assert.IsTrue(entryCount < compact.Length);
        // clearing each compacted id must empty the bit set exactly
        for (int i = 0; i < entryCount; i++)
        {
            seen.SafeSet(compact[i], false);
        }
        Assert.AreEqual(0, seen.Cardinality());

        Hash.Clear();
        Assert.AreEqual(0, Hash.Size());
        Hash.Reinit();
    }
}
/// <summary>
/// Returns a doc-id set containing docs 1 and 3, restricted to those allowed
/// by <paramref name="acceptDocs"/> (a missing filter allows all 5 docs).
/// </summary>
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    if (acceptDocs == null)
    {
        acceptDocs = new Bits_MatchAllBits(5);
    }
    var bits = new BitArray(5);
    foreach (int doc in new[] { 1, 3 })
    {
        if (acceptDocs.Get(doc))
        {
            bits.SafeSet(doc, true);
        }
    }
    return new DocIdBitSet(bits);
}
/// <summary>
/// Checks whether there is a loop containing s. (this is sufficient since
/// there are never transitions to dead states.)
/// </summary>
// TODO: not great that this is recursive... in theory a
// large automata could exceed java's stack
private static bool IsFinite(State s, BitArray path, BitArray visited)
{
    path.SafeSet(s.number, true);
    foreach (Transition t in s.Transitions)
    {
        int target = t.To.number;
        // a transition back onto the current DFS path is a cycle -> infinite
        if (path.SafeGet(target))
        {
            return false;
        }
        // recurse into not-yet-visited targets; an infinite subtree makes s infinite
        // (on failure we return immediately, leaving s on the path like the original)
        if (!visited.SafeGet(target) && !IsFinite(t.To, path, visited))
        {
            return false;
        }
    }
    path.SafeSet(s.number, false);
    visited.SafeSet(s.number, true);
    return true;
}
/// <summary>
/// Returns a BitArray of size <paramref name="sz"/> with up to
/// <paramref name="numBitsToSet"/> randomly chosen bits set
/// (duplicate picks may set fewer distinct bits).
/// </summary>
public virtual BitArray RandBitSet(int sz, int numBitsToSet)
{
    var bits = new BitArray(sz);
    int remaining = numBitsToSet;
    while (remaining-- > 0)
    {
        bits.SafeSet(Random().Next(sz), true);
    }
    return bits;
}
/// <summary>
/// Runs <paramref name="iter"/> random MUST-conjunction queries of 2..maxClauses
/// distinct terms against <paramref name="s"/>; returns the accumulated collector
/// sum and (when VERBOSE) prints the average match count.
/// </summary>
public virtual int DoTermConjunctions(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
{
    int ret = 0;
    long nMatches = 0;
    for (int i = 0; i < iter; i++)
    {
        int nClauses = Random().Next(maxClauses - 1) + 2; // min 2 clauses
        var bq = new BooleanQuery();
        var used = new BitArray(termsInIndex);
        for (int j = 0; j < nClauses; j++)
        {
            // don't pick same clause twice: advance to the next unused term,
            // wrapping to the first unused one if we ran off the end
            int tnum = Random().Next(termsInIndex);
            if (used.SafeGet(tnum))
            {
                tnum = used.NextClearBit(tnum);
            }
            if (tnum < 0 || tnum >= termsInIndex)
            {
                tnum = used.NextClearBit(0);
            }
            used.SafeSet(tnum, true);
            bq.Add(new TermQuery(Terms[tnum]), BooleanClause.Occur.MUST);
        }
        var hc = new CountingHitCollector();
        s.Search(bq, hc);
        nMatches += hc.Count;
        ret += hc.Sum;
    }
    if (VERBOSE)
    {
        Console.WriteLine("Average number of matches=" + (nMatches / iter));
    }
    return ret;
}
/// <summary>
/// Minimizes the given automaton using Hopcroft's algorithm.
/// </summary>
public static void MinimizeHopcroft(Automaton a)
{
    a.Determinize();
    // fast path: a single self-loop over the whole code-point range is already minimal
    if (a.Initial.numTransitions == 1)
    {
        Transition t = a.Initial.TransitionsArray[0];
        if (t.To == a.Initial && t.Min_Renamed == Character.MIN_CODE_POINT && t.Max_Renamed == Character.MAX_CODE_POINT)
        {
            return;
        }
    }
    a.Totalize();

    // initialize data structures
    int[] sigma = a.StartPoints;
    State[] states = a.NumberedStates;
    int sigmaLen = sigma.Length, statesLen = states.Length;
    // reverse[q, x]: states with a transition on sigma[x] into state q
    List<State>[,] reverse = new List<State>[statesLen, sigmaLen];
    HashSet<State>[] partition = new HashSet<State>[statesLen];
    List<State>[] splitblock = new List<State>[statesLen];
    // block[q]: index of the partition block currently containing state q
    int[] block = new int[statesLen];
    StateList[,] active = new StateList[statesLen, sigmaLen];
    StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
    LinkedList<IntPair> pending = new LinkedList<IntPair>();
    // pending2 bit (x * statesLen + j) mirrors membership of pair (j, x) in pending
    BitArray pending2 = new BitArray(sigmaLen * statesLen);
    BitArray split = new BitArray(statesLen), refine = new BitArray(statesLen), refine2 = new BitArray(statesLen);
    for (int q = 0; q < statesLen; q++)
    {
        splitblock[q] = new List<State>();
        partition[q] = new HashSet<State>();
        for (int x = 0; x < sigmaLen; x++)
        {
            active[q, x] = new StateList();
        }
    }

    // find initial partition (block 0 = accept states, block 1 = others) and reverse edges
    for (int q = 0; q < statesLen; q++)
    {
        State qq = states[q];
        int j = qq.accept ? 0 : 1;
        partition[j].Add(qq);
        block[q] = j;
        for (int x = 0; x < sigmaLen; x++)
        {
            //List<State>[] r = reverse[qq.Step(sigma[x]).number];
            var r = qq.Step(sigma[x]).number;
            if (reverse[r, x] == null)
            {
                reverse[r, x] = new List<State>();
            }
            reverse[r, x].Add(qq);
        }
    }

    // initialize active sets
    for (int j = 0; j <= 1; j++)
    {
        for (int x = 0; x < sigmaLen; x++)
        {
            foreach (State qq in partition[j])
            {
                if (reverse[qq.number, x] != null)
                {
                    active2[qq.number, x] = active[j, x].Add(qq);
                }
            }
        }
    }

    // initialize pending: for each symbol, enqueue the smaller of the two initial blocks
    for (int x = 0; x < sigmaLen; x++)
    {
        int j = (active[0, x].Size <= active[1, x].Size) ? 0 : 1;
        pending.AddLast(new IntPair(j, x));
        pending2.SafeSet(x * statesLen + j, true);
    }

    // process pending until fixed point
    int k = 2; // next fresh block index
    while (pending.Count > 0)
    {
        IntPair ip = pending.First.Value;
        pending.RemoveFirst();
        int p = ip.N1;
        int x = ip.N2;
        pending2.SafeSet(x * statesLen + p, false);

        // find states that need to be split off their blocks
        for (StateListNode m = active[p, x].First; m != null; m = m.Next)
        {
            List<State> r = reverse[m.q.number, x];
            if (r != null)
            {
                foreach (State s in r)
                {
                    int i = s.number;
                    if (!split.SafeGet(i))
                    {
                        split.SafeSet(i, true);
                        int j = block[i];
                        splitblock[j].Add(s);
                        if (!refine2.SafeGet(j))
                        {
                            refine2.SafeSet(j, true);
                            refine.SafeSet(j, true);
                        }
                    }
                }
            }
        }

        // refine blocks
        for (int j = Number.NextSetBit(refine, 0); j >= 0; j = Number.NextSetBit(refine, j + 1))
        {
            List<State> sb = splitblock[j];
            if (sb.Count < partition[j].Count)
            {
                HashSet<State> b1 = partition[j];
                HashSet<State> b2 = partition[k];
                // move the split-off states from block j into the new block k,
                // keeping the active lists in sync
                foreach (State s in sb)
                {
                    b1.Remove(s);
                    b2.Add(s);
                    block[s.number] = k;
                    for (int c = 0; c < sigmaLen; c++)
                    {
                        StateListNode sn = active2[s.number, c];
                        if (sn != null && sn.Sl == active[j, c])
                        {
                            sn.Remove();
                            active2[s.number, c] = active[k, c].Add(s);
                        }
                    }
                }
                // update pending
                for (int c = 0; c < sigmaLen; c++)
                {
                    int aj = active[j, c].Size, ak = active[k, c].Size, ofs = c * statesLen;
                    if (!pending2.SafeGet(ofs + j) && 0 < aj && aj <= ak)
                    {
                        pending2.SafeSet(ofs + j, true);
                        pending.AddLast(new IntPair(j, c));
                    }
                    else
                    {
                        pending2.SafeSet(ofs + k, true);
                        pending.AddLast(new IntPair(k, c));
                    }
                }
                k++;
            }
            refine2.SafeSet(j, false);
            foreach (State s in sb)
            {
                split.SafeSet(s.number, false);
            }
            sb.Clear();
        }
        refine.SetAll(false);
    }

    // make a new state for each equivalence class, set initial state
    State[] newstates = new State[k];
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = new State();
        newstates[n] = s;
        foreach (State q in partition[n])
        {
            if (q == a.Initial)
            {
                a.Initial = s;
            }
            s.accept = q.accept;
            s.number = q.number; // select representative
            q.number = n;
        }
    }

    // build transitions and set acceptance
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = newstates[n];
        s.accept = states[s.number].accept;
        foreach (Transition t in states[s.number].Transitions)
        {
            s.AddTransition(new Transition(t.Min_Renamed, t.Max_Renamed, newstates[t.To.number]));
        }
    }
    a.ClearNumberedStates();
    a.RemoveDeadTransitions();
}
/// <summary>
/// Removes transitions to dead states and calls <seealso cref="#reduce()"/>.
/// (A state is "dead" if no accept state is reachable from it.)
/// </summary>
public virtual void RemoveDeadTransitions()
{
    State[] states = NumberedStates;
    if (IsSingleton)
    {
        return;
    }

    State[] live = LiveStates;
    var liveSet = new BitArray(states.Length);
    foreach (State s in live)
    {
        liveSet.SafeSet(s.number, true);
    }

    foreach (State s in states)
    {
        // compact the transition array in place, keeping only edges into live states
        int kept = 0;
        for (int i = 0; i < s.numTransitions; i++)
        {
            Transition t = s.TransitionsArray[i];
            if (liveSet.SafeGet(t.To.Number))
            {
                s.TransitionsArray[kept++] = t;
            }
        }
        s.numTransitions = kept;
    }

    // renumber the surviving states consecutively
    for (int i = 0; i < live.Length; i++)
    {
        live[i].number = i;
    }
    if (live.Length > 0)
    {
        NumberedStates = live;
    }
    else
    {
        // sneaky corner case -- if machine accepts no strings
        ClearNumberedStates();
    }
    Reduce();
}