Cardinality() public method

public Cardinality ( ) : long
return long
        public override RandomAccessDocIdSet GetRandomAccessDocIdSet(BoboIndexReader reader)
        {
            MultiValueFacetDataCache dataCache = _facetHandler.GetFacetData<MultiValueFacetDataCache>(reader);
            int[] index = _valueConverter.Convert(dataCache, _vals);
            //BigNestedIntArray nestedArray = dataCache.NestedArray;
            OpenBitSet bitset = new OpenBitSet(dataCache.ValArray.Count);

            foreach (int i in index)
            {
                bitset.FastSet(i);
            }

            if (_takeCompliment)
            {
                // flip the bits
                int size = dataCache.ValArray.Count;
                for (int i = 0; i < size; ++i)
                {
                    bitset.FastFlip(i);
                }
            }

            long count = bitset.Cardinality();

            if (count == 0)
            {
                return new EmptyRandomAccessDocIdSet();
            }
            else
            {
                return new MultiRandomAccessDocIdSet(dataCache, bitset);
            }
        }
Example #2
0
		internal virtual void  DoRandomSets(int maxSize, int iter, int mode)
		{
			System.Collections.BitArray a0 = null;
			OpenBitSet b0 = null;
			
			for (int i = 0; i < iter; i++)
			{
				int sz = rand.Next(maxSize);
				System.Collections.BitArray a = new System.Collections.BitArray(sz);
				OpenBitSet b = new OpenBitSet(sz);
				
				// test the various ways of setting bits
				if (sz > 0)
				{
					int nOper = rand.Next(sz);
					for (int j = 0; j < nOper; j++)
					{
						int idx;
						
						idx = rand.Next(sz);
						a.Set(idx, true);
						b.FastSet(idx);
						idx = rand.Next(sz);
						a.Set(idx, false);
						b.FastClear(idx);
						idx = rand.Next(sz);
						a.Set(idx, !a.Get(idx));
						b.FastFlip(idx);
						
						bool val = b.FlipAndGet(idx);
						bool val2 = b.FlipAndGet(idx);
						Assert.IsTrue(val != val2);
						
						val = b.GetAndSet(idx);
						Assert.IsTrue(val2 == val);
						Assert.IsTrue(b.Get(idx));
						
						if (!val)
							b.FastClear(idx);
						Assert.IsTrue(b.Get(idx) == val);
					}
				}
				
				// test that the various ways of accessing the bits are equivalent
				DoGet(a, b);
				
                // {{dougsale-2.4.0}}
                //
                // Java's java.util.BitSet automatically grows as needed - i.e., when a bit is referenced beyond
                // the size of the BitSet, an exception isn't thrown - rather, the set grows to the size of the 
                // referenced bit.
                //
                // System.Collections.BitArray does not have this feature, and thus I've faked it here by
                // "growing" the array explicitly when necessary (creating a new instance of the appropriate size
                // and setting the appropriate bits).
                //

                // test ranges, including possible extension
                int fromIndex, toIndex;
                fromIndex = rand.Next(sz + 80);
                toIndex = fromIndex + rand.Next((sz >> 1) + 1);

                // {{dougsale-2.4.0}}:
                // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSets 'a'
                // and 'aa' to the same cardinality as 'j+1' when 'a.Count < j+1' and 'fromIndex < toIndex':
                //BitArray aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, !a.Get(j));
                // So, if necessary, lets explicitly grow 'a' now; then 'a' and its clone, 'aa', will be of the required size.
                if (a.Count < toIndex && fromIndex < toIndex)
                {
                    System.Collections.BitArray tmp = new System.Collections.BitArray(toIndex, false);
                    for (int k = 0; k < a.Count; k++)
                        tmp.Set(k, a.Get(k));
                    a = tmp;
                }
                // {{dougsale-2.4.0}}: now we can invoke this statement without going 'out-of-bounds'
                System.Collections.BitArray aa = (System.Collections.BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, !a.Get(j));
                OpenBitSet bb = (OpenBitSet)b.Clone(); bb.Flip(fromIndex, toIndex);

                DoIterate(aa, bb, mode); // a problem here is from flip or doIterate

                fromIndex = rand.Next(sz + 80);
                toIndex = fromIndex + rand.Next((sz >> 1) + 1);
                // {{dougsale-2.4.0}}:
                // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSet 'aa'
                // when 'a.Count < j+1' and 'fromIndex < toIndex'
                //aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, false);
                // So, if necessary, lets explicitly grow 'aa' now
                if (a.Count < toIndex && fromIndex < toIndex)
                {
                    aa = new System.Collections.BitArray(toIndex);
                    for (int k = 0; k < a.Count; k++)
                        aa.Set(k, a.Get(k));
                }
                else
                {
                    aa = (System.Collections.BitArray)a.Clone();
                }
                for (int j = fromIndex; j < toIndex; j++) aa.Set(j, false);
                bb = (OpenBitSet)b.Clone(); bb.Clear(fromIndex, toIndex);

                DoNextSetBit(aa, bb); // a problem here is from clear() or nextSetBit

                fromIndex = rand.Next(sz + 80);
                toIndex = fromIndex + rand.Next((sz >> 1) + 1);
                // {{dougsale-2.4.0}}:
                // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSet 'aa'
                // when 'a.Count < j+1' and 'fromIndex < toIndex'
                //aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, false);
                // So, if necessary, lets explicitly grow 'aa' now
                if (a.Count < toIndex && fromIndex < toIndex)
                {
                    aa = new System.Collections.BitArray(toIndex);
                    for (int k = 0; k < a.Count; k++)
                        aa.Set(k, a.Get(k));
                }
                else
                {
                    aa = (System.Collections.BitArray)a.Clone();
                }
                for (int j = fromIndex; j < toIndex; j++) aa.Set(j, true);
                bb = (OpenBitSet)b.Clone(); bb.Set(fromIndex, toIndex);
				
				DoNextSetBit(aa, bb); // a problem here is from set() or nextSetBit     
				
				
				if (a0 != null)
				{
                    Assert.AreEqual(a.Equals(a0), b.Equals(b0));

                    Assert.AreEqual(SupportClass.BitSetSupport.Cardinality(a), b.Cardinality());

                    // {{dougsale-2.4.0}}
                    //
                    // The Java code used java.util.BitSet, which grows as needed.
                    // When a bit, outside the dimension of the set is referenced,
                    // the set automatically grows to the necessary size.  The
                    // new entries default to false.
                    //
                    // BitArray does not grow automatically and is not growable.
                    // Thus when BitArray instances of mismatched cardinality
                    // interact, we must first explicitly "grow" the smaller one.
                    //
                    // This growth is acheived by creating a new instance of the
                    // required size and copying the appropriate values.
                    //

                    //BitArray a_and = (BitArray)a.Clone(); a_and.And(a0);
                    //BitArray a_or = (BitArray)a.Clone(); a_or.Or(a0);
                    //BitArray a_xor = (BitArray)a.Clone(); a_xor.Xor(a0);
                    //BitArray a_andn = (BitArray)a.Clone(); for (int j = 0; j < a_andn.Count; j++) if (a0.Get(j)) a_andn.Set(j, false);

                    System.Collections.BitArray a_and;
                    System.Collections.BitArray a_or;
                    System.Collections.BitArray a_xor;
                    System.Collections.BitArray a_andn;

                    if (a.Count < a0.Count)
                    {
                        // the Java code would have implicitly resized 'a_and', 'a_or', 'a_xor', and 'a_andn'
                        // in this case, so we explicitly create a resized stand-in for 'a' here, allowing for
                        // a to keep its original size while 'a_and', 'a_or', 'a_xor', and 'a_andn' are resized
                        System.Collections.BitArray tmp = new System.Collections.BitArray(a0.Count, false);
                        for (int z = 0; z < a.Count; z++)
                            tmp.Set(z, a.Get(z));

                        a_and = (System.Collections.BitArray)tmp.Clone(); a_and.And(a0);
                        a_or = (System.Collections.BitArray)tmp.Clone(); a_or.Or(a0);
                        a_xor = (System.Collections.BitArray)tmp.Clone(); a_xor.Xor(a0);
                        a_andn = (System.Collections.BitArray)tmp.Clone(); for (int j = 0; j < a_andn.Count; j++) if (a0.Get(j)) a_andn.Set(j, false);
                    }
                    else if (a.Count > a0.Count)
                    {
                        // the Java code would have implicitly resized 'a0' in this case, so
                        // we explicitly do so here:
                        System.Collections.BitArray tmp = new System.Collections.BitArray(a.Count, false);
                        for (int z = 0; z < a0.Count; z++)
                            tmp.Set(z, a0.Get(z));
                        a0 = tmp;

                        a_and = (System.Collections.BitArray)a.Clone(); a_and.And(a0);
                        a_or = (System.Collections.BitArray)a.Clone(); a_or.Or(a0);
                        a_xor = (System.Collections.BitArray)a.Clone(); a_xor.Xor(a0);
                        a_andn = (System.Collections.BitArray)a.Clone(); for (int j = 0; j < a_andn.Count; j++) if (a0.Get(j)) a_andn.Set(j, false);
                    }
                    else
                    {
                        // 'a' and 'a0' are the same size, no explicit growing necessary
                        a_and = (System.Collections.BitArray)a.Clone(); a_and.And(a0);
                        a_or = (System.Collections.BitArray)a.Clone(); a_or.Or(a0);
                        a_xor = (System.Collections.BitArray)a.Clone(); a_xor.Xor(a0);
                        a_andn = (System.Collections.BitArray)a.Clone(); for (int j = 0; j < a_andn.Count; j++) if (a0.Get(j)) a_andn.Set(j, false);
                    }

                    OpenBitSet b_and = (OpenBitSet)b.Clone(); Assert.AreEqual(b, b_and); b_and.And(b0);
                    OpenBitSet b_or = (OpenBitSet)b.Clone(); b_or.Or(b0);
                    OpenBitSet b_xor = (OpenBitSet)b.Clone(); b_xor.Xor(b0);
                    OpenBitSet b_andn = (OpenBitSet)b.Clone(); b_andn.AndNot(b0);

                    DoIterate(a_and, b_and, mode);
                    DoIterate(a_or, b_or, mode);
                    DoIterate(a_xor, b_xor, mode);
                    DoIterate(a_andn, b_andn, mode);

                    Assert.AreEqual(SupportClass.BitSetSupport.Cardinality(a_and), b_and.Cardinality());
                    Assert.AreEqual(SupportClass.BitSetSupport.Cardinality(a_or), b_or.Cardinality());
                    Assert.AreEqual(SupportClass.BitSetSupport.Cardinality(a_xor), b_xor.Cardinality());
                    Assert.AreEqual(SupportClass.BitSetSupport.Cardinality(a_andn), b_andn.Cardinality());

                    // test non-mutating popcounts
                    Assert.AreEqual(b_and.Cardinality(), OpenBitSet.IntersectionCount(b, b0));
                    Assert.AreEqual(b_or.Cardinality(), OpenBitSet.UnionCount(b, b0));
                    Assert.AreEqual(b_xor.Cardinality(), OpenBitSet.XorCount(b, b0));
                    Assert.AreEqual(b_andn.Cardinality(), OpenBitSet.AndNotCount(b, b0));
                }
				
				a0 = a;
				b0 = b;
			}
		}
Example #3
0
        internal virtual void  DoRandomSets(int maxSize, int iter, int mode)
        {
            System.Collections.BitArray a0 = null;
            OpenBitSet b0 = null;

            for (int i = 0; i < iter; i++)
            {
                int sz = rand.Next(maxSize);
                System.Collections.BitArray a = new System.Collections.BitArray(sz);
                OpenBitSet b = new OpenBitSet(sz);

                // test the various ways of setting bits
                if (sz > 0)
                {
                    int nOper = rand.Next(sz);
                    for (int j = 0; j < nOper; j++)
                    {
                        int idx;

                        idx = rand.Next(sz);
                        a.Set(idx, true);
                        b.FastSet(idx);
                        idx = rand.Next(sz);
                        a.Set(idx, false);
                        b.FastClear(idx);
                        idx = rand.Next(sz);
                        a.Set(idx, !a.Get(idx));
                        b.FastFlip(idx);

                        bool val  = b.FlipAndGet(idx);
                        bool val2 = b.FlipAndGet(idx);
                        Assert.IsTrue(val != val2);

                        val = b.GetAndSet(idx);
                        Assert.IsTrue(val2 == val);
                        Assert.IsTrue(b.Get(idx));

                        if (!val)
                        {
                            b.FastClear(idx);
                        }
                        Assert.IsTrue(b.Get(idx) == val);
                    }
                }

                // test that the various ways of accessing the bits are equivalent
                DoGet(a, b);

                // {{dougsale-2.4.0}}
                //
                // Java's java.util.BitSet automatically grows as needed - i.e., when a bit is referenced beyond
                // the size of the BitSet, an exception isn't thrown - rather, the set grows to the size of the
                // referenced bit.
                //
                // System.Collections.BitArray does not have this feature, and thus I've faked it here by
                // "growing" the array explicitly when necessary (creating a new instance of the appropriate size
                // and setting the appropriate bits).
                //

                // test ranges, including possible extension
                int fromIndex, toIndex;
                fromIndex = rand.Next(sz + 80);
                toIndex   = fromIndex + rand.Next((sz >> 1) + 1);

                // {{dougsale-2.4.0}}:
                // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSets 'a'
                // and 'aa' to the same cardinality as 'j+1' when 'a.Count < j+1' and 'fromIndex < toIndex':
                //BitArray aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, !a.Get(j));
                // So, if necessary, lets explicitly grow 'a' now; then 'a' and its clone, 'aa', will be of the required size.
                if (a.Length < toIndex && fromIndex < toIndex)
                {
                    System.Collections.BitArray tmp = new System.Collections.BitArray(toIndex, false);
                    for (int k = 0; k < a.Length; k++)
                    {
                        tmp.Set(k, a.Get(k));
                    }
                    a = tmp;
                }
                // {{dougsale-2.4.0}}: now we can invoke this statement without going 'out-of-bounds'
                System.Collections.BitArray aa = (System.Collections.BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++)
                {
                    aa.Set(j, !a.Get(j));
                }
                OpenBitSet bb = (OpenBitSet)b.Clone(); bb.Flip(fromIndex, toIndex);

                DoIterate(aa, bb, mode); // a problem here is from flip or doIterate

                fromIndex = rand.Next(sz + 80);
                toIndex   = fromIndex + rand.Next((sz >> 1) + 1);
                // {{dougsale-2.4.0}}:
                // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSet 'aa'
                // when 'a.Count < j+1' and 'fromIndex < toIndex'
                //aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, false);
                // So, if necessary, lets explicitly grow 'aa' now
                if (a.Length < toIndex && fromIndex < toIndex)
                {
                    aa = new System.Collections.BitArray(toIndex);
                    for (int k = 0; k < a.Length; k++)
                    {
                        aa.Set(k, a.Get(k));
                    }
                }
                else
                {
                    aa = (System.Collections.BitArray)a.Clone();
                }
                for (int j = fromIndex; j < toIndex; j++)
                {
                    aa.Set(j, false);
                }
                bb = (OpenBitSet)b.Clone(); bb.Clear(fromIndex, toIndex);

                DoNextSetBit(aa, bb); // a problem here is from clear() or nextSetBit

                fromIndex = rand.Next(sz + 80);
                toIndex   = fromIndex + rand.Next((sz >> 1) + 1);
                // {{dougsale-2.4.0}}:
                // The following commented-out, compound statement's 'for loop' implicitly grows the Java BitSet 'aa'
                // when 'a.Count < j+1' and 'fromIndex < toIndex'
                //aa = (BitArray)a.Clone(); for (int j = fromIndex; j < toIndex; j++) aa.Set(j, false);
                // So, if necessary, lets explicitly grow 'aa' now
                if (a.Length < toIndex && fromIndex < toIndex)
                {
                    aa = new System.Collections.BitArray(toIndex);
                    for (int k = 0; k < a.Length; k++)
                    {
                        aa.Set(k, a.Get(k));
                    }
                }
                else
                {
                    aa = (System.Collections.BitArray)a.Clone();
                }
                for (int j = fromIndex; j < toIndex; j++)
                {
                    aa.Set(j, true);
                }
                bb = (OpenBitSet)b.Clone(); bb.Set(fromIndex, toIndex);

                DoNextSetBit(aa, bb);                 // a problem here is from set() or nextSetBit


                if (a0 != null)
                {
                    Assert.AreEqual(a.Equals(a0), b.Equals(b0));

                    Assert.AreEqual(BitSetSupport.Cardinality(a), b.Cardinality());

                    // {{dougsale-2.4.0}}
                    //
                    // The Java code used java.util.BitSet, which grows as needed.
                    // When a bit, outside the dimension of the set is referenced,
                    // the set automatically grows to the necessary size.  The
                    // new entries default to false.
                    //
                    // BitArray does not grow automatically and is not growable.
                    // Thus when BitArray instances of mismatched cardinality
                    // interact, we must first explicitly "grow" the smaller one.
                    //
                    // This growth is acheived by creating a new instance of the
                    // required size and copying the appropriate values.
                    //

                    //BitArray a_and = (BitArray)a.Clone(); a_and.And(a0);
                    //BitArray a_or = (BitArray)a.Clone(); a_or.Or(a0);
                    //BitArray a_xor = (BitArray)a.Clone(); a_xor.Xor(a0);
                    //BitArray a_andn = (BitArray)a.Clone(); for (int j = 0; j < a_andn.Count; j++) if (a0.Get(j)) a_andn.Set(j, false);

                    System.Collections.BitArray a_and;
                    System.Collections.BitArray a_or;
                    System.Collections.BitArray a_xor;
                    System.Collections.BitArray a_andn;

                    if (a.Length < a0.Length)
                    {
                        // the Java code would have implicitly resized 'a_and', 'a_or', 'a_xor', and 'a_andn'
                        // in this case, so we explicitly create a resized stand-in for 'a' here, allowing for
                        // a to keep its original size while 'a_and', 'a_or', 'a_xor', and 'a_andn' are resized
                        System.Collections.BitArray tmp = new System.Collections.BitArray(a0.Length, false);
                        for (int z = 0; z < a.Length; z++)
                        {
                            tmp.Set(z, a.Get(z));
                        }

                        a_and  = (System.Collections.BitArray)tmp.Clone(); a_and.And(a0);
                        a_or   = (System.Collections.BitArray)tmp.Clone(); a_or.Or(a0);
                        a_xor  = (System.Collections.BitArray)tmp.Clone(); a_xor.Xor(a0);
                        a_andn = (System.Collections.BitArray)tmp.Clone(); for (int j = 0; j < a_andn.Length; j++)
                        {
                            if (a0.Get(j))
                            {
                                a_andn.Set(j, false);
                            }
                        }
                    }
                    else if (a.Length > a0.Length)
                    {
                        // the Java code would have implicitly resized 'a0' in this case, so
                        // we explicitly do so here:
                        System.Collections.BitArray tmp = new System.Collections.BitArray(a.Length, false);
                        for (int z = 0; z < a0.Length; z++)
                        {
                            tmp.Set(z, a0.Get(z));
                        }
                        a0 = tmp;

                        a_and  = (System.Collections.BitArray)a.Clone(); a_and.And(a0);
                        a_or   = (System.Collections.BitArray)a.Clone(); a_or.Or(a0);
                        a_xor  = (System.Collections.BitArray)a.Clone(); a_xor.Xor(a0);
                        a_andn = (System.Collections.BitArray)a.Clone(); for (int j = 0; j < a_andn.Length; j++)
                        {
                            if (a0.Get(j))
                            {
                                a_andn.Set(j, false);
                            }
                        }
                    }
                    else
                    {
                        // 'a' and 'a0' are the same size, no explicit growing necessary
                        a_and  = (System.Collections.BitArray)a.Clone(); a_and.And(a0);
                        a_or   = (System.Collections.BitArray)a.Clone(); a_or.Or(a0);
                        a_xor  = (System.Collections.BitArray)a.Clone(); a_xor.Xor(a0);
                        a_andn = (System.Collections.BitArray)a.Clone(); for (int j = 0; j < a_andn.Length; j++)
                        {
                            if (a0.Get(j))
                            {
                                a_andn.Set(j, false);
                            }
                        }
                    }

                    OpenBitSet b_and  = (OpenBitSet)b.Clone(); Assert.AreEqual(b, b_and); b_and.And(b0);
                    OpenBitSet b_or   = (OpenBitSet)b.Clone(); b_or.Or(b0);
                    OpenBitSet b_xor  = (OpenBitSet)b.Clone(); b_xor.Xor(b0);
                    OpenBitSet b_andn = (OpenBitSet)b.Clone(); b_andn.AndNot(b0);

                    DoIterate(a_and, b_and, mode);
                    DoIterate(a_or, b_or, mode);
                    DoIterate(a_xor, b_xor, mode);
                    DoIterate(a_andn, b_andn, mode);

                    Assert.AreEqual(BitSetSupport.Cardinality(a_and), b_and.Cardinality());
                    Assert.AreEqual(BitSetSupport.Cardinality(a_or), b_or.Cardinality());
                    Assert.AreEqual(BitSetSupport.Cardinality(a_xor), b_xor.Cardinality());
                    Assert.AreEqual(BitSetSupport.Cardinality(a_andn), b_andn.Cardinality());

                    // test non-mutating popcounts
                    Assert.AreEqual(b_and.Cardinality(), OpenBitSet.IntersectionCount(b, b0));
                    Assert.AreEqual(b_or.Cardinality(), OpenBitSet.UnionCount(b, b0));
                    Assert.AreEqual(b_xor.Cardinality(), OpenBitSet.XorCount(b, b0));
                    Assert.AreEqual(b_andn.Cardinality(), OpenBitSet.AndNotCount(b, b0));
                }

                a0 = a;
                b0 = b;
            }
        }
Example #4
0
        //Compares a BitArray with an OpenBitSet
        public static bool Equal(this BitArray a, OpenBitSet b)
        {
            var bitArrayCardinality = a.Cardinality();
            if (bitArrayCardinality != b.Cardinality())
                return false;

            for (int i = 0; i < bitArrayCardinality; i++)
            {
                if (a.Get(i) != b.Get(i))
                    return false;
            }

            return true;
        }
        public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            long t0 = System.Environment.TickCount;
            int maxdoc = reader.MaxDoc;
            BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
            BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);

            TermEnum tenum = null;
            TermDocs tdoc = null;
            var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList = new List<int>();
            OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
            int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int t = 0; // current term number
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;

            string pre = null;

            int df = 0;
            int minID = -1;
            int maxID = -1;
            int valId = 0;

            try
            {
                tdoc = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        if (term == null || !fieldName.Equals(term.Field))
                            break;

                        string val = term.Text;

                        if (val != null)
                        {
                            int weight = 0;
                            string[] split = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                            if (split.Length > 1)
                            {
                                val = split[0];
                                weight = int.Parse(split[split.Length - 1]);
                            }
                            if (pre == null || !val.Equals(pre))
                            {
                                if (pre != null)
                                {
                                    freqList.Add(df);
                                    minIDList.Add(minID);
                                    maxIDList.Add(maxID);
                                }

                                list.Add(val);

                                df = 0;
                                minID = -1;
                                maxID = -1;
                                valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                                t++;
                            }

                            tdoc.Seek(tenum);
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                                else weightLoader.Add(docid, weight);

                                if (docid < minID) minID = docid;
                                bitset.FastSet(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                                    else weightLoader.Add(docid, weight);

                                    bitset.FastSet(docid);
                                }
                                if (docid > maxID) maxID = docid;
                            }
                            pre = val;
                        }

                    }
                    while (tenum.Next());
                    if (pre != null)
                    {
                        freqList.Add(df);
                        minIDList.Add(minID);
                        maxIDList.Add(maxID);
                    }
                }
            }
            finally
            {
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
                _weightArray.Load(maxdoc + 1, weightLoader);
            }
            catch (System.IO.IOException e)
            {
                throw e;
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs = freqList.ToArray();
            this.minIDs = minIDList.ToArray();
            this.maxIDs = maxIDList.ToArray();

            int doc = 0;
            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();   
        }
        /// <summary>
        /// loads multi-value facet data. This method uses a workarea to prepare loading.
        /// </summary>
        /// <param name="fieldName"></param>
        /// <param name="reader"></param>
        /// <param name="listFactory"></param>
        /// <param name="workArea"></param>
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            long t0 = Environment.TickCount;
            int maxdoc = reader.MaxDoc;
            BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            TermEnum tenum = null;
            TermDocs tdoc = null;
            ITermValueList list = (listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList());
            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList = new List<int>();
            OpenBitSet bitset = new OpenBitSet();
            int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int t = 0; // current term number
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;
            try
            {
                tdoc = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        if (term == null || !fieldName.Equals(term.Field))
                            break;

                        string val = term.Text;

                        if (val != null)
                        {
                            list.Add(val);

                            tdoc.Seek(tenum);
                            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
                            int df = 0;
                            int minID = -1;
                            int maxID = -1;
                            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId))
                                    LogOverflow(fieldName);
                                minID = docid;
                                bitset.Set(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId))
                                        LogOverflow(fieldName);
                                    bitset.Set(docid);
                                }
                                maxID = docid;
                            }
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }

                        t++;
                    }
                    while (tenum.Next());
                }
            }
            finally
            {
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
            }
            catch (System.IO.IOException e)
            {
                throw e;
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs = freqList.ToArray();
            this.minIDs = minIDList.ToArray();
            this.maxIDs = maxIDList.ToArray();

            int doc = 0;
            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
        }
Example #7
0
        /// <summary>
        /// Assert that the content of the <see cref="DocIdSet"/> is the same as the content of the <see cref="OpenBitSet"/>.
        /// </summary>
#pragma warning disable xUnit1013
        public virtual void AssertEquals(int numBits, OpenBitSet ds1, WAH8DocIdSet ds2)
#pragma warning restore xUnit1013
        {
            // nextDoc
            DocIdSetIterator it2 = ds2.GetIterator();

            if (it2 == null)
            {
                Assert.AreEqual(-1, ds1.NextSetBit(0));
            }
            else
            {
                Assert.AreEqual(-1, it2.DocID);
                for (int doc = ds1.NextSetBit(0); doc != -1; doc = ds1.NextSetBit(doc + 1))
                {
                    Assert.AreEqual(doc, it2.NextDoc());
                    Assert.AreEqual(doc, it2.DocID);
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.DocID);
            }

            // nextDoc / advance
            it2 = ds2.GetIterator();
            if (it2 == null)
            {
                Assert.AreEqual(-1, ds1.NextSetBit(0));
            }
            else
            {
                for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;)
                {
                    if (Random.NextBoolean())
                    {
                        doc = ds1.NextSetBit(doc + 1);
                        if (doc == -1)
                        {
                            doc = DocIdSetIterator.NO_MORE_DOCS;
                        }
                        Assert.AreEqual(doc, it2.NextDoc());
                        Assert.AreEqual(doc, it2.DocID);
                    }
                    else
                    {
                        int target = doc + 1 + Random.Next(Random.NextBoolean() ? 64 : Math.Max(numBits / 8, 1));
                        doc = ds1.NextSetBit(target);
                        if (doc == -1)
                        {
                            doc = DocIdSetIterator.NO_MORE_DOCS;
                        }
                        Assert.AreEqual(doc, it2.Advance(target));
                        Assert.AreEqual(doc, it2.DocID);
                    }
                }
            }

            // bits()
            IBits bits = ds2.Bits;

            if (bits != null)
            {
                // test consistency between bits and iterator
                it2 = ds2.GetIterator();
                for (int previousDoc = -1, doc = it2.NextDoc(); ; previousDoc = doc, doc = it2.NextDoc())
                {
                    int max = doc == DocIdSetIterator.NO_MORE_DOCS ? bits.Length : doc;
                    for (int i = previousDoc + 1; i < max; ++i)
                    {
                        Assert.AreEqual(false, bits.Get(i));
                    }
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    Assert.AreEqual(true, bits.Get(doc));
                }
            }

            Assert.AreEqual(ds1.Cardinality(), ds2.Cardinality());
        }
 private void DoTestMultiThreads(bool withTimeout)
 {
     ThreadClass[] threadArray = new ThreadClass[N_THREADS];
     OpenBitSet success = new OpenBitSet(N_THREADS);
     for (int i = 0; i < threadArray.Length; ++i)
     {
         int num = i;
         threadArray[num] = new ThreadClassAnonymousHelper(this, success, withTimeout, num);
     }
     for (int i = 0; i < threadArray.Length; ++i)
     {
         threadArray[i].Start();
     }
     for (int i = 0; i < threadArray.Length; ++i)
     {
         threadArray[i].Join();
     }
     assertEquals("some threads failed!", N_THREADS, success.Cardinality());
 }