Ejemplo n.º 1
0
        /// <summary>
        /// Compute the union of the provided sets. This method is much faster than
        /// computing the union manually since it operates directly at the byte level.
        /// </summary>
        /// <param name="docIdSets">The sets to merge; must not be <c>null</c>.</param>
        /// <param name="indexInterval">Index interval passed to the <c>WordBuilder</c> that assembles the result.</param>
        /// <returns>
        /// <c>EMPTY</c> when no set is supplied or when the first word at the top of the
        /// queue is already the <c>int.MaxValue</c> sentinel; the single supplied set
        /// itself (not a copy) when exactly one set is supplied; otherwise a newly built set.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="docIdSets"/> is <c>null</c>.</exception>
        public static WAH8DocIdSet Union(ICollection <WAH8DocIdSet> docIdSets, int indexInterval)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on .Count.
            if (docIdSets == null)
            {
                throw new System.ArgumentNullException("docIdSets");
            }

            switch (docIdSets.Count)
            {
                case 0:
                    return EMPTY;

                case 1:
                    // IEnumerator<T> is IDisposable; release it deterministically.
                    using (var iter = docIdSets.GetEnumerator())
                    {
                        iter.MoveNext();
                        return iter.Current;
                    }
            }

            // The logic below is very similar to DisjunctionScorer
            int numSets = docIdSets.Count;
            // NOTE(review): the loop below relies on Top() returning the iterator with the
            // smallest WordNum; the ordering lives in PriorityQueueAnonymousInnerClassHelper,
            // which is not visible here - confirm.
            PriorityQueue <Iterator> iterators = new PriorityQueueAnonymousInnerClassHelper(numSets);

            foreach (WAH8DocIdSet set in docIdSets)
            {
                // Position every iterator on its first word before queueing it.
                Iterator iterator = (Iterator)set.GetIterator();
                iterator.NextWord();
                iterators.Add(iterator);
            }

            Iterator top = iterators.Top();

            // int.MaxValue is the "exhausted" sentinel: if even the top iterator has
            // no word, there is nothing to merge.
            if (top.WordNum == int.MaxValue)
            {
                return EMPTY;
            }

            // Word currently being accumulated: its number and the OR of all bytes seen for it.
            int         wordNum = top.WordNum;
            byte        word    = top.Word;
            WordBuilder builder = (WordBuilder)(new WordBuilder()).SetIndexInterval(indexInterval);

            while (true)
            {
                // Consume the current top, then re-establish the queue order.
                top.NextWord();
                iterators.UpdateTop();
                top = iterators.Top();

                if (top.WordNum == wordNum)
                {
                    // Another set contributes to the same word: merge its bits.
                    word |= top.Word;
                }
                else
                {
                    // The queue moved on to a different word: flush the accumulated one.
                    builder.AddWord(wordNum, word);
                    if (top.WordNum == int.MaxValue)
                    {
                        // Top iterator reports the sentinel: stop merging.
                        break;
                    }
                    wordNum = top.WordNum;
                    word    = top.Word;
                }
            }

            return builder.Build();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Compute the intersection of the provided sets. This method is much faster than
        /// computing the intersection manually since it operates directly at the byte level.
        /// </summary>
        /// <param name="docIdSets">The sets to intersect; must be non-<c>null</c> and non-empty.</param>
        /// <param name="indexInterval">Index interval passed to the <c>WordBuilder</c> that assembles the result.</param>
        /// <returns>
        /// The single supplied set itself (not a copy) when exactly one set is supplied;
        /// otherwise a newly built set containing, for every word number present in all
        /// sets, the bitwise AND of the corresponding bytes (words whose AND is zero are skipped).
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="docIdSets"/> is <c>null</c>.</exception>
        /// <exception cref="System.ArgumentException"><paramref name="docIdSets"/> is empty.</exception>
        public static WAH8DocIdSet Intersect(ICollection <WAH8DocIdSet> docIdSets, int indexInterval)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on .Count.
            if (docIdSets == null)
            {
                throw new System.ArgumentNullException("docIdSets");
            }

            switch (docIdSets.Count)
            {
                case 0:
                    throw new System.ArgumentException("There must be at least one set to intersect");

                case 1:
                    // IEnumerator<T> is IDisposable; release it deterministically.
                    using (var iter = docIdSets.GetEnumerator())
                    {
                        iter.MoveNext();
                        return iter.Current;
                    }
            }

            // The logic below is similar to ConjunctionScorer
            int numSets   = docIdSets.Count;
            var iterators = new Iterator[numSets];
            int i         = 0;

            foreach (WAH8DocIdSet set in docIdSets)
            {
                var it = (Iterator)set.GetIterator();
                iterators[i++] = it;
            }

            // NOTE(review): presumably orders iterators so the cheapest (shortest serialized
            // form) comes first and leads the search - comparator is defined elsewhere; confirm.
            Array.Sort(iterators, SERIALIZED_LENGTH_COMPARATOR);
            WordBuilder builder = (WordBuilder)(new WordBuilder()).SetIndexInterval(indexInterval);

            // Candidate word number that every iterator is asked to reach.
            int wordNum = 0;

            while (true)
            {
                // Advance the least costly iterator first
                iterators[0].AdvanceWord(wordNum);
                wordNum = iterators[0].WordNum;
                if (wordNum == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // The lead iterator is exhausted: no further common word is possible.
                    break;
                }

                byte word = iterators[0].Word;
                for (i = 1; i < numSets; ++i)
                {
                    if (iterators[i].WordNum < wordNum)
                    {
                        iterators[i].AdvanceWord(wordNum);
                    }
                    if (iterators[i].WordNum > wordNum)
                    {
                        // This set has no word at the candidate position: restart the
                        // search from the position it jumped to.
                        wordNum = iterators[i].WordNum;
                        goto mainContinue;
                    }
                    Debug.Assert(iterators[i].WordNum == wordNum);
                    word &= iterators[i].Word;
                    if (word == 0)
                    {
                        // There are common words, but they don't share any bit
                        ++wordNum;
                        goto mainContinue;
                    }
                }

                // Found a common word
                Debug.Assert(word != 0);
                builder.AddWord(wordNum, word);
                ++wordNum;

                // Target of the gotos above: emulates a labeled "continue" of the outer loop.
                mainContinue:;
            }

            return builder.Build();
        }