示例#1
0
        /// <summary>
        /// Merging constructor: builds this list from the phrase infos of several
        /// existing lists, collapsing infos whose offset ranges overlap into one.
        /// </summary>
        /// <param name="toMerge"><see cref="FieldPhraseList"/>s whose phrase infos are combined to build this one</param>
        public FieldPhraseList(FieldPhraseList[] toMerge)
        {
            // Phase 1: one enumerator per source list. MergedIterator interleaves them;
            // the walk below relies on it yielding infos ordered by start offset
            // (then end offset and boost, per the element ordering).
            IEnumerator<WeightedPhraseInfo>[] sources = new IEnumerator<WeightedPhraseInfo>[toMerge.Length];
            for (int i = 0; i < toMerge.Length; i++)
            {
                sources[i] = toMerge[i].phraseList.GetEnumerator();
            }
            MergedIterator<WeightedPhraseInfo> merged = new MergedIterator<WeightedPhraseInfo>(false, sources);

            // Phase 2: sweep the ordered stream, grouping infos that overlap.
            phraseList = new List<WeightedPhraseInfo>();
            if (!merged.MoveNext())
            {
                return; // nothing to merge: the result stays empty
            }

            WeightedPhraseInfo head = merged.Current;
            List<WeightedPhraseInfo> pending = new List<WeightedPhraseInfo>();
            pending.Add(head);
            int pendingEnd = head.EndOffset; // furthest end offset covered by the pending group

            while (merged.MoveNext())
            {
                WeightedPhraseInfo next = merged.Current;
                if (next.StartOffset > pendingEnd)
                {
                    // Gap found: emit the pending group (as-is when it holds a single
                    // info, merged otherwise) and start a new group with this info.
                    phraseList.Add(pending.Count == 1 ? pending[0] : new WeightedPhraseInfo(pending));
                    pending.Clear();
                    pending.Add(next);
                    pendingEnd = next.EndOffset;
                    continue;
                }

                // Overlap (or touching offsets): extend the pending group.
                pendingEnd = Math.Max(pendingEnd, next.EndOffset);
                pending.Add(next);
            }

            // Emit whatever group is still open once the stream is exhausted.
            phraseList.Add(pending.Count == 1 ? pending[0] : new WeightedPhraseInfo(pending));
        }
示例#2
0
            /// <summary>
            /// Merging constructor. The sequence number is taken from the first info only;
            /// boosts are summed and overlapping term offsets are coalesced.
            /// </summary>
            /// <param name="toMerge">infos to merge; must contain at least one element</param>
            /// <exception cref="ArgumentException">if <paramref name="toMerge"/> is empty</exception>
            public WeightedPhraseInfo(ICollection<WeightedPhraseInfo> toMerge)
            {
                IEnumerator<WeightedPhraseInfo> infos = toMerge.GetEnumerator();
                if (!infos.MoveNext())
                {
                    throw new ArgumentException("toMerge must contain at least one WeightedPhraseInfo.");
                }

                // Phase 1: seed the state from the first info, then fold in the rest
                // while collecting one offsets enumerator per info.
                WeightedPhraseInfo head = infos.Current;
                IEnumerator<Toffs>[] offsetSources = new IEnumerator<Toffs>[toMerge.Count];
                termsInfos = new List<TermInfo>();
                seqnum = head.seqnum;
                boost = head.boost;
                offsetSources[0] = head.termsOffsets.GetEnumerator();

                // NOTE(review): only the termsInfos of the second and later infos are
                // appended; the first info's termsInfos are not copied. Behavior is kept
                // as-is here - confirm whether that omission is intended.
                for (int slot = 1; infos.MoveNext(); slot++)
                {
                    WeightedPhraseInfo info = infos.Current;
                    boost += info.boost;
                    termsInfos.AddAll(info.termsInfos);
                    offsetSources[slot] = info.termsOffsets.GetEnumerator();
                }

                // Phase 2: walk the merged offsets and fuse the ones that overlap.
                MergedIterator<Toffs> merged = new MergedIterator<Toffs>(false, offsetSources);
                termsOffsets = new List<Toffs>();
                if (!merged.MoveNext())
                {
                    return; // no offsets at all
                }

                Toffs pending = merged.Current;
                while (merged.MoveNext())
                {
                    Toffs next = merged.Current;
                    if (next.StartOffset > pending.EndOffset)
                    {
                        // Disjoint from the pending range: emit it and move on.
                        termsOffsets.Add(pending);
                        pending = next;
                        continue;
                    }

                    // Overlapping: widen the pending range in place. This writes to the
                    // Toffs instance obtained from the source info.
                    pending.EndOffset = Math.Max(pending.EndOffset, next.EndOffset);
                }
                termsOffsets.Add(pending);
            }
示例#3
0
        public virtual void TestMergeEmpty()
        {
            // Case 1: no sub-iterators at all.
            IEnumerator<int> noSources = new MergedIterator<int>();
            Assert.IsFalse(noSources.MoveNext());

            // Case 2: a single sub-iterator over an empty list.
            IEnumerator<int> emptyEnumerator = new List<int>().GetEnumerator();
            IEnumerator<int> oneEmptySource = new MergedIterator<int>(emptyEnumerator);
            Assert.IsFalse(oneEmptySource.MoveNext());

            // Case 3: a randomly sized array of sub-iterators, every one of them empty.
            int sourceCount = Random().Next(100);
            IEnumerator<int>[] emptySources = new IEnumerator<int>[sourceCount];
            int filled = 0;
            while (filled < emptySources.Length)
            {
                emptySources[filled] = new List<int>().GetEnumerator();
                filled++;
            }
            IEnumerator<int> manyEmptySources = new MergedIterator<int>(emptySources);
            Assert.IsFalse(manyEmptySources.MoveNext());
        }
示例#4
0
        /// <summary>
        /// Skips up to <paramref name="offset"/> items of the merged iterator and then
        /// collects up to <paramref name="count"/> of the following items into a list.
        /// </summary>
        /// <param name="offset">number of leading items to skip</param>
        /// <param name="count">maximum number of items to collect</param>
        /// <param name="mergedIter">iterator to read from; it is advanced by this call</param>
        /// <returns>the collected items, in iteration order; fewer than <paramref name="count"/> if the iterator runs out</returns>
        private static List<T> MergeLists<T>(int offset, int count, MergedIterator<T> mergedIter)
        {
            // Skip phase. The counter is checked before MoveNext() so the iterator is
            // never advanced past the requested offset.
            int toSkip = offset;
            while (toSkip > 0 && mergedIter.MoveNext())
            {
                // The skipped element is still read (and discarded) to keep the exact
                // sequence of calls made on the iterator unchanged.
                T skipped = mergedIter.Current;
                toSkip--;
            }

            // Collect phase.
            List<T> page = new List<T>();
            int toTake = count;
            while (toTake > 0 && mergedIter.MoveNext())
            {
                page.Add(mergedIter.Current);
                toTake--;
            }

            return page;
        }
        public virtual void TestMergeOne()
        {
            // Two single-term sets; the merged iteration is expected to yield
            // "foo:a" followed by "foo:b".
            Term first = new Term("foo", "a");
            Term second = new Term("foo", "b");

            PrefixCodedTerms.Builder firstBuilder = new PrefixCodedTerms.Builder();
            firstBuilder.Add(first);
            PrefixCodedTerms firstTerms = firstBuilder.Finish();

            PrefixCodedTerms.Builder secondBuilder = new PrefixCodedTerms.Builder();
            secondBuilder.Add(second);
            PrefixCodedTerms secondTerms = secondBuilder.Finish();

            IEnumerator<Term> merged = new MergedIterator<Term>(firstTerms.GetEnumerator(), secondTerms.GetEnumerator());

            Term[] expectedOrder = { first, second };
            foreach (Term expectedTerm in expectedOrder)
            {
                Assert.IsTrue(merged.MoveNext());
                Assert.AreEqual(expectedTerm, merged.Current);
            }
        }
        public virtual void TestMergeRandom()
        {
            // Build between 2 and 10 randomly populated term sets and keep the sorted
            // union of all of them as the expected result of the merge.
            int setCount = TestUtil.NextInt32(Random, 2, 10);
            PrefixCodedTerms[] encodedSets = new PrefixCodedTerms[setCount];
            JCG.SortedSet<Term> union = new JCG.SortedSet<Term>();

            for (int setIndex = 0; setIndex < encodedSets.Length; setIndex++)
            {
                JCG.SortedSet<Term> sortedTerms = new JCG.SortedSet<Term>();
                for (int remaining = TestUtil.NextInt32(Random, 0, 10000); remaining > 0; remaining--)
                {
                    // Field is drawn before text, matching the argument order of Term.
                    string field = TestUtil.RandomUnicodeString(Random, 2);
                    string text = TestUtil.RandomUnicodeString(Random, 4);
                    sortedTerms.Add(new Term(field, text));
                }
                union.UnionWith(sortedTerms);

                // The builder receives the terms in the sorted set's iteration order.
                PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
                foreach (Term term in sortedTerms)
                {
                    builder.Add(term);
                }
                encodedSets[setIndex] = builder.Finish();
            }

            // One sub-enumerator per encoded set.
            IEnumerator<Term>[] subs = new IEnumerator<Term>[encodedSets.Length];
            for (int setIndex = 0; setIndex < encodedSets.Length; setIndex++)
            {
                subs[setIndex] = encodedSets[setIndex].GetEnumerator();
            }

            IEnumerator<Term> expected = union.GetEnumerator();
            IEnumerator<Term> actual = new MergedIterator<Term>(subs);

            // Every merged term must match the next term of the union, and the union
            // must be exhausted when the merge is.
            while (actual.MoveNext())
            {
                Assert.IsTrue(expected.MoveNext());
                Assert.AreEqual(expected.Current, actual.Current);
            }
            Assert.IsFalse(expected.MoveNext());
        }
        public virtual void TestMergeOne()
        {
            // First source: a single term "foo:a".
            Term termA = new Term("foo", "a");
            PrefixCodedTerms.Builder builderA = new PrefixCodedTerms.Builder();
            builderA.Add(termA);
            PrefixCodedTerms codedA = builderA.Finish();

            // Second source: a single term "foo:b".
            Term termB = new Term("foo", "b");
            PrefixCodedTerms.Builder builderB = new PrefixCodedTerms.Builder();
            builderB.Add(termB);
            PrefixCodedTerms codedB = builderB.Finish();

            IEnumerator<Term> fromA = codedA.GetEnumerator();
            IEnumerator<Term> fromB = codedB.GetEnumerator();
            IEnumerator<Term> merged = new MergedIterator<Term>(fromA, fromB);

            // The merge must yield termA first, then termB.
            bool hasFirst = merged.MoveNext();
            Assert.IsTrue(hasFirst);
            Assert.AreEqual(termA, merged.Current);

            bool hasSecond = merged.MoveNext();
            Assert.IsTrue(hasSecond);
            Assert.AreEqual(termB, merged.Current);
        }
        public virtual void TestMergeRandom()
        {
            // Between 2 and 10 randomly populated term sets are encoded; their sorted
            // union is what the merged iteration is checked against.
            int sourceCount = TestUtil.NextInt(Random(), 2, 10);
            PrefixCodedTerms[] coded = new PrefixCodedTerms[sourceCount];
            SortedSet<Term> allTerms = new SortedSet<Term>();

            int source = 0;
            while (source < coded.Length)
            {
                SortedSet<Term> sourceTerms = new SortedSet<Term>();
                int termCount = TestUtil.NextInt(Random(), 0, 10000);
                for (int added = 0; added != termCount; added++)
                {
                    // Field is drawn before text, matching the argument order of Term.
                    string field = TestUtil.RandomUnicodeString(Random(), 2);
                    string text = TestUtil.RandomUnicodeString(Random(), 4);
                    sourceTerms.Add(new Term(field, text));
                }
                allTerms.AddAll(sourceTerms);

                // Terms are fed to the builder in the sorted set's iteration order.
                PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
                foreach (Term term in sourceTerms)
                {
                    builder.Add(term);
                }
                coded[source] = builder.Finish();
                source++;
            }

            // One sub-enumerator per encoded set.
            IEnumerator<Term>[] subs = new IEnumerator<Term>[coded.Length];
            for (int k = 0; k < subs.Length; k++)
            {
                subs[k] = coded[k].GetEnumerator();
            }

            IEnumerator<Term> expected = allTerms.GetEnumerator();
            IEnumerator<Term> actual = new MergedIterator<Term>(subs);

            // Step both enumerators together; neither may outlast the other.
            while (actual.MoveNext())
            {
                Assert.IsTrue(expected.MoveNext());
                Assert.AreEqual(expected.Current, actual.Current);
            }
            Assert.IsFalse(expected.MoveNext());
        }
示例#9
0
        /// <summary>
        /// Spreads a run of consecutive integers over a random number of lists and
        /// checks that a <c>MergedIterator</c> over those lists yields the expected sequence.
        /// </summary>
        /// <param name="itrsWithVal">how many distinct lists receive each value</param>
        /// <param name="specifiedValsOnItr">copies of the value added to each chosen list; a negative
        /// number means "a random count between 1 and the absolute value"</param>
        /// <param name="removeDups">forwarded to the iterator; also selects whether the largest
        /// per-list count or the total count of each value is expected</param>
        private void TestCase(int itrsWithVal, int specifiedValsOnItr, bool removeDups)
        {
            IList<int?> expected = new List<int?>();

            // Private generator seeded from the test's random source.
            Random rng = new Random(Random().Next());

            // At least itrsWithVal lists, fewer than 1000 in total.
            int listCount = itrsWithVal + rng.Next(1000 - itrsWithVal);
            IList<int>[] lists = new IList<int>[listCount];
            for (int slot = 0; slot < lists.Length; slot++)
            {
                lists[slot] = new List<int>();
            }

            int firstValue = rng.Next(1000000);
            int limit = firstValue + VALS_TO_MERGE / itrsWithVal / Math.Abs(specifiedValsOnItr);
            for (int value = firstValue; value < limit; value++)
            {
                // lists[0..unpicked) are the lists not yet chosen for this value.
                int unpicked = lists.Length;
                int largestRun = 0;
                int totalRun = 0;

                for (int pick = 0; pick < itrsWithVal; pick++)
                {
                    // The list index is drawn before the (optional) random run length.
                    int chosen = rng.Next(unpicked);

                    int run;
                    if (specifiedValsOnItr < 0)
                    {
                        run = 1 + rng.Next(-specifiedValsOnItr);
                    }
                    else
                    {
                        run = specifiedValsOnItr;
                    }

                    largestRun = Math.Max(largestRun, run);
                    totalRun += run;

                    for (int copy = 0; copy < run; copy++)
                    {
                        lists[chosen].Add(value);
                    }

                    // Move the chosen list behind the shrinking "unpicked" boundary
                    // (presumably a plain element swap) so it is not drawn again for this value.
                    unpicked--;
                    ArrayUtil.Swap(lists, chosen, unpicked);
                }

                // With duplicate removal the value is expected as often as its longest
                // single-list run; without it, once per copy added.
                int expectedCopies = removeDups ? largestRun : totalRun;
                for (int copy = 0; copy < expectedCopies; copy++)
                {
                    expected.Add(value);
                }
            }

            // Merge all lists and compare against the expected sequence.
            IEnumerator<int>[] sources = new IEnumerator<int>[listCount];
            for (int slot = 0; slot < sources.Length; slot++)
            {
                sources[slot] = lists[slot].GetEnumerator();
            }

            MergedIterator<int> merged = new MergedIterator<int>(removeDups, sources);
            IEnumerator<int?> expectedValues = expected.GetEnumerator();
            while (expectedValues.MoveNext())
            {
                Assert.IsTrue(merged.MoveNext());
                Assert.AreEqual(expectedValues.Current, merged.Current);
            }
            Assert.IsFalse(merged.MoveNext());
        }