Esempio n. 1
0
            /// <summary>
            /// Merging constructor: builds one <see cref="WeightedPhraseInfo"/> from several.
            /// The boost is the sum of every merged boost, the sequence number is taken from
            /// the first info only, and term offsets that overlap or touch are collapsed into
            /// a single range.
            /// </summary>
            /// <param name="toMerge">The infos to merge; must contain at least one element.</param>
            /// <exception cref="ArgumentException"><paramref name="toMerge"/> is empty.</exception>
            public WeightedPhraseInfo(ICollection<WeightedPhraseInfo> toMerge)
            {
                // One offsets enumerator per merged info; every slot is released in the finally block.
                IEnumerator<Toffs>[] offsetSources = new IEnumerator<Toffs>[toMerge.Count];
                try
                {
                    using (IEnumerator<WeightedPhraseInfo> infos = toMerge.GetEnumerator())
                    {
                        if (!infos.MoveNext())
                        {
                            throw new ArgumentException("toMerge must contain at least one WeightedPhraseInfo.");
                        }

                        // Phase 1: seed the aggregate state from the first info, then fold in the rest.
                        WeightedPhraseInfo head = infos.Current;
                        termsInfos = new List<TermInfo>();
                        seqnum = head.seqnum;
                        boost = head.boost;
                        offsetSources[0] = head.termsOffsets.GetEnumerator();

                        // NOTE(review): only the second and later infos contribute to termsInfos;
                        // head.termsInfos is not copied. Confirm that this is intended.
                        for (int slot = 1; infos.MoveNext(); slot++)
                        {
                            WeightedPhraseInfo next = infos.Current;
                            boost += next.boost;
                            termsInfos.AddRange(next.termsInfos);
                            offsetSources[slot] = next.termsOffsets.GetEnumerator();
                        }

                        // Phase 2: walk the merged offsets and coalesce ranges that overlap.
                        // This relies on MergedEnumerator yielding offsets ordered by start/end
                        // offset; the 'false' argument presumably keeps duplicates (TODO confirm).
                        using (MergedEnumerator<Toffs> merged = new MergedEnumerator<Toffs>(false, offsetSources))
                        {
                            termsOffsets = new List<Toffs>();
                            if (merged.MoveNext())
                            {
                                Toffs pending = merged.Current;
                                while (merged.MoveNext())
                                {
                                    Toffs candidate = merged.Current;
                                    if (candidate.StartOffset > pending.EndOffset)
                                    {
                                        // Gap between ranges: emit the finished one and start a new one.
                                        termsOffsets.Add(pending);
                                        pending = candidate;
                                    }
                                    else
                                    {
                                        // Overlapping or touching: stretch the pending range in place.
                                        pending.EndOffset = Math.Max(pending.EndOffset, candidate.EndOffset);
                                    }
                                }
                                termsOffsets.Add(pending);
                            }
                        }
                    }
                }
                finally
                {
                    IOUtils.Dispose(offsetSources);
                }
            }
        /// <summary>
        /// Merges two single-term <see cref="PrefixCodedTerms"/> instances and checks that the
        /// merged enumerator yields exactly the two terms, first <c>foo:a</c> and then
        /// <c>foo:b</c>, and nothing else.
        /// </summary>
        public virtual void TestMergeOne()
        {
            Term t1 = new Term("foo", "a");

            PrefixCodedTerms.Builder b1 = new PrefixCodedTerms.Builder();
            b1.Add(t1);
            PrefixCodedTerms pb1 = b1.Finish();

            Term t2 = new Term("foo", "b");

            PrefixCodedTerms.Builder b2 = new PrefixCodedTerms.Builder();
            b2.Add(t2);
            PrefixCodedTerms pb2 = b2.Finish();

            // IEnumerator<T> is IDisposable, so release the merged enumerator when the test is done.
            using (IEnumerator<Term> merged = new MergedEnumerator<Term>(pb1.GetEnumerator(), pb2.GetEnumerator()))
            {
                Assert.IsTrue(merged.MoveNext());
                Assert.AreEqual(t1, merged.Current);
                Assert.IsTrue(merged.MoveNext());
                Assert.AreEqual(t2, merged.Current);

                // Without this check a merge that produced extra elements would still pass.
                Assert.IsFalse(merged.MoveNext());
            }
        }
        /// <summary>
        /// Builds a random number of <see cref="PrefixCodedTerms"/> instances from random term
        /// sets, merges their enumerators, and verifies that the merged sequence matches the
        /// sorted union of all generated terms element by element.
        /// </summary>
        public virtual void TestMergeRandom()
        {
            PrefixCodedTerms[] pb = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];
            JCG.SortedSet<Term> superSet = new JCG.SortedSet<Term>();

            for (int i = 0; i < pb.Length; i++)
            {
                // Each source gets its own sorted set of random terms.
                JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
                int nterms = TestUtil.NextInt32(Random, 0, 10000);
                for (int j = 0; j < nterms; j++)
                {
                    Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4));
                    terms.Add(term);
                }
                superSet.UnionWith(terms);

                PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
                foreach (Term term in terms)
                {
                    b.Add(term);
                }
                pb[i] = b.Finish();
            }

            // One enumerator per source, stored directly in the array handed to the merge.
            IEnumerator<Term>[] subs = new IEnumerator<Term>[pb.Length];
            for (int i = 0; i < pb.Length; i++)
            {
                subs[i] = pb[i].GetEnumerator();
            }

            // Both enumerators are IDisposable; release them once the comparison is finished.
            using (IEnumerator<Term> expected = superSet.GetEnumerator())
            using (IEnumerator<Term> actual = new MergedEnumerator<Term>(subs))
            {
                while (actual.MoveNext())
                {
                    Assert.IsTrue(expected.MoveNext());
                    Assert.AreEqual(expected.Current, actual.Current);
                }

                // The merge must not stop before the expected union is exhausted.
                Assert.IsFalse(expected.MoveNext());
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Merging constructor. Combines the phrase lists of several
        /// <see cref="FieldPhraseList"/>s into this one, replacing each run of overlapping
        /// (or touching) <see cref="WeightedPhraseInfo"/>s with a single merged info.
        /// </summary>
        /// <param name="toMerge"><see cref="FieldPhraseList"/>s to merge to build this one</param>
        public FieldPhraseList(FieldPhraseList[] toMerge)
        {
            // One phrase enumerator per input list; all are released in the finally block.
            IEnumerator<WeightedPhraseInfo>[] sources = new IEnumerator<WeightedPhraseInfo>[toMerge.Length];
            try
            {
                for (int i = 0; i < toMerge.Length; i++)
                {
                    sources[i] = toMerge[i].phraseList.GetEnumerator();
                }

                // This relies on MergedEnumerator yielding infos ordered by start offset, end
                // offset and boost; the 'false' argument presumably keeps duplicates (TODO confirm).
                using (MergedEnumerator<WeightedPhraseInfo> merged = new MergedEnumerator<WeightedPhraseInfo>(false, sources))
                {
                    phraseList = new List<WeightedPhraseInfo>();
                    if (!merged.MoveNext())
                    {
                        return;
                    }

                    // 'run' collects the infos of the current overlapping group;
                    // 'runEnd' is the furthest end offset seen within that group.
                    WeightedPhraseInfo head = merged.Current;
                    List<WeightedPhraseInfo> run = new List<WeightedPhraseInfo> { head };
                    int runEnd = head.EndOffset;

                    while (merged.MoveNext())
                    {
                        WeightedPhraseInfo next = merged.Current;
                        if (next.StartOffset > runEnd)
                        {
                            // Gap found: emit the finished group (as-is when it holds a single
                            // info, merged otherwise) and start a new group with 'next'.
                            phraseList.Add(run.Count == 1 ? run[0] : new WeightedPhraseInfo(run));
                            run.Clear();
                            run.Add(next);
                            runEnd = next.EndOffset;
                        }
                        else
                        {
                            // Still overlapping: extend the current group.
                            runEnd = Math.Max(runEnd, next.EndOffset);
                            run.Add(next);
                        }
                    }

                    // Emit whatever group is still pending once the input is exhausted.
                    phraseList.Add(run.Count == 1 ? run[0] : new WeightedPhraseInfo(run));
                }
            }
            finally
            {
                IOUtils.Dispose(sources);
            }
        }