/// <summary>
/// Merging constructor. Sums the boosts of all supplied infos and coalesces their
/// term offsets into a list of non-overlapping ranges.
/// Note that this just grabs seqnum from the first info.
/// </summary>
/// <param name="toMerge">The infos to merge; must contain at least one element.</param>
/// <exception cref="ArgumentNullException"><paramref name="toMerge"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="toMerge"/> is empty.</exception>
public WeightedPhraseInfo(ICollection<WeightedPhraseInfo> toMerge)
{
    // Fail with a descriptive exception instead of a NullReferenceException on toMerge.Count.
    if (toMerge is null)
    {
        throw new ArgumentNullException(nameof(toMerge));
    }

    // One offsets enumerator per merged info; all of them are released in the finally block.
    IEnumerator<Toffs>[] allToffs = new IEnumerator<Toffs>[toMerge.Count];
    try
    {
        // Pretty much the same idea as merging FieldPhraseLists:
        // Step 1. Sort by startOffset, endOffset
        //         While we are here merge the boosts and termInfos
        using IEnumerator<WeightedPhraseInfo> toMergeItr = toMerge.GetEnumerator();
        if (!toMergeItr.MoveNext())
        {
            throw new ArgumentException("toMerge must contain at least one WeightedPhraseInfo.");
        }

        WeightedPhraseInfo first = toMergeItr.Current;
        termsInfos = new List<TermInfo>();
        seqnum = first.seqnum;
        boost = first.boost;
        allToffs[0] = first.termsOffsets.GetEnumerator();

        // NOTE(review): only infos after the first contribute their termsInfos here;
        // first.termsInfos is never copied. Confirm whether that is intentional.
        int index = 1;
        while (toMergeItr.MoveNext())
        {
            WeightedPhraseInfo info = toMergeItr.Current;
            boost += info.boost;
            termsInfos.AddRange(info.termsInfos);
            allToffs[index++] = info.termsOffsets.GetEnumerator();
        }

        // Step 2. Walk the sorted list merging overlaps
        // (the 'false' argument is presumably "do not remove duplicates" - confirm against MergedEnumerator)
        using MergedEnumerator<Toffs> itr = new MergedEnumerator<Toffs>(false, allToffs);
        termsOffsets = new List<Toffs>();
        if (!itr.MoveNext())
        {
            // No offsets at all: leave termsOffsets empty.
            return;
        }

        Toffs work = itr.Current;
        while (itr.MoveNext())
        {
            Toffs current = itr.Current;
            if (current.StartOffset <= work.EndOffset)
            {
                // Overlapping or touching range: extend the range being built.
                // NOTE(review): if Toffs is a reference type this mutates an instance
                // that also belongs to one of the source infos - confirm that is acceptable.
                work.EndOffset = Math.Max(work.EndOffset, current.EndOffset);
            }
            else
            {
                // Gap: the range being built is complete, start a new one.
                termsOffsets.Add(work);
                work = current;
            }
        }

        // Emit the last range still under construction.
        termsOffsets.Add(work);
    }
    finally
    {
        IOUtils.Dispose(allToffs);
    }
}
// Merges two PrefixCodedTerms that each hold a single term of field "foo" and checks
// that the merged enumerator yields "a", then "b", and then nothing else.
public virtual void TestMergeOne()
{
    Term t1 = new Term("foo", "a");
    PrefixCodedTerms.Builder b1 = new PrefixCodedTerms.Builder();
    b1.Add(t1);
    PrefixCodedTerms pb1 = b1.Finish();

    Term t2 = new Term("foo", "b");
    PrefixCodedTerms.Builder b2 = new PrefixCodedTerms.Builder();
    b2.Add(t2);
    PrefixCodedTerms pb2 = b2.Finish();

    // The merged enumerator is disposable; release it when the test finishes.
    using IEnumerator<Term> merged = new MergedEnumerator<Term>(pb1.GetEnumerator(), pb2.GetEnumerator());
    Assert.IsTrue(merged.MoveNext());
    Assert.AreEqual(t1, merged.Current);
    Assert.IsTrue(merged.MoveNext());
    Assert.AreEqual(t2, merged.Current);

    // Both inputs are exhausted, so the merged sequence must end here.
    Assert.IsFalse(merged.MoveNext());
}
// Builds several randomly populated PrefixCodedTerms (count drawn from
// TestUtil.NextInt32(Random, 2, 10)), merges their enumerators, and checks that the
// merged sequence matches a sorted set holding the union of all generated terms.
public virtual void TestMergeRandom()
{
    PrefixCodedTerms[] pb = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];
    JCG.SortedSet<Term> superSet = new JCG.SortedSet<Term>();

    for (int i = 0; i < pb.Length; i++)
    {
        // Collect this instance's terms in a sorted set so they are added to the builder in set order.
        JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
        int nterms = TestUtil.NextInt32(Random, 0, 10000);
        for (int j = 0; j < nterms; j++)
        {
            Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4));
            terms.Add(term);
        }
        superSet.UnionWith(terms);

        PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
        foreach (Term @ref in terms)
        {
            b.Add(@ref);
        }
        pb[i] = b.Finish();
    }

    List<IEnumerator<Term>> subs = new List<IEnumerator<Term>>();
    for (int i = 0; i < pb.Length; i++)
    {
        subs.Add(pb[i].GetEnumerator());
    }

    // Both enumerators are disposable; release them when the test finishes.
    using IEnumerator<Term> expected = superSet.GetEnumerator();
    using IEnumerator<Term> actual = new MergedEnumerator<Term>(subs.ToArray());
    while (actual.MoveNext())
    {
        Assert.IsTrue(expected.MoveNext());
        Assert.AreEqual(expected.Current, actual.Current);
    }

    // The merged sequence must not end before the expected union does.
    Assert.IsFalse(expected.MoveNext());
}
/// <summary>
/// Merging constructor. Walks the phrase infos of all supplied lists in merged order and
/// combines every run of overlapping infos into a single <see cref="WeightedPhraseInfo"/>.
/// </summary>
/// <param name="toMerge"><see cref="FieldPhraseList"/>s to merge to build this one</param>
/// <exception cref="ArgumentNullException"><paramref name="toMerge"/> is <c>null</c>.</exception>
public FieldPhraseList(FieldPhraseList[] toMerge)
{
    // Fail with a descriptive exception instead of a NullReferenceException on toMerge.Length.
    if (toMerge is null)
    {
        throw new ArgumentNullException(nameof(toMerge));
    }

    // Merge all overlapping WeightedPhraseInfos
    // Step 1. Sort by startOffset, endOffset, and boost, in that order.
    IEnumerator<WeightedPhraseInfo>[] allInfos = new IEnumerator<WeightedPhraseInfo>[toMerge.Length];
    try
    {
        int index = 0;
        foreach (FieldPhraseList fplToMerge in toMerge)
        {
            allInfos[index++] = fplToMerge.phraseList.GetEnumerator();
        }

        // (the 'false' argument is presumably "do not remove duplicates" - confirm against MergedEnumerator)
        using MergedEnumerator<WeightedPhraseInfo> itr = new MergedEnumerator<WeightedPhraseInfo>(false, allInfos);

        // Step 2. Walk the sorted list merging infos that overlap
        phraseList = new List<WeightedPhraseInfo>();
        if (!itr.MoveNext())
        {
            // Nothing to merge: leave phraseList empty.
            return;
        }

        // 'work' holds the current run of overlapping infos; workEndOffset is the
        // largest end offset seen in that run so far.
        List<WeightedPhraseInfo> work = new List<WeightedPhraseInfo>();
        WeightedPhraseInfo first = itr.Current;
        work.Add(first);
        int workEndOffset = first.EndOffset;

        while (itr.MoveNext())
        {
            WeightedPhraseInfo current = itr.Current;
            if (current.StartOffset <= workEndOffset)
            {
                // Overlaps (or touches) the current run: extend it.
                workEndOffset = Math.Max(workEndOffset, current.EndOffset);
                work.Add(current);
            }
            else
            {
                // Gap: flush the finished run. A run of one is reused as-is,
                // a longer run is collapsed through the merging constructor.
                phraseList.Add(work.Count == 1 ? work[0] : new WeightedPhraseInfo(work));
                work.Clear();
                work.Add(current);
                workEndOffset = current.EndOffset;
            }
        }

        // Flush the last run still under construction (it always has at least one element).
        phraseList.Add(work.Count == 1 ? work[0] : new WeightedPhraseInfo(work));
    }
    finally
    {
        IOUtils.Dispose(allInfos);
    }
}