/// <summary>
/// Merging constructor. Builds this list from several <see cref="FieldPhraseList"/>s,
/// collapsing every run of overlapping <see cref="WeightedPhraseInfo"/>s into one entry.
/// </summary>
/// <param name="toMerge"><see cref="FieldPhraseList"/>s to merge to build this one</param>
public FieldPhraseList(FieldPhraseList[] toMerge)
{
    // Step 1. Hand one enumerator per source list to a MergedIterator so the infos
    // come back as a single stream ordered by startOffset, endOffset, and boost.
    IEnumerator<WeightedPhraseInfo>[] sources = new IEnumerator<WeightedPhraseInfo>[toMerge.Length];
    for (int i = 0; i < toMerge.Length; i++)
    {
        sources[i] = toMerge[i].phraseList.GetEnumerator();
    }
    MergedIterator<WeightedPhraseInfo> merged = new MergedIterator<WeightedPhraseInfo>(false, sources);

    // Step 2. Walk the stream, grouping infos whose offsets overlap.
    phraseList = new List<WeightedPhraseInfo>();
    if (!merged.MoveNext())
    {
        // Nothing to merge: leave the phrase list empty.
        return;
    }

    // "pending" holds the current run of overlapping infos; pendingEnd is the
    // furthest end offset reached by any member of that run.
    WeightedPhraseInfo head = merged.Current;
    List<WeightedPhraseInfo> pending = new List<WeightedPhraseInfo>();
    pending.Add(head);
    int pendingEnd = head.EndOffset;

    while (merged.MoveNext())
    {
        WeightedPhraseInfo next = merged.Current;
        if (next.StartOffset > pendingEnd)
        {
            // Gap found: emit the finished run (as-is when it has a single member,
            // merged otherwise) and start a new run with "next".
            phraseList.Add(pending.Count == 1 ? pending[0] : new WeightedPhraseInfo(pending));
            pending.Clear();
            pending.Add(next);
            pendingEnd = next.EndOffset;
        }
        else
        {
            // Still overlapping: extend the run.
            pendingEnd = Math.Max(pendingEnd, next.EndOffset);
            pending.Add(next);
        }
    }

    // Emit the last run.
    phraseList.Add(pending.Count == 1 ? pending[0] : new WeightedPhraseInfo(pending));
}
/// <summary>
/// Merging constructor. Note that this just grabs seqnum from the first info.
/// <para/>
/// The boost is the sum of all boosts, and the term offsets of all infos are
/// merged so that overlapping ranges become one.
/// </summary>
/// <param name="toMerge">infos to merge; must contain at least one element</param>
/// <exception cref="ArgumentException">if <paramref name="toMerge"/> is empty</exception>
public WeightedPhraseInfo(ICollection<WeightedPhraseInfo> toMerge)
{
    // Same idea as merging FieldPhraseLists.
    // Step 1. Collect the offsets of every info (to be sorted by startOffset, endOffset)
    //         and accumulate boosts and termInfos along the way.
    IEnumerator<WeightedPhraseInfo> infos = toMerge.GetEnumerator();
    if (!infos.MoveNext())
    {
        throw new ArgumentException("toMerge must contain at least one WeightedPhraseInfo.");
    }

    WeightedPhraseInfo head = infos.Current;
    IEnumerator<Toffs>[] offsetSources = new IEnumerator<Toffs>[toMerge.Count];
    termsInfos = new List<TermInfo>();
    seqnum = head.seqnum;
    boost = head.boost;
    offsetSources[0] = head.termsOffsets.GetEnumerator();

    // NOTE(review): only the termsInfos of the second and later infos are copied;
    // the first info's termsInfos are not added. Behavior kept as-is - confirm intent.
    for (int slot = 1; infos.MoveNext(); slot++)
    {
        WeightedPhraseInfo info = infos.Current;
        boost += info.boost;
        termsInfos.AddAll(info.termsInfos);
        offsetSources[slot] = info.termsOffsets.GetEnumerator();
    }

    // Step 2. Walk the merged offsets, folding overlapping ranges together.
    MergedIterator<Toffs> merged = new MergedIterator<Toffs>(false, offsetSources);
    termsOffsets = new List<Toffs>();
    if (!merged.MoveNext())
    {
        return;
    }

    Toffs span = merged.Current;
    while (merged.MoveNext())
    {
        Toffs next = merged.Current;
        if (next.StartOffset > span.EndOffset)
        {
            // No overlap: the current span is complete.
            termsOffsets.Add(span);
            span = next;
            continue;
        }

        // Overlap: stretch the current span to cover "next".
        // NOTE(review): this writes to a Toffs obtained from one of the inputs;
        // presumably acceptable to callers - confirm.
        span.EndOffset = Math.Max(span.EndOffset, next.EndOffset);
    }
    termsOffsets.Add(span);
}
/// <summary>
/// Skips up to <paramref name="offset"/> items of <paramref name="mergedIter"/> and
/// returns up to <paramref name="count"/> of the items that follow.
/// </summary>
/// <param name="offset">number of leading items to skip</param>
/// <param name="count">maximum number of items to return</param>
/// <param name="mergedIter">iterator to read from; it is advanced by this call</param>
/// <returns>the collected items, possibly fewer than <paramref name="count"/> if the iterator runs out</returns>
private static List<T> MergeLists<T>(int offset, int count, MergedIterator<T> mergedIter)
{
    // Skip phase: each skipped item is still read and then dropped.
    int skipped = 0;
    while (skipped < offset && mergedIter.MoveNext())
    {
        var ignored = mergedIter.Current;
        skipped++;
    }

    // Take phase.
    List<T> page = new List<T>();
    int taken = 0;
    while (taken < count && mergedIter.MoveNext())
    {
        page.Add(mergedIter.Current);
        taken++;
    }

    return page;
}
/// <summary>
/// Merges two single-term sources and checks that both terms come out,
/// "a" before "b".
/// </summary>
public virtual void TestMergeOne()
{
    // Source 1: the single term foo:a.
    Term termA = new Term("foo", "a");
    PrefixCodedTerms.Builder builderA = new PrefixCodedTerms.Builder();
    builderA.Add(termA);
    PrefixCodedTerms sourceA = builderA.Finish();

    // Source 2: the single term foo:b.
    Term termB = new Term("foo", "b");
    PrefixCodedTerms.Builder builderB = new PrefixCodedTerms.Builder();
    builderB.Add(termB);
    PrefixCodedTerms sourceB = builderB.Finish();

    var enumA = sourceA.GetEnumerator();
    var enumB = sourceB.GetEnumerator();
    IEnumerator<Term> merged = new MergedIterator<Term>(enumA, enumB);

    Assert.IsTrue(merged.MoveNext());
    Assert.AreEqual(termA, merged.Current);

    Assert.IsTrue(merged.MoveNext());
    Assert.AreEqual(termB, merged.Current);
}
/// <summary>
/// Verifies that merging two <see cref="PrefixCodedTerms"/>, each holding one term,
/// yields the two terms in order.
/// </summary>
public virtual void TestMergeOne()
{
    Term first = new Term("foo", "a");
    PrefixCodedTerms.Builder firstBuilder = new PrefixCodedTerms.Builder();
    firstBuilder.Add(first);
    PrefixCodedTerms firstTerms = firstBuilder.Finish();

    Term second = new Term("foo", "b");
    PrefixCodedTerms.Builder secondBuilder = new PrefixCodedTerms.Builder();
    secondBuilder.Add(second);
    PrefixCodedTerms secondTerms = secondBuilder.Finish();

    IEnumerator<Term> merged = new MergedIterator<Term>(
        firstTerms.GetEnumerator(),
        secondTerms.GetEnumerator());

    // Expect exactly this sequence at the head of the merged stream.
    Term[] expectedOrder = new Term[] { first, second };
    foreach (Term expected in expectedOrder)
    {
        Assert.IsTrue(merged.MoveNext());
        Assert.AreEqual(expected, merged.Current);
    }
}
/// <summary>
/// Checks that a <c>MergedIterator</c> yields nothing when it has no sources,
/// one empty source, or a random number (0..99) of empty sources.
/// </summary>
public virtual void TestMergeEmpty()
{
    // No sources at all.
    IEnumerator<int> noSources = new MergedIterator<int>();
    Assert.IsFalse(noSources.MoveNext());

    // A single empty source.
    IEnumerator<int> oneEmptySource = new MergedIterator<int>((new List<int>()).GetEnumerator());
    Assert.IsFalse(oneEmptySource.MoveNext());

    // Many empty sources.
    int sourceCount = Random().Next(100);
    IEnumerator<int>[] emptySources = new IEnumerator<int>[sourceCount];
    for (int slot = 0; slot < emptySources.Length; slot++)
    {
        emptySources[slot] = (new List<int>()).GetEnumerator();
    }
    IEnumerator<int> manyEmptySources = new MergedIterator<int>(emptySources);
    Assert.IsFalse(manyEmptySources.MoveNext());
}
/// <summary>
/// Builds 2..10 random <see cref="PrefixCodedTerms"/>, merges them, and checks the
/// merged stream against a sorted set holding the union of all generated terms.
/// </summary>
public virtual void TestMergeRandom()
{
    int sourceCount = TestUtil.NextInt32(Random, 2, 10);
    PrefixCodedTerms[] sources = new PrefixCodedTerms[sourceCount];
    JCG.SortedSet<Term> union = new JCG.SortedSet<Term>();

    for (int s = 0; s < sources.Length; s++)
    {
        // Generate the (sorted, de-duplicated) terms of this source.
        JCG.SortedSet<Term> sourceTerms = new JCG.SortedSet<Term>();
        int termCount = TestUtil.NextInt32(Random, 0, 10000);
        for (int t = 0; t < termCount; t++)
        {
            string field = TestUtil.RandomUnicodeString(Random, 2);
            string text = TestUtil.RandomUnicodeString(Random, 4);
            sourceTerms.Add(new Term(field, text));
        }
        union.UnionWith(sourceTerms);

        // Encode them in set order.
        PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
        foreach (Term term in sourceTerms)
        {
            builder.Add(term);
        }
        sources[s] = builder.Finish();
    }

    IEnumerator<Term>[] enumerators = new IEnumerator<Term>[sources.Length];
    for (int s = 0; s < sources.Length; s++)
    {
        enumerators[s] = sources[s].GetEnumerator();
    }

    IEnumerator<Term> expected = union.GetEnumerator();
    IEnumerator<Term> actual = new MergedIterator<Term>(enumerators);
    while (actual.MoveNext())
    {
        Assert.IsTrue(expected.MoveNext());
        Assert.AreEqual(expected.Current, actual.Current);
    }

    // The merged stream must not end before the expected one does.
    Assert.IsFalse(expected.MoveNext());
}
/// <summary>
/// Merges 2..10 randomly populated <see cref="PrefixCodedTerms"/> and verifies that the
/// result matches, term for term, a sorted set containing every generated term.
/// </summary>
public virtual void TestMergeRandom()
{
    int sourceCount = TestUtil.NextInt(Random(), 2, 10);
    PrefixCodedTerms[] sources = new PrefixCodedTerms[sourceCount];
    SortedSet<Term> union = new SortedSet<Term>();

    for (int s = 0; s < sources.Length; s++)
    {
        // Random terms for this source, kept sorted and unique by the set.
        SortedSet<Term> sourceTerms = new SortedSet<Term>();
        int termCount = TestUtil.NextInt(Random(), 0, 10000);
        for (int t = 0; t < termCount; t++)
        {
            string field = TestUtil.RandomUnicodeString(Random(), 2);
            string text = TestUtil.RandomUnicodeString(Random(), 4);
            sourceTerms.Add(new Term(field, text));
        }
        union.AddAll(sourceTerms);

        // Encode the terms in set order.
        PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
        foreach (Term term in sourceTerms)
        {
            builder.Add(term);
        }
        sources[s] = builder.Finish();
    }

    IEnumerator<Term>[] enumerators = new IEnumerator<Term>[sources.Length];
    for (int s = 0; s < sources.Length; s++)
    {
        enumerators[s] = sources[s].GetEnumerator();
    }

    IEnumerator<Term> expected = union.GetEnumerator();
    IEnumerator<Term> actual = new MergedIterator<Term>(enumerators);
    while (actual.MoveNext())
    {
        Assert.IsTrue(expected.MoveNext());
        Assert.AreEqual(expected.Current, actual.Current);
    }

    // Every expected term must have been produced by the merge.
    Assert.IsFalse(expected.MoveNext());
}
/// <summary>
/// Spreads a range of consecutive integers over a random number of lists, merges the
/// lists with a <c>MergedIterator</c>, and checks the merged output value by value.
/// </summary>
/// <param name="itrsWithVal">number of distinct lists that receive each value</param>
/// <param name="specifiedValsOnItr">
/// copies of the value added to each chosen list; when negative, a random count in
/// <c>1..-specifiedValsOnItr</c> is drawn for every chosen list
/// </param>
/// <param name="removeDups">passed to the <c>MergedIterator</c> as its first constructor argument</param>
private void TestCase(int itrsWithVal, int specifiedValsOnItr, bool removeDups)
{
    // Build a random number of lists
    IList<int?> expected = new List<int?>();
    Random random = new Random(Random().Next());
    int numLists = itrsWithVal + random.Next(1000 - itrsWithVal);
    IList<int>[] lists = new IList<int>[numLists];
    for (int slot = 0; slot < numLists; slot++)
    {
        lists[slot] = new List<int>();
    }

    int start = random.Next(1000000);
    int end = start + VALS_TO_MERGE / itrsWithVal / Math.Abs(specifiedValsOnItr);
    for (int value = start; value < end; value++)
    {
        // Lists at index >= candidates have already received this value.
        int candidates = lists.Length;
        int largestRun = 0;
        int totalRun = 0;
        for (int pickNo = 0; pickNo < itrsWithVal; pickNo++)
        {
            int picked = random.Next(candidates);

            int run;
            if (specifiedValsOnItr < 0)
            {
                run = 1 + random.Next(-specifiedValsOnItr);
            }
            else
            {
                run = specifiedValsOnItr;
            }

            largestRun = Math.Max(largestRun, run);
            totalRun += run;
            for (int copy = 0; copy < run; copy++)
            {
                lists[picked].Add(value);
            }

            // Move the chosen list past the candidate range so it is not picked
            // again for this value.
            candidates = candidates - 1;
            ArrayUtil.Swap(lists, picked, candidates);
        }

        // With removeDups the value is expected as often as its longest run in a
        // single list; otherwise once per copy added across all lists.
        int expectedCopies = removeDups ? largestRun : totalRun;
        for (int copy = 0; copy < expectedCopies; copy++)
        {
            expected.Add(value);
        }
    }

    // Now check that they get merged cleanly
    IEnumerator<int>[] itrs = new IEnumerator<int>[numLists];
    for (int slot = 0; slot < numLists; slot++)
    {
        itrs[slot] = lists[slot].GetEnumerator();
    }

    MergedIterator<int> mergedItr = new MergedIterator<int>(removeDups, itrs);
    foreach (int? expectedValue in expected)
    {
        Assert.IsTrue(mergedItr.MoveNext());
        Assert.AreEqual(expectedValue, mergedItr.Current);
    }
    Assert.IsFalse(mergedItr.MoveNext());
}