/// <summary>
/// Merging constructor: builds this list from several existing lists, combining
/// every run of phrase infos whose offset ranges overlap into a single entry.
/// </summary>
/// <param name="toMerge"><see cref="FieldPhraseList"/>s to merge to build this one</param>
public FieldPhraseList(FieldPhraseList[] toMerge)
{
    // Step 1. Obtain one enumerator per source list and hand them all to a
    // MergedIterator, which is relied upon to yield the infos ordered by
    // startOffset, endOffset, and boost, in that order.
    IEnumerator<WeightedPhraseInfo>[] sources = new IEnumerator<WeightedPhraseInfo>[toMerge.Length];
    for (int i = 0; i < sources.Length; i++)
    {
        sources[i] = toMerge[i].phraseList.GetEnumerator();
    }
    MergedIterator<WeightedPhraseInfo> merged = new MergedIterator<WeightedPhraseInfo>(false, sources);

    // Step 2. Walk the ordered sequence, grouping infos that overlap.
    phraseList = new List<WeightedPhraseInfo>();
    if (!merged.MoveNext())
    {
        // Nothing to merge: leave the phrase list empty.
        return;
    }

    // "pending" holds the run of mutually overlapping infos currently being
    // collected; "pendingEnd" is the largest end offset seen in that run.
    List<WeightedPhraseInfo> pending = new List<WeightedPhraseInfo>();
    WeightedPhraseInfo head = merged.Current;
    pending.Add(head);
    int pendingEnd = head.EndOffset;

    while (merged.MoveNext())
    {
        WeightedPhraseInfo next = merged.Current;

        if (next.StartOffset > pendingEnd)
        {
            // Gap found: emit the finished run (as-is when it is a single info,
            // otherwise merged into one) and start a new run with "next".
            phraseList.Add(pending.Count == 1 ? pending[0] : new WeightedPhraseInfo(pending));
            pending.Clear();
            pending.Add(next);
            pendingEnd = next.EndOffset;
            continue;
        }

        // "next" starts at or before the end of the run: extend the run.
        pendingEnd = Math.Max(pendingEnd, next.EndOffset);
        pending.Add(next);
    }

    // Emit whatever run is still pending once the input is exhausted.
    if (pending.Count == 1)
    {
        phraseList.Add(pending[0]);
    }
    else
    {
        phraseList.Add(new WeightedPhraseInfo(pending));
        pending.Clear();
    }
}
/// <summary>
/// Tests whether the offset range of this phrase info overlaps the offset
/// range of <paramref name="other"/>.
/// </summary>
/// <param name="other">The phrase info to compare against.</param>
/// <returns>
/// <c>true</c> if at least one of the four endpoint checks below holds;
/// otherwise <c>false</c>.
/// </returns>
public bool IsOffsetOverlap(WeightedPhraseInfo other)
{
    int thisStart = GetStartOffset();
    int thisEnd = GetEndOffset();
    int otherStart = other.GetStartOffset();
    int otherEnd = other.GetEndOffset();

    return (thisStart <= otherStart && otherStart < thisEnd)    // other starts inside this range
        || (thisStart < otherEnd && otherEnd <= thisEnd)        // other ends inside this range
        || (otherStart <= thisStart && thisStart < otherEnd)    // this starts inside other's range
        || (otherStart < thisEnd && thisEnd <= otherEnd);       // this ends inside other's range
}
/// <summary>
/// Appends <paramref name="wpi"/> to the end of the phrase list unless its
/// offsets overlap an entry that is already in the list, in which case the
/// list is left unchanged.
/// </summary>
/// <param name="wpi">The candidate phrase info.</param>
void AddIfNoOverlap(WeightedPhraseInfo wpi)
{
    bool overlapsExisting = false;
    foreach (WeightedPhraseInfo candidate in phraseList)
    {
        if (candidate.IsOffsetOverlap(wpi))
        {
            // One overlap is enough to reject the newcomer; stop scanning.
            overlapsExisting = true;
            break;
        }
    }

    if (!overlapsExisting)
    {
        phraseList.AddLast(wpi);
    }
}
/// <summary>
/// Adds <paramref name="wpi"/> to <c>PhraseList</c> when it overlaps none of
/// the existing entries. When it does overlap one, its term infos are appended
/// to the first overlapping entry instead and the list itself is not extended.
/// </summary>
/// <param name="wpi">The candidate phrase info.</param>
public virtual void AddIfNoOverlap(WeightedPhraseInfo wpi)
{
    // Find the first existing entry whose offsets overlap the candidate.
    WeightedPhraseInfo overlapped = null;
    foreach (WeightedPhraseInfo candidate in PhraseList)
    {
        if (candidate.IsOffsetOverlap(wpi))
        {
            overlapped = candidate;
            break;
        }
    }

    if (overlapped == null)
    {
        PhraseList.Add(wpi);
        return;
    }

    // Simply discarding an overlapping phrase would drop, for example, the
    // second part of a hyphenated word (social-economics), and its term
    // information would then be lost to later operations. Keep that
    // information by folding it into the entry that is being retained.
    overlapped.TermsInfos.AddRange(wpi.TermsInfos);
}
/// <summary>
/// Determines whether this phrase info's offset range overlaps that of
/// <paramref name="other"/>.
/// </summary>
/// <param name="other">The phrase info whose offsets are compared with this one's.</param>
/// <returns>
/// <c>true</c> when an endpoint of either range falls within the other range
/// according to the four checks below; otherwise <c>false</c>.
/// </returns>
public bool IsOffsetOverlap(WeightedPhraseInfo other)
{
    int start = StartOffset;
    int end = EndOffset;
    int otherStart = other.StartOffset;
    int otherEnd = other.EndOffset;

    // Each check asks whether one endpoint lies inside the opposite range.
    bool otherStartsInsideThis = start <= otherStart && otherStart < end;
    bool otherEndsInsideThis = start < otherEnd && otherEnd <= end;
    bool thisStartsInsideOther = otherStart <= start && start < otherEnd;
    bool thisEndsInsideOther = otherStart < end && end <= otherEnd;

    return otherStartsInsideThis
        || otherEndsInsideThis
        || thisStartsInsideOther
        || thisEndsInsideOther;
}
/// <summary>
/// Adds <paramref name="wpi"/> at the end of the phrase list, but only when no
/// entry already in the list reports an offset overlap with it.
/// </summary>
/// <param name="wpi">The phrase info to add.</param>
void AddIfNoOverlap(WeightedPhraseInfo wpi)
{
    bool collides = false;
    foreach (WeightedPhraseInfo present in phraseList)
    {
        collides = present.IsOffsetOverlap(wpi);
        if (collides)
        {
            break;
        }
    }

    // An overlapping entry wins; the newcomer is silently dropped.
    if (collides)
    {
        return;
    }

    phraseList.AddLast(wpi);
}