/// <summary>
/// Compacts <paramref name="list"/> in place by joining runs of touching tags: when a tag
/// starts exactly where the previous (possibly already joined) tag ends and both have the
/// same classification type, the two are replaced by one tag spanning both.
/// </summary>
/// <param name="list">
/// Tags to compact. The list instance itself is rewritten and truncated.
/// NOTE(review): only consecutive elements are compared, so this presumably expects the
/// list to be ordered by position already — confirm against callers.
/// </param>
/// <returns>The same <paramref name="list"/> instance after compaction.</returns>
static List<TextClassificationTag> Merge(List<TextClassificationTag> list) {
    int count = list.Count;
    if (count < 2) {
        return list;
    }

    // 'slot' is the index of the last output element written so far;
    // 'current' always mirrors list[slot].
    int slot = 0;
    var current = list[0];

    for (int i = 1; i < count; i++) {
        var next = list[i];
        bool joinable = current.ClassificationType == next.ClassificationType
            && current.Span.End == next.Span.Start;

        if (!joinable) {
            // Start a new output element.
            slot++;
            list[slot] = next;
            current = next;
            continue;
        }

        // Grow the current output element so it also covers 'next'.
        current = new TextClassificationTag(
            Span.FromBounds(current.Span.Start, next.Span.End),
            current.ClassificationType);
        list[slot] = current;
    }

    // Drop the stale tail left behind by the compaction.
    int kept = slot + 1;
    if (kept != count) {
        list.RemoveRange(kept, count - kept);
    }

    return list;
}
/// <summary>
/// Gathers the tags produced by every classifier in <c>textClassifiers</c> for
/// <paramref name="context"/> and returns them as a single sequence.
/// </summary>
/// <remarks>
/// Tags whose span ends past the end of <c>context.Text</c> are clamped to the text length,
/// and tags whose (possibly clamped) span is empty are discarded. When two or more tags
/// remain they are sorted with <c>TextClassificationTagComparer.Instance</c>. If
/// <c>HasOverlaps</c> reports overlaps, the tags are cut into consecutive segments; a segment
/// covered by more than one distinct classification type gets a transient type created from
/// all of them. The final list is passed through <c>Merge</c>.
/// NOTE(review): the segment sweep below relies on the comparer ordering tags by span
/// start — the comparer is not visible here, confirm.
/// </remarks>
/// <param name="context">Classification context; its <c>Text</c> length bounds all spans.</param>
/// <returns>
/// The collected tags. With zero or one tag the collected list is returned as is; otherwise
/// the result of <c>Merge</c>.
/// </returns>
public IEnumerable<TextClassificationTag> GetTags(TextClassifierContext context) {
    var collected = new List<TextClassificationTag>();
    int limit = context.Text.Length;

    foreach (var classifier in textClassifiers) {
        foreach (var produced in classifier.GetTags(context)) {
            var tag = produced;
            if (produced.Span.End > limit) {
                // Clamp both bounds so the span never reaches past the text.
                int clampedStart = Math.Min(limit, produced.Span.Start);
                int clampedEnd = Math.Min(limit, produced.Span.End);
                tag = new TextClassificationTag(
                    Span.FromBounds(clampedStart, clampedEnd),
                    produced.ClassificationType);
            }

            if (tag.Span.Length != 0) {
                collected.Add(tag);
            }
        }
    }

    if (collected.Count < 2) {
        return collected;
    }

    collected.Sort(TextClassificationTagComparer.Instance);

    // Common case
    if (!HasOverlaps(collected)) {
        return Merge(collected);
    }

    // Sweep over the text: 'cursor' is the offset where the next output segment starts and
    // 'first' is the index of the first tag that has not ended at or before 'cursor'.
    int first = 0;
    int cursor = 0;
    var segments = new List<TextClassificationTag>();
    // Scratch list reused for every segment; it is the instance handed to the registry service.
    var types = new List<IClassificationType>();

    for (;;) {
        // Skip tags that end at or before the cursor.
        while (first < collected.Count && collected[first].Span.End <= cursor) {
            first++;
        }
        if (first >= collected.Count) {
            break;
        }

        var head = collected[first];
        cursor = Math.Max(cursor, head.Span.Start);
        int segmentEnd = head.Span.End;

        types.Clear();
        types.Add(head.ClassificationType);

        for (int i = first + 1; i < collected.Count; i++) {
            var other = collected[i];

            int otherStart = other.Span.Start;
            if (otherStart > cursor) {
                // This tag begins after the cursor: the segment must stop where it starts.
                segmentEnd = Math.Min(segmentEnd, otherStart);
                break;
            }

            int otherEnd = other.Span.End;
            if (otherEnd <= cursor) {
                // Already finished before the cursor; contributes nothing here.
                continue;
            }

            // This tag covers the cursor: it limits the segment and contributes its type.
            segmentEnd = Math.Min(segmentEnd, otherEnd);
            var otherType = other.ClassificationType;
            if (!types.Contains(otherType)) {
                types.Add(otherType);
            }
        }

        Debug.Assert(cursor < segmentEnd);
        var segmentSpan = new Span(cursor, segmentEnd - cursor);
        var segmentType = types.Count != 1
            ? classificationTypeRegistryService.CreateTransientClassificationType(types)
            : types[0];
        segments.Add(new TextClassificationTag(segmentSpan, segmentType));

        cursor = segmentEnd;
    }

    Debug.Assert(!HasOverlaps(segments));
    return Merge(segments);
}