예제 #1
0
        /// <summary>
        /// Collapses runs of touching tags that share a classification type into single tags.
        /// The work is done in place: <paramref name="list"/> is overwritten, truncated and returned.
        /// </summary>
        /// <param name="list">
        /// Tags to coalesce. Only consecutive elements are compared, so the list is presumably
        /// expected to be ordered by span already (the visible caller sorts before calling).
        /// </param>
        /// <returns>The same <paramref name="list"/> instance, possibly shortened.</returns>
        static List<TextClassificationTag> Merge(List<TextClassificationTag> list)
        {
            int total = list.Count;
            if (total < 2)
            {
                return list;
            }

            // 'kept' indexes the slot holding the tag currently being extended. Slots before it
            // are final output; slots after it (up to 'i') hold stale, already-consumed entries.
            int kept = 0;
            var current = list[0];
            for (int i = 1; i < total; i++)
            {
                var next = list[i];
                if (current.ClassificationType == next.ClassificationType && current.Span.End == next.Span.Start)
                {
                    // Same type and no gap: grow the current tag so it also covers 'next'.
                    current = new TextClassificationTag(Span.FromBounds(current.Span.Start, next.Span.End), current.ClassificationType);
                }
                else
                {
                    // Different type or a gap: 'next' opens a new output slot.
                    kept++;
                    current = next;
                }
                list[kept] = current;
            }

            // Drop the stale tail left behind by entries that were merged into earlier slots.
            int outputCount = kept + 1;
            if (outputCount != total)
            {
                list.RemoveRange(outputCount, total - outputCount);
            }

            return list;
        }
예제 #2
0
        /// <summary>
        /// Gathers the tags produced by every classifier in <c>textClassifiers</c> and returns them
        /// as one list in which no two tags overlap and touching tags of the same type are merged.
        /// </summary>
        /// <remarks>
        /// Tags reaching past the end of <c>context.Text</c> are clamped to the text length and
        /// empty tags are discarded. Where tags from different sources overlap, the overlapping
        /// region is emitted as its own tag whose type is either the single type covering it or a
        /// transient type created from all distinct types covering it.
        /// </remarks>
        /// <param name="context">Context handed to each classifier; its text length bounds all spans.</param>
        /// <returns>The resulting tags (a <see cref="List{T}"/> instance).</returns>
        public IEnumerable<TextClassificationTag> GetTags(TextClassifierContext context)
        {
            var tags = new List<TextClassificationTag>();
            int textLength = context.Text.Length;

            foreach (var classifier in textClassifiers)
            {
                foreach (var produced in classifier.GetTags(context))
                {
                    var tag = produced;
                    if (tag.Span.End > textLength)
                    {
                        // The end is known to exceed the text here, so it clamps to textLength;
                        // the start may or may not need clamping.
                        int clampedStart = Math.Min(textLength, tag.Span.Start);
                        tag = new TextClassificationTag(Span.FromBounds(clampedStart, textLength), tag.ClassificationType);
                    }
                    if (tag.Span.Length != 0)
                    {
                        tags.Add(tag);
                    }
                }
            }

            // Zero or one tag: nothing to sort, split or merge.
            if (tags.Count < 2)
            {
                return tags;
            }

            // NOTE(review): the sweep below relies on this ordering; presumably the comparer
            // orders by span start -- confirm against TextClassificationTagComparer.
            tags.Sort(TextClassificationTagComparer.Instance);

            // Common case: nothing overlaps, so only adjacent-tag merging is needed.
            bool overlapping = HasOverlaps(tags);
            if (!overlapping)
            {
                return Merge(tags);
            }

            var segments = new List<TextClassificationTag>();
            var activeTypes = new List<IClassificationType>();
            int first = 0;   // index of the first tag that has not been fully consumed
            int offset = 0;  // text position up to which output has been produced

            while (true)
            {
                // Skip tags that end at or before the current position.
                while (first < tags.Count && tags[first].Span.End <= offset)
                {
                    first++;
                }
                if (first >= tags.Count)
                {
                    break;
                }

                var head = tags[first];
                if (head.Span.Start > offset)
                {
                    // Gap before the next tag: jump to where it begins.
                    offset = head.Span.Start;
                }
                int segmentEnd = head.Span.End;
                activeTypes.Clear();
                activeTypes.Add(head.ClassificationType);

                for (int i = first + 1; i < tags.Count; i++)
                {
                    var other = tags[i];
                    int otherStart = other.Span.Start;
                    if (otherStart > offset)
                    {
                        // This tag begins after the current position, so the segment must stop
                        // no later than its start. The scan ends here.
                        segmentEnd = Math.Min(segmentEnd, otherStart);
                        break;
                    }

                    int otherEnd = other.Span.End;
                    if (otherEnd > offset)
                    {
                        // 'other' covers the current position: it limits the segment's end and
                        // contributes its type (once).
                        segmentEnd = Math.Min(segmentEnd, otherEnd);
                        if (!activeTypes.Contains(other.ClassificationType))
                        {
                            activeTypes.Add(other.ClassificationType);
                        }
                    }
                }

                Debug.Assert(offset < segmentEnd);
                var segmentSpan = new Span(offset, segmentEnd - offset);
                // NOTE(review): activeTypes is cleared and reused on the next iteration; this
                // assumes the registry service does not retain the list it is given -- confirm.
                var segmentType = activeTypes.Count == 1
                    ? activeTypes[0]
                    : classificationTypeRegistryService.CreateTransientClassificationType(activeTypes);
                segments.Add(new TextClassificationTag(segmentSpan, segmentType));
                offset = segmentEnd;
            }

            Debug.Assert(!HasOverlaps(segments));
            return Merge(segments);
        }