Пример #1
0
        /// <summary>
        /// Concatenates the values of all tokens, span by span, into a single string.
        /// </summary>
        /// <param name="mergeEntities">
        /// When <c>true</c>, tokens that carry an entity type tagged <see cref="EntityTag.Begin"/> or
        /// <see cref="EntityTag.Inside"/> are glued to the following token with '_' instead of a space;
        /// hyphen tokens inside such entities are replaced by '_', and sentence punctuation is appended
        /// without a separator on either side.
        /// </param>
        /// <returns>
        /// The joined token values, with every run of whitespace collapsed to one space and trailing
        /// whitespace removed.
        /// </returns>
        public string TokenizedValue(bool mergeEntities = false)
        {
            var sb = new StringBuilder(Value.Length + TokensCount * 10 + 100);

            int spanCount = SpanBounds.Count;
            for (int i = 0; i < spanCount; i++)
            {
                foreach (var token in this[i])
                {
                    bool isEntityPart = mergeEntities
                                        && token.EntityTypes.Any(et => et.Tag == EntityTag.Begin || et.Tag == EntityTag.Inside);

                    if (!isEntityPart)
                    {
                        sb.Append(token.Value).Append(" ");
                        continue;
                    }

                    bool isHyphen      = token.ValueAsSpan.IsHyphen();
                    bool isNormalToken = !isHyphen && !token.ValueAsSpan.IsSentencePunctuation();

                    // A hyphen or punctuation mark attaches directly to what came before, so drop the
                    // '_' left by the previous token. The length check protects the case where this is
                    // the very first token written and the builder is still empty.
                    if (!isNormalToken && sb.Length > 0 && sb[sb.Length - 1] == '_')
                    {
                        sb.Length--;
                    }

                    if (isHyphen)
                    {
                        // The hyphen itself is represented by the joining '_'.
                        sb.Append("_");
                    }
                    else
                    {
                        sb.Append(token.Value);
                    }

                    // Only regular tokens get a trailing '_'; a hyphen already produced one above and
                    // punctuation gets none.
                    if (isNormalToken)
                    {
                        sb.Append("_");
                    }
                }
            }

            // Collapse whitespace runs and drop the trailing space added by the loop.
            return Regex.Replace(sb.ToString(), @"\s+", " ").TrimEnd();
        }
Пример #2
0
 /// <summary>
 /// Registers a new span with the given bounds and an empty token list.
 /// </summary>
 /// <param name="begin">First element stored in the span's bounds pair.</param>
 /// <param name="end">Second element stored in the span's bounds pair.</param>
 /// <returns>A <see cref="Span"/> created for this instance at the index of the newly added bounds.</returns>
 public ISpan AddSpan(int begin, int end)
 {
     int[] bounds = { begin, end };
     var tokens = new List<TokenData>();

     SpanBounds.Add(bounds);
     TokensData.Add(tokens);

     // The span just added is the last entry in SpanBounds.
     int newSpanIndex = SpanBounds.Count - 1;
     return new Span(this, newSpanIndex);
 }
Пример #3
0
 /// <summary>
 /// Empties both the span bounds and the per-span token data.
 /// </summary>
 public void Clear()
 {
     // The two collections are cleared independently of each other.
     TokensData.Clear();
     SpanBounds.Clear();
 }