コード例 #1
0
        /// <summary>Trims a string to have the given token count at max.</summary>
        /// <param name="value">The string to substring from the left side.</param>
        /// <param name="token">The max token count.</param>
        /// <returns>The new substring.</returns>
        private static string SubstringToken(string value, int token)
        {
            int tokens = 0;

            for (int i = 0; i < value.Length; i++)
            {
                int addToken = TsString.IsDoubleChar(value[i]) ? 2 : 1;
                if (tokens + addToken > token)
                {
                    return(value.Substring(0, i));
                }
                else
                {
                    tokens += addToken;
                }
            }
            return(value);
        }
コード例 #2
0
        public static IEnumerable <string> Transform(string text, LongTextBehaviour behaviour, int limit = int.MaxValue, int maxMessageSize = TsConst.MaxSizeTextMessage)
        {
            if (maxMessageSize < 4)
            {
                throw new ArgumentOutOfRangeException(nameof(maxMessageSize), "The minimum split length must be at least 4 bytes to fit all utf8 characters");
            }

            // Assuming worst case that each UTF-8 character which epands to 4 bytes.
            // If the message is still shorter we can safely return in 1 block.
            if (text.Length * 4 <= TsConst.MaxSizeTextMessage)
            {
                return new[] { text }
            }
            ;

            var bytes = Encoding.UTF8.GetBytes(text);

            // If the entire text UTF-8 encoded fits in one message we can return early.
            if (bytes.Length * 2 < TsConst.MaxSizeTextMessage)
            {
                return new[] { text }
            }
            ;

            var        list         = new List <string>();
            Span <Ind> splitIndices = stackalloc Ind[SeparatorWeight.Length];

            var block = bytes.AsSpan();

            while (block.Length > 0)
            {
                int tokenCnt = 0;

                int  i      = 0;
                bool filled = false;

                for (; i < block.Length; i++)
                {
                    tokenCnt += TsString.IsDoubleChar(block[i]) ? 2 : 1;

                    if (tokenCnt > maxMessageSize)
                    {
                        if (behaviour == LongTextBehaviour.Drop)
                        {
                            return(Enumerable.Empty <string>());
                        }

                        filled = true;
                        break;
                    }

                    for (int j = 0; j < SeparatorWeight.Length; j++)
                    {
                        if (block[i] == SeparatorWeight[j])
                        {
                            splitIndices[j] = new Ind(i, tokenCnt);
                        }
                    }
                }

                if (!filled)
                {
                    list.Add(block.NewUtf8String());
                    break;
                }

                bool hasSplit = false;
                if (behaviour != LongTextBehaviour.SplitHard)
                {
                    for (int j = 0; j < SeparatorWeight.Length; j++)
                    {
                        if (!hasSplit && splitIndices[j].i > 0)
                        {
                            list.Add(block.Slice(0, splitIndices[j].i + 1).NewUtf8String());
                            block    = block.Slice(splitIndices[j].i + 1);
                            hasSplit = true;
                        }
                    }
                    splitIndices.Fill(new Ind());
                }

                if (!hasSplit)
                {
                    // UTF-8 adjustment
                    while (i > 0 && (block[i] & 0xC0) == 0x80)
                    {
                        i--;
                    }

                    list.Add(block.Slice(0, i).NewUtf8String());
                    block = block.Slice(i);
                }

                if (--limit == 0)
                {
                    break;
                }
            }
            return(list);
        }