Exemplo n.º 1
0
        /// <summary>Cuts a string down so that its token count does not exceed a given maximum.</summary>
        /// <param name="value">The string whose leading part is kept.</param>
        /// <param name="token">The maximum number of tokens the result may contain.</param>
        /// <returns>
        /// The longest leading part of <paramref name="value"/> that fits into the token budget,
        /// or <paramref name="value"/> itself when the whole string already fits.
        /// </returns>
        private static string SubstringToken(string value, int token)
        {
            int used  = 0;
            int index = 0;

            while (index < value.Length)
            {
                // A character costs two tokens when Ts3String.IsDoubleChar reports it, otherwise one.
                int cost = 1;
                if (Ts3String.IsDoubleChar(value[index]))
                {
                    cost = 2;
                }

                int next = used + cost;
                if (next > token)
                {
                    // The current character would exceed the budget: keep everything before it.
                    return value.Substring(0, index);
                }

                used = next;
                index++;
            }

            return value;
        }
        public static IEnumerable <string> Transform(string text, LongTextBehaviour behaviour, int limit = int.MaxValue)
        {
            switch (behaviour)
            {
            case LongTextBehaviour.Drop:
            case LongTextBehaviour.SplitHard:
                int tokenCnt  = 0;
                int lastSplit = 0;
                for (int i = 0; i < text.Length; i++)
                {
                    var prevTokenCnt = tokenCnt;
                    tokenCnt += Ts3String.IsDoubleChar(text[i]) ? 2 : 1;
                    if (tokenCnt > Ts3Const.MaxSizeTextMessage)                     // TODO >= ??
                    {
                        if (behaviour == LongTextBehaviour.Drop)
                        {
                            yield break;
                        }
                        yield return(text.Substring(lastSplit, i - lastSplit));

                        limit--;
                        if (limit == 0)
                        {
                            yield break;
                        }
                        lastSplit = i;
                        tokenCnt -= prevTokenCnt;
                    }
                }
                yield return(text.Substring(lastSplit));

                break;

            case LongTextBehaviour.Split:
                tokenCnt  = 0;
                lastSplit = 0;
                var splitIndices = new (int i, int tok)[SeparatorWeight.Length];
Exemplo n.º 3
0
        /// <summary>
        /// Splits <paramref name="text"/> into parts whose token count does not exceed
        /// <paramref name="maxMessageSize"/>. The text is processed as UTF-8 bytes; every byte counts as
        /// one token, or as two tokens when <c>Ts3String.IsDoubleChar</c> reports it.
        /// </summary>
        /// <param name="text">The text to split.</param>
        /// <param name="behaviour">
        /// <see cref="LongTextBehaviour.Drop"/> returns an empty sequence when the text does not fit into a single part.
        /// <see cref="LongTextBehaviour.SplitHard"/> cuts where the budget is exhausted, moved back to the start of the
        /// current UTF-8 character. Any other value first tries to cut right after the last occurrence of a separator
        /// from <c>SeparatorWeight</c> (earlier entries are preferred) and falls back to the hard cut.
        /// </param>
        /// <param name="limit">
        /// Maximum number of parts to produce when splitting; text beyond the last produced part is discarded.
        /// Values of zero or below are not treated as a cap.
        /// </param>
        /// <param name="maxMessageSize">The token budget of a single part. Must be at least 4.</param>
        /// <returns>The parts of <paramref name="text"/> in their original order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxMessageSize"/> is smaller than 4.</exception>
        public static IEnumerable <string> Transform(string text, LongTextBehaviour behaviour, int limit = int.MaxValue, int maxMessageSize = Ts3Const.MaxSizeTextMessage)
        {
            if (text is null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            if (maxMessageSize < 4)
            {
                throw new ArgumentOutOfRangeException(nameof(maxMessageSize), "The minimum split length must be at least 4 bytes to fit all utf8 characters");
            }

            // Assuming the worst case that every character expands to 4 tokens.
            // If the message is still short enough we can safely return it in 1 block.
            // The comparison is done in 64 bit so that very long texts cannot overflow it,
            // and against the caller supplied budget rather than the global default.
            if ((long)text.Length * 4 <= maxMessageSize)
            {
                return new[] { text };
            }

            var bytes = Encoding.UTF8.GetBytes(text);

            // If the encoded text fits in one message even when every byte counts double we can return early.
            if ((long)bytes.Length * 2 < maxMessageSize)
            {
                return new[] { text };
            }

            var        list         = new List <string>();
            // One slot per separator kind: the last position (and token count) at which it was seen in the current block.
            Span <Ind> splitIndices = stackalloc Ind[SeparatorWeight.Length];

            var block = bytes.AsSpan();

            while (block.Length > 0)
            {
                int tokenCnt = 0;

                int  i      = 0;
                bool filled = false;

                // Walk the remaining bytes until the token budget is exceeded; 'i' ends up on the first byte that no longer fits.
                for (; i < block.Length; i++)
                {
                    tokenCnt += Ts3String.IsDoubleChar(block[i]) ? 2 : 1;

                    if (tokenCnt > maxMessageSize)
                    {
                        if (behaviour == LongTextBehaviour.Drop)
                        {
                            return(Enumerable.Empty <string>());
                        }

                        filled = true;
                        break;
                    }

                    for (int j = 0; j < SeparatorWeight.Length; j++)
                    {
                        if (block[i] == SeparatorWeight[j])
                        {
                            splitIndices[j] = new Ind(i, tokenCnt);
                        }
                    }
                }

                if (!filled)
                {
                    // Everything that is left fits into one final part.
                    list.Add(block.NewUtf8String());
                    break;
                }

                bool hasSplit = false;
                if (behaviour != LongTextBehaviour.SplitHard)
                {
                    // Take the first separator kind that was seen (at a position after the block start)
                    // and cut right behind its last occurrence.
                    for (int j = 0; j < SeparatorWeight.Length; j++)
                    {
                        if (splitIndices[j].i > 0)
                        {
                            list.Add(block.Slice(0, splitIndices[j].i + 1).NewUtf8String());
                            block    = block.Slice(splitIndices[j].i + 1);
                            hasSplit = true;
                            break;
                        }
                    }
                    // Recorded positions are relative to the old block start, so they must not leak into the next round.
                    splitIndices.Fill(new Ind());
                }

                if (!hasSplit)
                {
                    // UTF-8 adjustment: step back over continuation bytes (10xxxxxx)
                    // so that the cut never lands inside a multi byte character.
                    while (i > 0 && (block[i] & 0xC0) == 0x80)
                    {
                        i--;
                    }

                    list.Add(block.Slice(0, i).NewUtf8String());
                    block = block.Slice(i);
                }

                if (--limit == 0)
                {
                    break;
                }
            }
            return(list);
        }