Exemple #1
0
        /// <summary>
        /// Determine whether this string starts with the provided string.
        /// </summary>
        /// <param name="other">The UTF-16 string expected at the start of this string.</param>
        /// <returns>
        /// <c>true</c> if every codepoint of <paramref name="other"/> equals the codepoint at the
        /// same position in this string; otherwise <c>false</c>. An empty
        /// <paramref name="other"/> always matches.
        /// </returns>
        /// <remarks>
        /// A well-formed surrogate pair in <paramref name="other"/> is combined into a single
        /// codepoint before comparing. An unpaired surrogate (including one at the very end of
        /// <paramref name="other"/>) is compared as its raw UTF-16 code unit value instead of
        /// throwing.
        /// </remarks>
        public bool StartsWith(string other)
        {
            var it = new Utf8Enumerator(this);

            for (int i = 0; i < other.Length; i++)
            {
                if (!it.MoveNext())
                {
                    // This string ran out of codepoints before 'other' did.
                    return false;
                }

                char c = other[i];
                uint codepoint;
                if (char.IsHighSurrogate(c) && i + 1 < other.Length && char.IsLowSurrogate(other[i + 1]))
                {
                    // Well-formed pair: consume both code units as one codepoint.
                    i++;
                    codepoint = (uint)char.ConvertToUtf32(c, other[i]);
                }
                else
                {
                    // BMP character, or a lone surrogate that cannot be combined.
                    codepoint = c;
                }

                if (codepoint != it.Current.Value)
                {
                    return false;
                }
            }

            return true;
        }
Exemple #2
0
        /// <summary>
        /// Check the equality with the given System.String.
        /// </summary>
        /// <param name="s">The UTF-16 string to compare against; may be <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> if this string and <paramref name="s"/> contain the same sequence of
        /// codepoints; <c>false</c> otherwise, including when <paramref name="s"/> is
        /// <c>null</c>.
        /// </returns>
        /// <remarks>
        /// A well-formed surrogate pair in <paramref name="s"/> is combined into a single
        /// codepoint before comparing. An unpaired surrogate is compared as its raw UTF-16 code
        /// unit value instead of throwing.
        /// </remarks>
        public bool Equals(string s)
        {
            if (s == null)
            {
                // Nothing is equal to null; avoids dereferencing s below.
                return false;
            }

            var it = new Utf8Enumerator(this);
            int i  = 0;

            while (it.MoveNext())
            {
                if (i >= s.Length)
                {
                    // This string has more codepoints than s.
                    return false;
                }

                char c = s[i];
                uint a;
                if (char.IsHighSurrogate(c) && i + 1 < s.Length && char.IsLowSurrogate(s[i + 1]))
                {
                    // Well-formed pair: consume both code units as one codepoint.
                    i++;
                    a = (uint)char.ConvertToUtf32(c, s[i]);
                }
                else
                {
                    // BMP character, or a lone surrogate that cannot be combined.
                    a = c;
                }

                if (a != it.Current.Value)
                {
                    return false;
                }
                i++;
            }

            // Equal only if s was consumed completely as well.
            return i == s.Length;
        }
Exemple #3
0
        /// <summary>
        /// Create a copy of this string converted to uppercase.
        /// </summary>
        /// <returns>
        /// <c>Empty</c> when this string has zero length; otherwise a new string built from the
        /// result of <c>CharInfo.ToUpper</c> applied to each codepoint.
        /// </returns>
        /// <remarks>
        /// This allocates a new array to hold the uppercase data.
        ///
        /// The length of the output is not necessarily the same as the length of the input,
        /// which is why the string is walked twice: once to measure, once to write.
        /// </remarks>
        public utf8 ToUpper()
        {
            if (Length == 0)
            {
                return Empty;
            }

            var it  = new Utf8Enumerator(this);
            int len = 0;

            // Pass 1: total encoded size of the uppercased codepoints.
            while (it.MoveNext())
            {
                len += CharInfo.Utf8Length(CharInfo.ToUpper(it.Current.Value));
            }

            it.Reset();

            // Pass 2: write the uppercased codepoints into an exactly-sized buffer.
            var buf = new byte[len];
            using (var stream = new MemoryStream(buf))
            {
                while (it.MoveNext())
                {
                    Utf8Writer.AppendCodepoint(stream, CharInfo.ToUpper(it.Current.Value));
                }
            }

            return new utf8(buf);
        }
Exemple #4
0
        /// <summary>
        /// Create a copy of this string converted to lowercase.
        /// </summary>
        /// <param name="info">
        /// Culture whose casing rules are applied. When <c>null</c>, the current culture is used.
        /// </param>
        /// <returns>
        /// <c>Empty</c> when this string has zero length; otherwise a new string holding the
        /// lowercased data.
        /// </returns>
        /// <remarks>
        /// This allocates a new array to hold the lowercase data.
        ///
        /// The length of the output is not necessarily the same as the length of the input.
        ///
        /// Only codepoints that fit in a single UTF-16 <c>char</c> are case-mapped. Codepoints
        /// above U+FFFF are copied through unchanged.
        /// </remarks>
        public Utf8String ToLower(CultureInfo info)
        {
            if (Length == 0)
            {
                return Empty;
            }

            // A null culture falls back to the current culture, which is what the
            // culture-less char.ToLower(char) overload uses.
            var culture = info ?? CultureInfo.CurrentCulture;
            var ch      = new char[1];
            var it      = new Utf8Enumerator(this);
            int len     = 0;

            // Pass 1: measure the encoded size of the result.
            while (it.MoveNext())
            {
                if (it.Current.Value <= char.MaxValue)
                {
                    ch[0] = char.ToLower((char)it.Current.Value, culture);
                    len  += Encoding.UTF8.GetByteCount(ch);
                }
                else
                {
                    // Above U+FFFF the value does not fit in a char, so casting would corrupt
                    // it; such codepoints keep their original encoding.
                    len += it.Current.EncodedLength;
                }
            }

            it.Reset();

            var buf = new byte[len];
            int pos = 0; // write offset into buf; must start at 0, not at the measured length

            // Pass 2: emit the lowercased data.
            while (it.MoveNext())
            {
                if (it.Current.Value <= char.MaxValue)
                {
                    ch[0] = char.ToLower((char)it.Current.Value, culture);
                    pos  += Encoding.UTF8.GetBytes(ch, 0, 1, buf, pos);
                }
                else
                {
                    // Copy the original bytes of this codepoint verbatim.
                    for (int i = 0; i < it.Current.EncodedLength; i++)
                    {
                        buf[pos] = this[i + it.Current.Index];
                        pos++;
                    }
                }
            }

            return new Utf8String(buf);
        }
Exemple #5
0
        /// <summary>
        /// Locate the given Unicode codepoint in this string.
        /// </summary>
        /// <param name="codepoint">The codepoint value to search for.</param>
        /// <returns>
        /// The index the enumerator reports for the first codepoint whose value equals
        /// <paramref name="codepoint"/>, or <c>-1</c> when there is no such codepoint.
        /// </returns>
        /// <remarks>
        /// Because the search is by codepoint rather than by <c>char</c>, it also works for
        /// values such as emoji.
        /// </remarks>
        public int IndexOf(uint codepoint)
        {
            for (var cursor = new Utf8Enumerator(this); cursor.MoveNext();)
            {
                var here = cursor.Current;
                if (here.Value == codepoint)
                {
                    return here.Index;
                }
            }

            // Walked the whole string without a match.
            return -1;
        }
Exemple #6
0
        /// <summary>
        /// Create a string consisting of this one, minus any leading whitespace.
        /// </summary>
        /// <returns>
        /// The substring starting at the first non-whitespace codepoint, or
        /// <c>Utf8String.Empty</c> when this string is empty or consists only of whitespace.
        /// </returns>
        /// <remarks>
        /// No new buffer is created here; the result comes from <c>Substring</c>.
        ///
        /// Whitespace is decided by <c>char.IsWhiteSpace</c>. Codepoints above U+FFFF are never
        /// treated as whitespace.
        /// </remarks>
        public Utf8String TrimStart()
        {
            var it = new Utf8Enumerator(this);

            while (it.MoveNext())
            {
                var value = it.Current.Value;

                // Only cast to char when the value fits; truncating a larger codepoint could
                // land on a whitespace character (e.g. U+10020 would become U+0020).
                bool isWhiteSpace = value <= char.MaxValue && char.IsWhiteSpace((char)value);
                if (!isWhiteSpace)
                {
                    return Substring(it.Current.Index);
                }
            }

            return Utf8String.Empty;
        }
Exemple #7
0
        /// <summary>
        /// Locate the first instance of a character in the given list within this string.
        /// </summary>
        /// <param name="chars">The characters to search for.</param>
        /// <returns>
        /// The index the enumerator reports for the first codepoint equal to any entry of
        /// <paramref name="chars"/>, or <c>-1</c> when none of them occurs.
        /// </returns>
        public int IndexOfAny(params char[] chars)
        {
            var cursor = new Utf8Enumerator(this);

            while (cursor.MoveNext())
            {
                var here = cursor.Current;

                foreach (char candidate in chars)
                {
                    if (here.Value == candidate)
                    {
                        return here.Index;
                    }
                }
            }

            // No codepoint matched any candidate.
            return -1;
        }
Exemple #8
0
        /// <summary>
        /// Locate the given System.String in this string.
        /// </summary>
        /// <param name="other">The UTF-16 string to search for.</param>
        /// <returns>
        /// The enumerator index of the first codepoint at which the rest of this string
        /// starts with <paramref name="other"/>, or <c>-1</c> when there is no such position.
        /// </returns>
        /// <remarks>
        /// Each candidate position is tested with <c>Substring(...).StartsWith(other)</c>.
        /// When this string is empty the result is always <c>-1</c>, even for an empty
        /// <paramref name="other"/>, because no position is ever examined.
        /// </remarks>
        public int IndexOf(string other)
        {
            var cursor = new Utf8Enumerator(this);

            while (cursor.MoveNext())
            {
                var start = cursor.Current.Index;

                // Fewer units remain than 'other' has chars, so no later position can
                // match either; give up.
                if (Length - start < other.Length)
                {
                    return -1;
                }

                if (Substring(start).StartsWith(other))
                {
                    return start;
                }
            }

            return -1;
        }
Exemple #9
0
        /// <summary>
        /// Split this string based on the given separators (UTF codepoints).
        /// </summary>
        /// <param name="splitOn">The codepoints that act as separators.</param>
        /// <returns>
        /// The pieces between separators, in order. Separators are not included in the pieces.
        /// Adjacent, leading or trailing separators produce empty pieces, and a string with no
        /// separator yields a single piece covering the whole string.
        /// </returns>
        /// <remarks>
        /// Separators are full codepoints, so this can split on characters that do not fit in a
        /// single <c>char</c> (for instance, emoji).
        ///
        /// This allocates the output list; each piece is obtained through <c>Substring</c>.
        /// </remarks>
        public IList <Utf8String> Split(uint[] splitOn)
        {
            var points = new List <Utf8String>();
            var last   = 0;
            var it     = new Utf8Enumerator(this);

            while (it.MoveNext())
            {
                for (int i = 0; i < splitOn.Length; i++)
                {
                    if (it.Current.Value == splitOn[i])
                    {
                        points.Add(Substring(last, it.Current.Index - last));

                        // Resume after the separator so it is not part of the next piece.
                        last = it.Current.Index + it.Current.EncodedLength;

                        // One piece per separator occurrence, even if splitOn lists the same
                        // codepoint more than once.
                        break;
                    }
                }
            }

            // Whatever follows the last separator (possibly nothing) is the final piece.
            points.Add(Substring(last, Length - last));
            return points;
        }
Exemple #10
0
        /// <summary>
        /// Split this string based on the given separators (UTF-16 characters).
        /// </summary>
        /// <param name="splitOn">The characters that act as separators.</param>
        /// <param name="maxSplits">
        /// Maximum number of pieces to return. Once <c>maxSplits - 1</c> pieces have been
        /// produced, the rest of the string is returned as the final piece. Values of 1 or less
        /// yield the whole string as a single piece.
        /// </param>
        /// <returns>
        /// The pieces between separators, in order. Separators are not included in the pieces.
        /// </returns>
        /// <remarks>
        /// You will not be able to split on characters that do not fit in a single <c>char</c>
        /// (for instance, emoji).
        ///
        /// This allocates the output array; each piece is obtained through <c>Substring</c>.
        /// </remarks>
        public      Utf8String[] Split(char[] splitOn, int maxSplits = int.MaxValue)
        {
            var points = new List <Utf8String>();
            var last   = 0;
            var it     = new Utf8Enumerator(this);

            while (it.MoveNext())
            {
                // Check the limit before splitting, so the remainder added below is always
                // the final piece and the total never exceeds maxSplits.
                if (points.Count >= maxSplits - 1)
                {
                    break;
                }

                for (int i = 0; i < splitOn.Length; i++)
                {
                    if (it.Current.Value == (uint)splitOn[i])
                    {
                        points.Add(Substring(last, it.Current.Index - last));
                        last = it.Current.Index + it.Current.EncodedLength;

                        // One piece per separator occurrence, even if splitOn lists the same
                        // character more than once.
                        break;
                    }
                }
            }

            // Whatever follows the last consumed separator is the final piece.
            points.Add(Substring(last, Length - last));
            return points.ToArray();
        }
Exemple #11
0
        /// <summary>
        /// Advance to the next segment of <c>str</c> delimited by <c>codepoint</c>.
        /// </summary>
        /// <returns>
        /// <c>false</c> when <c>currStart</c> is already at or past <c>str.Length</c>;
        /// otherwise <c>true</c>, with <c>currStart</c> and <c>currEnd</c> bounding the new
        /// segment and <c>nextStart</c> set to where the following search begins.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the guard tests <c>currStart</c>, not <c>nextStart</c>. After a
        /// non-empty segment that ran to the end of <c>str</c>, one more call appears to
        /// succeed with an empty segment. Confirm this is intended.
        /// </remarks>
        public bool MoveNext()
        {
            if (currStart >= str.Length)
            {
                return false;
            }

            // The new segment begins where the previous call said to resume.
            currStart = nextStart;

            var scanner = new Utf8Enumerator(str.Substring(nextStart));
            while (scanner.MoveNext())
            {
                var hit = scanner.Current;
                if (hit.Value != codepoint)
                {
                    continue;
                }

                // hit.Index is relative to the substring; rebase it onto str.
                currEnd   = hit.Index + currStart;
                nextStart = currEnd + hit.EncodedLength;
                return true;
            }

            // No further separator: the segment extends to the end of str.
            nextStart = str.Length;
            currEnd   = nextStart;
            return true;
        }