/// <summary>
/// Create a copy of this string converted to uppercase.
/// </summary>
/// <remarks>
/// This allocates a new array to hold the uppercase data.
///
/// The length of the output is not necessarily the same as the length of the input.
/// </remarks>
/// <returns>
/// The uppercased copy, or <c>Empty</c> when this string has a length of zero.
/// </returns>
public utf8 ToUpper()
{
    if (Length == 0)
    {
        return(Empty);
    }

    // Pass 1: add up CharInfo.Utf8Length of every uppercased codepoint so the
    // output buffer can be allocated exactly once.
    var it = new Utf8Enumerator(this);
    int len = 0;
    while (it.MoveNext())
    {
        len += CharInfo.Utf8Length(CharInfo.ToUpper(it.Current.Value));
    }

    // Pass 2: write the uppercased codepoints into the buffer through a stream
    // wrapped around it. The stream is disposed deterministically; the buffer
    // itself stays valid because the stream does not own it.
    it.Reset();
    var buf = new byte[len];
    using (var stream = new MemoryStream(buf))
    {
        while (it.MoveNext())
        {
            Utf8Writer.AppendCodepoint(stream, CharInfo.ToUpper(it.Current.Value));
        }
    }

    return(new utf8(buf));
}
/// <summary>
/// Create a copy of this string converted to lowercase.
/// </summary>
/// <remarks>
/// This allocates a new array to hold the lowercase data.
///
/// The length of the output is not necessarily the same as the length of the input.
///
/// Only codepoints in the Basic Multilingual Plane (up to U+FFFF) are case-mapped;
/// codepoints above that range are copied through unchanged.
/// </remarks>
/// <param name="info">
/// Culture whose casing rules are applied. When null, the current culture is used.
/// </param>
/// <returns>
/// The lowercased copy, or <c>Empty</c> when this string has a length of zero.
/// </returns>
public Utf8String ToLower(CultureInfo info)
{
    if (Length == 0)
    {
        return(Empty);
    }

    // char.ToLower(char) without a culture uses the current culture, so falling
    // back to it keeps null callers working the way the culture-less call did.
    var culture = info ?? CultureInfo.CurrentCulture;
    var ch = new char[1];
    var it = new Utf8Enumerator(this);

    // Pass 1: measure the encoded size of the result.
    int len = 0;
    while (it.MoveNext())
    {
        if (it.Current.Value <= 0xFFFF)
        {
            // Safe to narrow: the codepoint fits in a single UTF-16 code unit.
            ch[0] = char.ToLower((char)it.Current.Value, culture);
            len += Encoding.UTF8.GetByteCount(ch);
        }
        else
        {
            // Supplementary codepoint: casting it to char would truncate it into
            // an unrelated BMP character, so it is kept as-is.
            len += it.Current.EncodedLength;
        }
    }

    // Pass 2: fill the buffer. The write position must start again at zero;
    // reusing the measured length as the offset would write past the end.
    it.Reset();
    var buf = new byte[len];
    int pos = 0;
    while (it.MoveNext())
    {
        if (it.Current.Value <= 0xFFFF)
        {
            ch[0] = char.ToLower((char)it.Current.Value, culture);
            pos += Encoding.UTF8.GetBytes(ch, 0, 1, buf, pos);
        }
        else
        {
            // Copy the original encoded bytes of this codepoint verbatim.
            for (int i = 0; i < it.Current.EncodedLength; i++)
            {
                buf[pos] = this[i + it.Current.Index];
                pos++;
            }
        }
    }

    return(new Utf8String(buf));
}
/// <summary>
/// Check the equality with the given System.String.
/// </summary>
/// <remarks>
/// The comparison is done codepoint by codepoint. A well-formed surrogate pair in
/// <paramref name="s"/> is combined into one codepoint; an unpaired surrogate is
/// compared as its raw UTF-16 code unit value instead of raising an exception.
/// </remarks>
/// <param name="s">The string to compare against. May be null.</param>
/// <returns>
/// True when both strings contain the same sequence of codepoints; false otherwise,
/// including when <paramref name="s"/> is null.
/// </returns>
public bool Equals(string s)
{
    if (s == null)
    {
        return(false);
    }

    var it = new Utf8Enumerator(this);
    int i = 0;
    while (it.MoveNext())
    {
        if (i >= s.Length)
        {
            // This string has more codepoints than s.
            return(false);
        }

        var c = s[i];
        uint a;
        if (char.IsHighSurrogate(c) && i + 1 < s.Length && char.IsLowSurrogate(s[i + 1]))
        {
            // Well-formed pair: consume both code units.
            a = (uint)char.ConvertToUtf32(c, s[i + 1]);
            i += 2;
        }
        else
        {
            // BMP character, or a surrogate without a valid partner.
            a = (uint)c;
            i++;
        }

        if (a != it.Current.Value)
        {
            return(false);
        }
    }

    // Equal only if s was consumed completely as well.
    return(i == s.Length);
}
/// <summary>
/// Determine whether this string starts with the provided string.
/// </summary>
/// <remarks>
/// The comparison is done codepoint by codepoint. A well-formed surrogate pair in
/// <paramref name="other"/> is combined into one codepoint; an unpaired surrogate is
/// compared as its raw UTF-16 code unit value instead of raising an exception.
/// </remarks>
/// <param name="other">The prefix to look for.</param>
/// <returns>
/// True when every codepoint of <paramref name="other"/> matches the codepoint at the
/// same position in this string (trivially true for an empty prefix); false otherwise.
/// </returns>
public bool StartsWith(string other)
{
    var it = new Utf8Enumerator(this);
    int i = 0;
    while (i < other.Length)
    {
        if (!it.MoveNext())
        {
            // This string ended before the prefix did.
            return(false);
        }

        var c = other[i];
        uint codepoint;
        if (char.IsHighSurrogate(c) && i + 1 < other.Length && char.IsLowSurrogate(other[i + 1]))
        {
            // Well-formed pair: consume both code units.
            codepoint = (uint)char.ConvertToUtf32(c, other[i + 1]);
            i += 2;
        }
        else
        {
            // BMP character, or a surrogate without a valid partner.
            codepoint = (uint)c;
            i++;
        }

        if (codepoint != it.Current.Value)
        {
            return(false);
        }
    }

    return(true);
}
/// <summary>
/// Find the first occurrence of a Unicode codepoint in this string.
/// </summary>
/// <remarks>
/// Works for any codepoint, including those outside the Basic Multilingual Plane
/// (emoji, for example).
/// </remarks>
/// <param name="codepoint">The codepoint to search for.</param>
/// <returns>
/// The index the enumerator reports for the first matching codepoint, or -1 when
/// the codepoint does not occur.
/// </returns>
public int IndexOf(uint codepoint)
{
    for (var cursor = new Utf8Enumerator(this); cursor.MoveNext();)
    {
        if (cursor.Current.Value != codepoint)
        {
            continue;
        }

        return(cursor.Current.Index);
    }

    return(-1);
}
/// <summary>
/// Create a string consisting of this one, minus any leading whitespace.
/// </summary>
/// <remarks>
/// This method does not allocate.
///
/// Whitespace is decided by <c>char.IsWhiteSpace</c>, which only covers codepoints up
/// to U+FFFF. Codepoints above that range are never treated as whitespace.
/// </remarks>
/// <returns>
/// The substring starting at the first non-whitespace codepoint, or
/// <c>Utf8String.Empty</c> when the string is empty or consists only of whitespace.
/// </returns>
public Utf8String TrimStart()
{
    var it = new Utf8Enumerator(this);
    while (it.MoveNext())
    {
        var value = it.Current.Value;

        // Narrowing a supplementary codepoint to char would truncate it (U+12000
        // would be tested as U+2000, a space), so only BMP values reach the cast.
        if (value > 0xFFFF || !char.IsWhiteSpace((char)value))
        {
            return(Substring(it.Current.Index));
        }
    }

    return(Utf8String.Empty);
}
/// <summary>
/// Find the first codepoint in this string that equals any of the given characters.
/// </summary>
/// <param name="chars">The characters to look for.</param>
/// <returns>
/// The index the enumerator reports for the first codepoint matching one of
/// <paramref name="chars"/>, or -1 when none of them occurs.
/// </returns>
public int IndexOfAny(params char[] chars)
{
    var cursor = new Utf8Enumerator(this);
    while (cursor.MoveNext())
    {
        foreach (char candidate in chars)
        {
            if (cursor.Current.Value == candidate)
            {
                return(cursor.Current.Index);
            }
        }
    }

    return(-1);
}
/// <summary>
/// Locate the given System.String in this string.
/// </summary>
/// <remarks>
/// Each candidate position is tested with <c>Substring</c> followed by
/// <c>StartsWith</c>.
/// </remarks>
/// <param name="other">The string to search for.</param>
/// <returns>
/// The index the enumerator reports for the first codepoint at which
/// <paramref name="other"/> begins, 0 when <paramref name="other"/> is empty,
/// or -1 when there is no match.
/// </returns>
public int IndexOf(string other)
{
    // An empty needle matches at the very start. Without this guard the answer
    // was 0 for a non-empty string but -1 for an empty one.
    if (other != null && other.Length == 0)
    {
        return(0);
    }

    var it = new Utf8Enumerator(this);
    while (it.MoveNext())
    {
        // Stop early once what remains of this string is shorter than the
        // number of UTF-16 code units in the needle.
        if (Length - it.Current.Index < other.Length)
        {
            break;
        }

        if (Substring(it.Current.Index).StartsWith(other))
        {
            return(it.Current.Index);
        }
    }

    return(-1);
}
/// <summary>
/// Split this string based on the given separators (Unicode codepoints).
/// </summary>
/// <remarks>
/// Separators may be any codepoint, including ones outside the Basic Multilingual
/// Plane such as emoji. Separators are not included in the resulting pieces.
///
/// This allocates for the output list, but the pieces are produced with
/// <c>Substring</c> rather than by building new buffers here.
/// </remarks>
/// <param name="splitOn">The codepoints to split on.</param>
/// <returns>
/// The pieces between separators, in order. There is always at least one piece;
/// adjacent, leading or trailing separators produce empty pieces.
/// </returns>
public IList<Utf8String> Split(uint[] splitOn)
{
    var points = new List<Utf8String>();
    var last = 0;
    var it = new Utf8Enumerator(this);
    while (it.MoveNext())
    {
        for (int i = 0; i < splitOn.Length; i++)
        {
            if (it.Current.Value == splitOn[i])
            {
                points.Add(Substring(last, it.Current.Index - last));

                // The next piece starts after the separator, not on it.
                last = it.Current.Index + it.Current.EncodedLength;

                // One split per codepoint, even if splitOn lists it twice.
                break;
            }
        }
    }

    // The final piece runs to the end of the string. The enumerator's Current
    // is not relied upon once MoveNext has returned false.
    points.Add(Substring(last, Length - last));
    return(points);
}
/// <summary>
/// Split this string based on the given separators (UTF-16 characters).
/// </summary>
/// <remarks>
/// You will not be able to split on characters that do not fit in a single
/// <c>char</c> (for instance, emoji). Separators are not included in the pieces.
///
/// This allocates for the output array, but the pieces are produced with
/// <c>Substring</c> rather than by building new buffers here.
/// </remarks>
/// <param name="splitOn">The characters to split on.</param>
/// <param name="maxSplits">
/// Upper bound on the number of pieces returned. Once the limit is reached the
/// remainder of the string, separators included, becomes the last piece. Values of
/// 1 or less return the whole string as a single piece.
/// </param>
/// <returns>The pieces between separators, in order; never empty.</returns>
public Utf8String[] Split(char[] splitOn, int maxSplits = int.MaxValue)
{
    var points = new List<Utf8String>();
    var last = 0;
    var it = new Utf8Enumerator(this);

    // The limit is tested before looking at the next codepoint so that a
    // separator is never split on once only the final piece may still be added.
    while (points.Count + 1 < maxSplits && it.MoveNext())
    {
        for (int i = 0; i < splitOn.Length; i++)
        {
            if (it.Current.Value == (uint)splitOn[i])
            {
                points.Add(Substring(last, it.Current.Index - last));
                last = it.Current.Index + it.Current.EncodedLength;

                // One split per codepoint, even if splitOn lists it twice;
                // a second match would request a negative-length piece.
                break;
            }
        }
    }

    points.Add(Substring(last, Length - last));
    return(points.ToArray());
}
/// <summary>
/// Advance to the next segment of <c>str</c> delimited by <c>codepoint</c>.
/// </summary>
/// <remarks>
/// On success <c>currStart</c> and <c>currEnd</c> bound the new segment and
/// <c>nextStart</c> is where the following search begins (just past the separator,
/// or the end of the string when no further separator exists).
///
/// NOTE(review): the termination test looks at the start of the previous segment.
/// If the fields start at zero, a string whose last segment is non-empty appears to
/// yield one additional empty segment before this returns false, and an empty
/// string yields none. Confirm against the rest of the enumerator.
/// </remarks>
/// <returns>True when a segment was produced; false when enumeration is finished.</returns>
public bool MoveNext()
{
    if (currStart >= str.Length)
    {
        return(false);
    }

    currStart = nextStart;

    // Search the remainder of the string for the next separator.
    for (var scan = new Utf8Enumerator(str.Substring(nextStart)); scan.MoveNext();)
    {
        if (scan.Current.Value != codepoint)
        {
            continue;
        }

        // Indices from the scan are relative to the remainder; rebase them.
        currEnd = currStart + scan.Current.Index;
        nextStart = currEnd + scan.Current.EncodedLength;
        return(true);
    }

    // No separator left: the segment extends to the end of the string.
    nextStart = str.Length;
    currEnd = nextStart;
    return(true);
}