/// <summary>
/// Builds a UTF-16 <see cref="string"/> from the sequence exposed by <c>CodePoints</c>,
/// encoding each code point with <c>Utf16LittleEndianEncoder.TryEncodeCodePoint</c>.
/// </summary>
/// <returns>
/// The encoded string, or <see cref="string.Empty"/> when <c>CodePoints</c> yields nothing.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The encoder reported failure for one of the code points.
/// </exception>
public override string ToString()
{
    // Code-unit counts up to this limit are encoded into stack memory instead of a heap array.
    const int MaxStackallocChars = 256;

    // First pass: count the UTF-16 code units needed. Every code point takes one unit,
    // and a code point outside the BMP takes a second one (surrogate pair).
    // TODO: Optimize for characters of length 1 or 2 in UTF-8 representation (no need to read anything)
    // TODO: is compiler gonna do the right thing here?
    // TODO: Should we use Linq's Count()?
    int len = 0;
    foreach (var codePoint in CodePoints)
    {
        len++;
        if (!UnicodeCodePoint.IsBmp(codePoint))
        {
            len++;
        }
    }

    // Nothing to encode. Returning here also avoids a zero-length stackalloc, whose
    // resulting pointer is implementation-defined and must not drive the return path.
    if (len == 0)
    {
        return string.Empty;
    }

    unsafe
    {
        Span<byte> buffer;
        char* stackChars = null;
        char[] characters = null;

        if (len <= MaxStackallocChars)
        {
            // stackalloc is kept in a local variable initializer, as in the original code.
            char* stackallocedChars = stackalloc char[len];
            stackChars = stackallocedChars;
            buffer = new Span<byte>(stackChars, len * sizeof(char));
        }
        else
        {
            // HACK: Can System.Buffers be used here?
            characters = new char[len];
            buffer = characters.Slice().Cast<char, byte>();
        }

        // Second pass: encode each code point and advance the window past the bytes written.
        foreach (var codePoint in CodePoints)
        {
            int bytesEncoded;
            if (!Utf16LittleEndianEncoder.TryEncodeCodePoint(codePoint, buffer, out bytesEncoded))
            {
                // More specific than the base Exception; callers catching Exception still see it.
                throw new InvalidOperationException("invalid character");
            }

            buffer = buffer.Slice(bytesEncoded);
        }

        // Choose the constructor by which storage was actually used.
        // TODO: We already have a char[] and this will copy, how to avoid that
        return characters != null
            ? new string(characters)
            : new string(stackChars, 0, len);
    }
}
/// <summary>
/// Attempts to write the UTF-16 encoding of <paramref name="codePoint"/> to the start of
/// <paramref name="buffer"/>.
/// </summary>
/// <param name="codePoint">The code point to encode.</param>
/// <param name="buffer">Destination bytes; needs at least 2 bytes for a BMP code point and 4 otherwise.</param>
/// <param name="encodedBytes">
/// On success, the number of bytes written (2 or 4). On failure, 0.
/// </param>
/// <returns>
/// <c>true</c> when the code point was written; <c>false</c> when
/// <c>UnicodeCodePoint.IsSupportedCodePoint</c> rejects it or <paramref name="buffer"/> is too small.
/// </returns>
public static bool TryEncodeCodePoint(UnicodeCodePoint codePoint, Span<byte> buffer, out int encodedBytes)
{
    if (!UnicodeCodePoint.IsSupportedCodePoint(codePoint))
    {
        encodedBytes = default(int);
        return false;
    }

    // A BMP code point is a single 16-bit unit; anything else needs a surrogate pair.
    encodedBytes = UnicodeCodePoint.IsBmp(codePoint) ? 2 : 4;
    if (buffer.Length < encodedBytes)
    {
        // buffer too small
        encodedBytes = default(int);
        return false;
    }

    // NOTE(review): Write presumably stores the value in the platform's native byte order,
    // which yields little-endian output only on little-endian hardware - confirm.
    if (encodedBytes == 2)
    {
        unchecked
        {
            buffer.Write((ushort)codePoint);
        }
    }
    else
    {
        unchecked
        {
            uint codePointValue = (uint)codePoint;

            // High surrogate carries the upper bits of (codePoint - 0x10000).
            uint highSurrogate = ((codePointValue - 0x010000u) >> 10)
                + UnicodeConstants.Utf16HighSurrogateFirstCodePoint;

            // Low surrogate carries the low 10 bits. Subtracting 0x10000 does not affect those
            // bits, so the mask is applied to the raw value (assumes MaskLow10Bits is 0x3FF,
            // as its name suggests - TODO confirm).
            uint lowSurrogate = (codePointValue & MaskLow10Bits)
                + UnicodeConstants.Utf16LowSurrogateFirstCodePoint;

            // Pack both units into one 32-bit write, high surrogate in the low half.
            buffer.Write(highSurrogate | (lowSurrogate << 16));
        }
    }

    return true;
}