/// <summary>
/// Transcodes a range of UTF-8 bytes into UTF-16 characters, writing them into the
/// shared <c>str</c> buffer starting at the current <c>used</c> position, and returns
/// a <see cref="ValueString"/> describing the characters that were written.
/// </summary>
/// <param name="arr">Array holding the UTF-8 encoded bytes.</param>
/// <param name="start">Index in <paramref name="arr"/> of the first byte to decode.</param>
/// <param name="length">Number of bytes to decode, starting at <paramref name="start"/>.</param>
/// <returns>
/// A <see cref="ValueString"/> built from <c>str</c>, the value <c>used</c> had on entry,
/// and the number of characters produced by this call.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="arr"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="start"/> or <paramref name="length"/> is negative, or the range
/// they describe does not fit inside <paramref name="arr"/>.
/// </exception>
/// <exception cref="Exception">A code point could not be encoded as UTF-16.</exception>
public unsafe ValueString CreateValueStringFromUtf8(byte[] arr, int start, int length)
{
    if (arr == null)
    {
        throw new ArgumentNullException("arr");
    }

    // Deliberately avoids computing start + length: that sum can wrap around for large
    // values and slip past the check. Negative values are rejected for the same reason.
    if (start < 0 || length < 0 || start > arr.Length - length)
    {
        throw new ArgumentException();
    }

    // NOTE(review): presumably reserves room in str for the characters written below;
    // the unit EnsureSpace expects is not visible here - confirm against its definition.
    EnsureSpace(length * 2);

    fixed (char* ptr = str)
    {
        // Remember where this string begins; used is only advanced once every code
        // point has been encoded, so a failure leaves it untouched.
        var offset = used;
        var ptrx = ptr + used;

        foreach (var codePoint in new Utf8String.CodePointEnumerable(arr, start, length))
        {
            int charsEncoded;
            if (!Utf16LittleEndianEncoder.TryEncodeCodePoint(codePoint, ptrx, out charsEncoded))
            {
                // TODO: Change Exception type
                throw new Exception("invalid character");
            }

            ptrx += charsEncoded;
        }

        // Distance between the write cursor and the starting position, in chars.
        var strlen = (int)(ptrx - ptr - used);
        used += strlen;
        return new ValueString(str, offset, strlen);
    }
}
/// <summary>
/// Builds a <see cref="string"/> from <c>CodePoints</c> by encoding each code point
/// as UTF-16 into a temporary buffer and then copying that buffer into a new string.
/// </summary>
/// <returns>The UTF-16 string containing every code point of this instance.</returns>
/// <exception cref="Exception">A code point could not be encoded into the buffer.</exception>
public override string ToString()
{
    // Pass 1: work out how many UTF-16 code units the result needs. A code point in
    // the BMP contributes one unit, any other code point contributes two.
    // TODO: Optimize for characters of length 1 or 2 in UTF-8 representation (no need to read anything)
    // TODO: is compiler gonna do the right thing here?
    // TODO: Should we use Linq's Count()?
    int utf16Length = 0;
    foreach (var codePoint in CodePoints)
    {
        utf16Length += UnicodeCodePoint.IsBmp(codePoint) ? 1 : 2;
    }

    unsafe
    {
        char* stackBuffer = null;
        char[] heapBuffer = null;
        Span<byte> remaining;

        if (utf16Length > 256)
        {
            // Results longer than 256 code units go through a heap array.
            // HACK: Can System.Buffers be used here?
            heapBuffer = new char[utf16Length];
            remaining = heapBuffer.Slice().Cast<char, byte>();
        }
        else
        {
            // Short results are staged on the stack. The stackalloc result is first
            // bound to its own local declaration and then copied to the outer pointer.
            char* onStack = stackalloc char[utf16Length];
            stackBuffer = onStack;
            remaining = new Span<byte>(stackBuffer, utf16Length * 2);
        }

        // Pass 2: encode each code point at the front of the unwritten region, then
        // shrink that region by the number of bytes just produced.
        foreach (var codePoint in CodePoints)
        {
            int written;
            if (Utf16LittleEndianEncoder.TryEncodeCodePoint(codePoint, remaining, out written))
            {
                remaining = remaining.Slice(written);
                continue;
            }

            // TODO: Change Exception type
            throw new Exception("invalid character");
        }

        // TODO: We already have a char[] and this will copy, how to avoid that
        if (stackBuffer != null)
        {
            return new string(stackBuffer, 0, utf16Length);
        }

        return new string(heapBuffer);
    }
}
/// <summary>
/// Builds a <see cref="string"/> from <c>CodePoints</c> by encoding each code point
/// as UTF-16 directly into a pinned <c>char[]</c> and creating a string from it.
/// </summary>
/// <returns>The UTF-16 string containing every code point of this instance.</returns>
/// <exception cref="Exception">A code point could not be encoded into the buffer.</exception>
public override string ToString()
{
    // get length first
    // TODO: Optimize for characters of length 1 or 2 in UTF-8 representation (no need to read anything)
    // TODO: is compiler gonna do the right thing here?
    // TODO: Should we use Linq's Count()?
    int len = 0;
    foreach (var codePoint in CodePoints)
    {
        len++;

        // Code points outside the Basic Multilingual Plane are written as a surrogate
        // pair in UTF-16, so they occupy two chars. Counting them once would leave the
        // buffer too small for the encode pass below.
        if (!UnicodeCodePoint.IsBmp(codePoint))
        {
            len++;
        }
    }

    char[] characters = new char[len];
    unsafe
    {
        // Pin the array so its storage can be viewed as bytes while it is filled.
        fixed (char* pinnedCharacters = characters)
        {
            // Two bytes per UTF-16 code unit.
            Span<byte> buffer = new Span<byte>((byte*)pinnedCharacters, len * 2);
            foreach (var codePoint in CodePoints)
            {
                int bytesEncoded;
                if (!Utf16LittleEndianEncoder.TryEncodeCodePoint(codePoint, buffer, out bytesEncoded))
                {
                    // TODO: Change Exception type
                    throw new Exception("invalid character");
                }

                // Advance past the bytes just written.
                buffer = buffer.Slice(bytesEncoded);
            }
        }
    }

    // TODO: We already have a char[] and this will copy, how to avoid that
    return new string(characters);
}