// Array-based worker behind "GetByteCount".  A surrogate start that was
// seen at the end of an earlier call is carried in "leftOver", so a
// surrogate pair may straddle two calls.
//
// Throws ArgumentNullException when "chars" is null and
// ArgumentOutOfRangeException when "index"/"count" do not describe a
// range inside "chars".
static int InternalGetByteCount (char[] chars, int index, int count, EncoderFallback fallback, ref char leftOver, bool flush)
{
	// Argument checks.
	if (chars == null)
		throw new ArgumentNullException ("chars");
	if (index < 0 || index > chars.Length)
		throw new ArgumentOutOfRangeException ("index");
	if (count < 0 || count > (chars.Length - index))
		throw new ArgumentOutOfRangeException ("count");

	if (index != chars.Length) {
		// There is at least one addressable character: hand the
		// pinned range to the pointer-based overload.
		unsafe {
			fixed (char* first = chars) {
				return InternalGetByteCount (first + index, count, fallback, ref leftOver, flush);
			}
		}
	}

	// The range starts at the very end of the array, so the only
	// thing that can still be counted is a pending surrogate start.
	if (!flush || leftOver == '\0')
		return 0;

	// Flushing the pending surrogate start costs three bytes.
	leftOver = '\0';
	return 3;
}
// Pointer-based core of "GetByteCount".  Walks "count" UTF-16 code units
// starting at "chars" and returns how many bytes they occupy: 1, 2 or 3
// bytes per non-surrogate character and 4 bytes per surrogate pair.
// Unpaired surrogates are replaced through GetFallbackChars and the
// replacement characters are counted instead.
//
// "leftOver" holds a surrogate start carried over between calls; with
// "flush" set, a surrogate start still pending at the end adds 3 bytes
// and is cleared.
unsafe static int InternalGetByteCount (char* chars, int count, EncoderFallback fallback, ref char leftOver, bool flush)
{
	EncoderFallbackBuffer buffer = null;
	char* origin = chars;
	char* limit = chars + count;
	int total = 0;

	while (chars < limit) {
		char current = *chars;

		if (leftOver != '\0') {
			// A surrogate start is pending from an earlier step.
			if (current >= '\uDC00' && current <= '\uDFFF') {
				// The pair is complete.
				total += 4;
				chars++;
			} else {
				// The surrogate start is followed by something that
				// is not a tail.  Count the fallback output and leave
				// the position unchanged so the current character is
				// examined again on the next pass.
				char[] replacement = GetFallbackChars (chars, origin, fallback, ref buffer);
				fixed (char* rptr = replacement) {
					char ignored = '\0';
					total += InternalGetByteCount (rptr, replacement.Length, fallback, ref ignored, true);
				}
			}
			leftOver = '\0';
			continue;
		}

		if (current < '\x80') {
			total += 1;
		} else if (current < '\x800') {
			total += 2;
		} else if (current < '\uD800' || current > '\uDFFF') {
			total += 3;
		} else if (current <= '\uDBFF') {
			// Surrogate start: consume the tail too when it is
			// already available, otherwise remember the start.
			if (chars + 1 < limit && chars [1] >= '\uDC00' && chars [1] <= '\uDFFF') {
				total += 4;
				chars += 2;
			} else {
				leftOver = current;
				chars++;
			}
			continue;
		} else {
			// Surrogate tail with no leading surrogate.
			char[] replacement = GetFallbackChars (chars, origin, fallback, ref buffer);
			fixed (char* rptr = replacement) {
				char ignored = '\0';
				total += InternalGetByteCount (rptr, replacement.Length, fallback, ref ignored, true);
			}
		}

		chars++;
	}

	// Account for a surrogate start that never received its tail.
	if (flush && leftOver != '\0') {
		total += 3;
		leftOver = '\0';
	}

	return total;
}
// Constructor.  Stores the encoder fallback and clears both surrogate
// carry-over slots.  "emitIdentifier" is accepted but is not stored by
// this constructor.
public UTF8Encoder (EncoderFallback fallback, bool emitIdentifier)
{
	Fallback = fallback;

	// No surrogate start is pending for either counting or converting.
	leftOverForCount = leftOverForConv = '\0';
}
// Pointer-based core of "GetBytes".  Encodes "count" UTF-16 code units
// starting at "chars" into at most "bcount" bytes starting at "bytes"
// and returns the number of bytes written.
//
// "leftOver" carries a surrogate start seen at the end of a previous
// call.  When "flush" is true, a surrogate start that is still pending
// at the end is written out as a three-byte sequence and cleared.
// Unpaired surrogates are replaced through GetFallbackChars and the
// replacement characters are encoded in their place.
//
// Throws ArgumentException ("bytes") when the output does not fit.
unsafe static int InternalGetBytes (char* chars, int count, byte* bytes, int bcount, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
{
	byte* end_bytes = bytes + bcount;
	byte* start_bytes = bytes;
	char* end = chars + count;
	char* start = chars;

	while (chars < end) {
		if (leftOver == 0) {
			for (; chars < end; chars++) {
				int ch = *chars;
				if (ch < '\x80') {
					// One-byte sequence.
					if (bytes >= end_bytes)
						goto fail_no_space;
					*bytes++ = (byte) ch;
				} else if (ch < '\x800') {
					// Two-byte sequence.
					if (bytes + 1 >= end_bytes)
						goto fail_no_space;
					bytes [0] = (byte) (0xC0 | (ch >> 6));
					bytes [1] = (byte) (0x80 | (ch & 0x3F));
					bytes += 2;
				} else if (ch < '\uD800' || ch > '\uDFFF') {
					// Three-byte sequence.
					if (bytes + 2 >= end_bytes)
						goto fail_no_space;
					bytes [0] = (byte) (0xE0 | (ch >> 12));
					bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
					bytes [2] = (byte) (0x80 | (ch & 0x3F));
					bytes += 3;
				} else if (ch <= '\uDBFF') {
					// This is a surrogate start: remember it and
					// leave the inner loop so the outer loop can
					// pair it with the next character.
					leftOver = *chars;
					chars++;
					break;
				} else {
					// We have a surrogate tail without
					// leading surrogate.
					char[] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
					char dummy = '\0';
					if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
						goto fail_no_space;
					fixed (char *fb_chars = fallback_chars) {
						bytes += InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
					}
					leftOver = '\0';
				}
			}
		} else {
			if (*chars >= '\uDC00' && *chars <= '\uDFFF') {
				// We have a correct surrogate pair: combine both
				// halves into one code point and emit four bytes.
				int ch = 0x10000 + (int) *chars - 0xDC00 + (((int) leftOver - 0xD800) << 10);
				if (bytes + 3 >= end_bytes)
					goto fail_no_space;
				bytes [0] = (byte) (0xF0 | (ch >> 18));
				bytes [1] = (byte) (0x80 | ((ch >> 12) & 0x3F));
				bytes [2] = (byte) (0x80 | ((ch >> 6) & 0x3F));
				bytes [3] = (byte) (0x80 | (ch & 0x3F));
				bytes += 4;
				chars++;
			} else {
				// We have a surrogate start followed by a
				// regular character.  Technically, this is
				// invalid, but we have to do something.
				// We write out the fallback output and then
				// re-visit the current character again.
				//
				// NOTE(review): GetFallbackChars is handed the
				// current character rather than the pending
				// surrogate start - confirm this is intended.
				char[] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
				char dummy = '\0';
				if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
					goto fail_no_space;
				fixed (char *fb_chars = fallback_chars) {
					// Advance past the fallback bytes.  Without this
					// the re-visited character would overwrite them
					// and the result would disagree with the byte
					// count, which does include the fallback output.
					bytes += InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
				}
			}
			leftOver = '\0';
		}
	}

	if (flush) {
		// Flush the left-over surrogate pair start.
		if (leftOver != '\0') {
			int ch = leftOver;
			if (bytes + 2 < end_bytes) {
				bytes [0] = (byte) (0xE0 | (ch >> 12));
				bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
				bytes [2] = (byte) (0x80 | (ch & 0x3F));
				bytes += 3;
			} else {
				goto fail_no_space;
			}
			leftOver = '\0';
		}
	}

	return (int) (bytes - start_bytes);

fail_no_space:
	throw new ArgumentException ("Insufficient Space", "bytes");
}
// Internal version of "GetBytes" which can handle a rolling
// state between multiple calls to this method.
//
// Encodes "charCount" characters of "chars" starting at "charIndex"
// into "bytes" starting at "byteIndex" and returns the number of bytes
// written.  "leftOver" carries a pending surrogate start between calls;
// with "flush" set it is written out as a three-byte sequence.
//
// Throws ArgumentNullException for a null array,
// ArgumentOutOfRangeException for an index or count outside its array,
// and ArgumentException ("bytes") when the output does not fit.
static int InternalGetBytes (char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
{
	// Validate the parameters.
	if (chars == null)
		throw new ArgumentNullException ("chars");
	if (bytes == null)
		throw new ArgumentNullException ("bytes");
	if (charIndex < 0 || charIndex > chars.Length)
		throw new ArgumentOutOfRangeException ("charIndex");
	if (charCount < 0 || charCount > (chars.Length - charIndex))
		throw new ArgumentOutOfRangeException ("charCount");
	if (byteIndex < 0 || byteIndex > bytes.Length)
		throw new ArgumentOutOfRangeException ("byteIndex");

	if (charIndex == chars.Length) {
		if (flush && leftOver != '\0') {
			// Flush the left-over surrogate pair start as three
			// bytes.  The matching byte-count overload reports 3 for
			// this state and the pointer-based overload emits the
			// same sequence, so dropping the character here would
			// make count and output disagree.
			if (bytes.Length - byteIndex < 3)
				throw new ArgumentException ("Insufficient Space", "bytes");
			int ch = leftOver;
			bytes [byteIndex] = (byte) (0xE0 | (ch >> 12));
			bytes [byteIndex + 1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
			bytes [byteIndex + 2] = (byte) (0x80 | (ch & 0x3F));
			leftOver = '\0';
			return 3;
		}
		return 0;
	}

	unsafe {
		fixed (char* cptr = chars) {
			// No room at all in the output: pass a null destination
			// with zero capacity instead of pinning "bytes".
			if (bytes.Length == byteIndex)
				return InternalGetBytes (cptr + charIndex, charCount, null, 0, fallback, ref buffer, ref leftOver, flush);
			fixed (byte *bptr = bytes) {
				return InternalGetBytes (cptr + charIndex, charCount, bptr + byteIndex, bytes.Length - byteIndex, fallback, ref buffer, ref leftOver, flush);
			}
		}
	}
}
// Runs the encoder fallback for the character at "chars" and returns
// the replacement characters it produced.  The character's position is
// reported to the fallback as its offset from "start".  The fallback
// buffer is created from "fallback" on first use, handed back through
// "buffer" for reuse, and reset before returning.
unsafe static char[] GetFallbackChars (char* chars, char* start, EncoderFallback fallback, ref EncoderFallbackBuffer buffer)
{
	if (buffer == null)
		buffer = fallback.CreateFallbackBuffer ();

	int offset = (int) (chars - start);
	buffer.Fallback (*chars, offset);

	// Drain everything the fallback queued up.
	char[] replacement = new char [buffer.Remaining];
	int filled = 0;
	while (filled < replacement.Length) {
		replacement [filled] = buffer.GetNextChar ();
		filled++;
	}

	buffer.Reset ();
	return replacement;
}
// Looks up the encoding called "name" and returns a writable clone of
// it that uses the supplied encoder and decoder fallbacks.
//
// Throws ArgumentNullException when either fallback is null.
public static Encoding GetEncoding (string name, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
	if (encoderFallback == null)
		throw new ArgumentNullException ("encoderFallback");
	if (decoderFallback == null)
		throw new ArgumentNullException ("decoderFallback");

	// Work on a clone so the instance returned by the name lookup
	// is left untouched.
	var copy = GetEncoding (name).Clone ();
	copy.is_readonly = false;
	copy.decoder_fallback = decoderFallback;
	copy.encoder_fallback = encoderFallback;
	return copy;
}
// Replaces the encoder and/or decoder fallback of this instance.
// A null argument leaves the corresponding fallback unchanged.
internal void SetFallbackInternal (EncoderFallback e, DecoderFallback d)
{
	if (d != null) {
		decoder_fallback = d;
	}

	if (e != null) {
		encoder_fallback = e;
	}
}