/// <summary>
/// Creates the encoding, records which code page supplies the table data,
/// and then loads the code page tables.
/// </summary>
/// <param name="codepage">Code page identifier forwarded to the base constructor.</param>
/// <param name="dataCodePage">Value stored in <c>dataTableCodePage</c> before the tables are loaded.</param>
/// <param name="enc">Encoder fallback forwarded to the base constructor.</param>
/// <param name="dec">Decoder fallback forwarded to the base constructor.</param>
internal BaseCodePageEncoding(int codepage, int dataCodePage, EncoderFallback enc, DecoderFallback dec)
    : base(codepage, enc, dec)
{
    // The data code page has to be recorded first; the table load runs right after it.
    dataTableCodePage = dataCodePage;

    LoadCodePageTables();
}
// Array-based front end of "GetByteCount". The left-over surrogate is passed
// by reference so that state can roll over between successive calls.
private static int InternalGetByteCount (char[] chars, int index, int count, EncoderFallback fallback, ref char leftOver, bool flush)
{
    // Argument validation.
    if (chars == null)
        throw new ArgumentNullException ("chars");
    if (index < 0 || index > chars.Length)
        throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
    if (count < 0 || count > (chars.Length - index))
        throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

    if (index != chars.Length) {
        // Hand the pinned window over to the pointer-based implementation.
        unsafe {
            fixed (char* cptr = chars) {
                return InternalGetByteCount (cptr + index, count, fallback, ref leftOver, flush);
            }
        }
    }

    // The window starts at the end of the array, so there is nothing to scan.
    // Only a pending surrogate start can still contribute, and only on flush.
    if (!flush || leftOver == '\0')
        return 0;

    leftOver = '\0';
    return 3;
}
/// <summary>
/// Deserialization constructor. Always reads the code page, then tries the
/// newer ("Whidbey" V2.0) fields and falls back to the older ("Everett")
/// layout when they are missing.
/// </summary>
/// <param name="info">Serialized field values; must not be null.</param>
/// <param name="context">Streaming context (not used here).</param>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
internal CodePageEncoding(SerializationInfo info, StreamingContext context)
{
    if (info == null)
    {
        throw new ArgumentNullException(nameof(info));
    }
    Contract.EndContractBlock();

    // Every serialized version carries the code page.
    m_codePage = (int)info.GetValue("m_codePage", typeof(int));

    try
    {
        // Whidbey V2.0 fields.
        m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
        encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
        decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
    }
    catch (SerializationException)
    {
        // The Whidbey fields were absent, so the data must be from Everett.
        m_deserializedFromEverett = true;

        // Such instances are treated as read-only.
        m_isReadOnly = true;
    }
}
/// <summary>
/// Initializes the encoding with its code page, display name and web name.
/// </summary>
/// <param name="codePage">Code page identifier; also forwarded to the base constructor.</param>
/// <param name="name">Value stored as the encoding name.</param>
/// <param name="webName">Value stored as the web name.</param>
/// <param name="encoderFallback">Encoder fallback forwarded to the base constructor.</param>
/// <param name="decoderFallback">Decoder fallback forwarded to the base constructor.</param>
protected CodePageEncoding(
    int codePage,
    string name,
    string webName,
    EncoderFallback encoderFallback,
    DecoderFallback decoderFallback)
    : base(codePage, encoderFallback, decoderFallback)
{
    codePage_ = codePage;
    encodingName_ = name;
    webName_ = webName;
}
/// <summary>
/// Deserialization constructor: restores the encoding, the encoder fallback
/// and the left-over character from <paramref name="info"/>.
/// </summary>
/// <param name="info">Serialized field values; must not be null.</param>
/// <param name="context">Streaming context (not used here).</param>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
internal EncoderNLSSurrogate(SerializationInfo info, StreamingContext context)
{
    if (info == null)
        throw new ArgumentNullException(nameof(info));

    object encoding = info.GetValue(EncodingKey, typeof(Encoding));
    _encoding = (Encoding)encoding;

    // NOTE(review): the encoder fallback is looked up under DecoderFallbackKey.
    // Confirm this matches the key used by the serializing side.
    object fallback = info.GetValue(DecoderFallbackKey, typeof(EncoderFallback));
    _fallback = (EncoderFallback)fallback;

    object leftOver = info.GetValue(CharLeftOverKey, typeof(char));
    _charLeftOver = (char)leftOver;
}
/// <summary>
/// Returns a copy of the encoding for <paramref name="codepage"/> with the
/// supplied fallbacks installed, or null when <c>GetEncoding(codepage)</c>
/// returns null.
/// </summary>
/// <param name="codepage">Code page identifier to look up.</param>
/// <param name="encoderFallback">Fallback assigned to the clone's <c>EncoderFallback</c>.</param>
/// <param name="decoderFallback">Fallback assigned to the clone's <c>DecoderFallback</c>.</param>
/// <returns>The configured clone, or null if the code page is not provided.</returns>
public virtual Encoding GetEncoding(int codepage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    Encoding enc = GetEncoding(codepage);
    if (enc == null)
    {
        return null;
    }

    // Clone the instance that was just null-checked instead of looking it up a
    // second time: a second lookup costs another virtual call and its result
    // would be dereferenced without any null check.
    Encoding clone = (Encoding)enc.Clone();
    clone.EncoderFallback = encoderFallback;
    clone.DecoderFallback = decoderFallback;
    return clone;
}
// Script binding for Encoding.GetEncoding(string, EncoderFallback, DecoderFallback).
// Only a call with exactly three arguments is dispatched; the binding
// reports true in every case.
static bool Encoding_GetEncoding__String__EncoderFallback__DecoderFallback(JSVCall vc, int argc)
{
    if (argc != 3)
    {
        return(true);
    }

    // Arguments are fetched in declaration order.
    System.String name = (System.String)JSApi.getStringS((int)JSApi.GetType.Arg);
    System.Text.EncoderFallback encoderFallback = (System.Text.EncoderFallback)JSMgr.datax.getObject((int)JSApi.GetType.Arg);
    System.Text.DecoderFallback decoderFallback = (System.Text.DecoderFallback)JSMgr.datax.getObject((int)JSApi.GetType.Arg);

    // Publish the resulting encoding as the return value.
    JSMgr.datax.setObject(
        (int)JSApi.SetType.Rval,
        System.Text.Encoding.GetEncoding(name, encoderFallback, decoderFallback));

    return(true);
}
// Script binding for the Encoding.EncoderFallback property.
// vc.bGet selects between the getter and the setter.
static void Encoding_EncoderFallback(JSVCall vc)
{
    if (!vc.bGet)
    {
        // Setter: the argument is fetched before the target object is resolved.
        System.Text.EncoderFallback value = (System.Text.EncoderFallback)JSMgr.datax.getObject((int)JSApi.GetType.Arg);
        System.Text.Encoding target = (System.Text.Encoding)vc.csObj;
        target.EncoderFallback = value;
        return;
    }

    // Getter: read the property and publish it as the return value.
    System.Text.Encoding source = (System.Text.Encoding)vc.csObj;
    var current = source.EncoderFallback;
    JSMgr.datax.setObject((int)JSApi.SetType.Rval, current);
}
/// <summary>
/// Deserialization constructor. Reads the code page, then the optional
/// read-only flag and fallbacks; if those are missing the instance is marked
/// as deserialized from Everett and made read-only.
/// </summary>
/// <param name="info">Serialized field values; must not be null.</param>
/// <param name="context">Streaming context (not used here).</param>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
internal MLangCodePageEncoding(SerializationInfo info, StreamingContext context)
{
    if (info == null)
        throw new ArgumentNullException("info");

    m_codePage = (int)info.GetValue("m_codePage", typeof(int));

    try
    {
        m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
        encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
        decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
    }
    catch (SerializationException)
    {
        // Optional fields were not present in the stream.
        m_deserializedFromEverett = true;
        m_isReadOnly = true;
    }
}
/// <summary>
/// Constructor invoked by serialization. The code page is always read; the
/// read-only flag and fallbacks are read when present, otherwise the
/// instance is flagged as coming from Everett and made read-only.
/// </summary>
/// <param name="info">Serialized field values; must not be null.</param>
/// <param name="context">Streaming context (not used here).</param>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
internal MLangCodePageEncoding(SerializationInfo info, StreamingContext context)
{
    // No info, nothing to restore.
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    // The code page is present in all versions.
    m_codePage = (int)info.GetValue("m_codePage", typeof(int));

    try
    {
        // Newer fields; any of these may be missing.
        m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
        encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
        decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
    }
    catch (SerializationException)
    {
        m_deserializedFromEverett = true;

        // Treat it as read-only in that case.
        m_isReadOnly = true;
    }
}
/// <summary>
/// Encodes <paramref name="charCount"/> chars into single bytes using the
/// <c>_mapUnicodeToBytes</c> table.
/// </summary>
/// <remarks>
/// Two paths exist. The fast path is taken when the effective fallback is an
/// <see cref="EncoderReplacementFallback"/> with <c>MaxCharCount == 1</c> whose
/// replacement character itself maps to a non-zero byte: every input char then
/// produces exactly one byte, with the replacement byte substituted for chars
/// that map to 0 (other than U+0000). Otherwise the slow path drives an
/// <see cref="EncoderFallbackBuffer"/> through <c>EncoderFallbackBufferHelper</c>.
/// When the output runs out, <c>ThrowBytesOverflow</c> is called; judging by the
/// inline comments it may or may not throw depending on its second argument,
/// and the code stops encoding if it returns.
/// </remarks>
/// <param name="chars">Start of the input chars (asserted non-null).</param>
/// <param name="charCount">Number of input chars (asserted non-negative).</param>
/// <param name="bytes">Start of the output buffer (asserted non-null).</param>
/// <param name="byteCount">Capacity of the output buffer in bytes (asserted non-negative).</param>
/// <param name="encoder">
/// Optional encoder. When non-null its <c>charLeftOver</c> and <c>Fallback</c> are
/// used, and its <c>charLeftOver</c> / <c>m_charsUsed</c> are updated before returning.
/// </param>
/// <returns>The number of bytes written (<c>bytes - byteStart</c>).</returns>
[System.Security.SecurityCritical] // auto-generated
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
{
    // Just need to ASSERT, this is called by something else internal that checked parameters already
    Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetBytes]bytes is null");
    Debug.Assert(byteCount >= 0, "[SBCSCodePageEncoding.GetBytes]byteCount is negative");
    Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetBytes]chars is null");
    Debug.Assert(charCount >= 0, "[SBCSCodePageEncoding.GetBytes]charCount is negative");

    // Assert because we shouldn't be able to have a null encoder.
    Debug.Assert(EncoderFallback != null, "[SBCSCodePageEncoding.GetBytes]Attempting to use null encoder fallback");

    CheckMemorySection();

    // Need to test fallback
    EncoderReplacementFallback fallback = null;

    // Get any left over characters
    char charLeftOver = (char)0;
    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;
        Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver), "[SBCSCodePageEncoding.GetBytes]leftover character should be high surrogate");
        fallback = encoder.Fallback as EncoderReplacementFallback;

        // Verify that we have no fallbackbuffer, for SBCS its always empty, so just assert
        Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer || encoder.FallbackBuffer.Remaining == 0, "[SBCSCodePageEncoding.GetBytes]Expected empty fallback buffer at start");
        // if (encoder.m_throwOnOverflow && encoder.InternalHasFallbackBuffer &&
        //     encoder.FallbackBuffer.Remaining > 0)
        //     throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
        //         EncodingName, encoder.Fallback.GetType()));
    }
    else
    {
        // If we aren't using default fallback then we may have a complicated count.
        fallback = EncoderFallback as EncoderReplacementFallback;
    }

    // prepare our end
    char* charEnd = chars + charCount;
    byte* byteStart = bytes;
    char* charStart = chars;

    // See if we do the fast default or slightly slower fallback
    if (fallback != null && fallback.MaxCharCount == 1)
    {
        // Make sure our fallback character is valid first
        byte bReplacement = _mapUnicodeToBytes[fallback.DefaultString[0]];

        // Check for replacements in range, otherwise fall back to slow version.
        if (bReplacement != 0)
        {
            // We should have exactly as many output bytes as input bytes, unless there's a leftover
            // character, in which case we may need one more.
            // If we had a leftover character we will have to add a ? (This happens if they had a funky
            // fallback last time, but not this time. We can't spit any out though,
            // because with fallback encoder each surrogate is treated as a separate code point)
            if (charLeftOver > 0)
            {
                // Have to have room
                // Throw even if doing no throw version because this is just 1 char,
                // so buffer will never be big enough
                if (byteCount == 0)
                    ThrowBytesOverflow(encoder, true);

                // This'll make sure we still have more room and also make sure our return value is correct.
                *(bytes++) = bReplacement;
                byteCount--; // We used one of the ones we were counting.
            }

            // This keeps us from overrunning our output buffer
            if (byteCount < charCount)
            {
                // Throw or make buffer smaller?
                ThrowBytesOverflow(encoder, byteCount < 1);

                // Just use what we can
                charEnd = chars + byteCount;
            }

            // Simple way
            while (chars < charEnd)
            {
                char ch2 = *chars;
                chars++;

                byte bTemp = _mapUnicodeToBytes[ch2];

                // Check for fallback
                // (a 0 table entry means "unmapped", except for U+0000 itself)
                if (bTemp == 0 && ch2 != (char)0)
                    *bytes = bReplacement;
                else
                    *bytes = bTemp;

                bytes++;
            }

            // Clear encoder
            if (encoder != null)
            {
                encoder.charLeftOver = (char)0;
                encoder.m_charsUsed = (int)(chars - charStart);
            }
            return (int)(bytes - byteStart);
        }
    }

    // Slower version, have to do real fallback.

    // For fallback we may need a fallback buffer, we know we aren't default fallback
    EncoderFallbackBuffer fallbackBuffer = null;

    // prepare our end
    byte* byteEnd = bytes + byteCount;
    EncoderFallbackBufferHelper fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);

    // We may have a left over character from last time, try and process it.
    if (charLeftOver > 0)
    {
        // Since left over char was a surrogate, it'll have to be fallen back.
        // Get Fallback
        Debug.Assert(encoder != null, "[SBCSCodePageEncoding.GetBytes]Expect to have encoder if we have a charLeftOver");
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);
        fallbackHelper.InternalInitialize(chars, charEnd, encoder, true);

        // This will fallback a pair if *chars is a low surrogate
        fallbackHelper.InternalFallback(charLeftOver, ref chars);
        if (fallbackBuffer.Remaining > byteEnd - bytes)
        {
            // Throw it, if we don't have enough for this we never will
            ThrowBytesOverflow(encoder, true);
        }
    }

    // Now we may have fallback char[] already from the encoder fallback above

    // Go ahead and do it, including the fallback.
    // Each iteration takes the next pending fallback char if there is one,
    // otherwise the next input char.
    char ch;
    while ((ch = (fallbackBuffer == null) ? '\0' : fallbackHelper.InternalGetNextChar()) != 0 || chars < charEnd)
    {
        // First unwind any fallback
        if (ch == 0)
        {
            // No fallback, just get next char
            ch = *chars;
            chars++;
        }

        // get byte for this char
        byte bTemp = _mapUnicodeToBytes[ch];

        // Check for fallback, this'll catch surrogate pairs too.
        if (bTemp == 0 && ch != (char)0)
        {
            // Get Fallback
            if (fallbackBuffer == null)
            {
                // Create & init fallback buffer
                if (encoder == null)
                    fallbackBuffer = EncoderFallback.CreateFallbackBuffer();
                else
                    fallbackBuffer = encoder.FallbackBuffer;
                fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);

                // chars has moved so we need to remember figure it out so Exception fallback
                // index will be correct
                fallbackHelper.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
            }

            // Make sure we have enough room. Each fallback char will be 1 output char
            // (or recursion exception will be thrown)
            fallbackHelper.InternalFallback(ch, ref chars);
            if (fallbackBuffer.Remaining > byteEnd - bytes)
            {
                // Didn't use this char, reset it
                Debug.Assert(chars > charStart, "[SBCSCodePageEncoding.GetBytes]Expected chars to have advanced (fallback)");
                chars--;
                fallbackHelper.InternalReset();

                // Throw it & drop this data
                ThrowBytesOverflow(encoder, chars == charStart);
                break;
            }
            continue;
        }

        // We'll use this one
        // Bounds check
        if (bytes >= byteEnd)
        {
            // didn't use this char, we'll throw or use buffer
            Debug.Assert(fallbackBuffer == null || fallbackHelper.bFallingBack == false, "[SBCSCodePageEncoding.GetBytes]Expected to NOT be falling back");
            if (fallbackBuffer == null || fallbackHelper.bFallingBack == false)
            {
                Debug.Assert(chars > charStart, "[SBCSCodePageEncoding.GetBytes]Expected chars to have advanced (normal)");
                chars--; // don't use last char
            }
            ThrowBytesOverflow(encoder, chars == charStart); // throw ?
            break; // don't throw, stop
        }

        // Go ahead and add it
        *bytes = bTemp;
        bytes++;
    }

    // encoder stuff if we have one
    if (encoder != null)
    {
        // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
        if (fallbackBuffer != null && !fallbackHelper.bUsedEncoder)
            // Clear it in case of MustFlush
            encoder.charLeftOver = (char)0;

        // Set our chars used count
        encoder.m_charsUsed = (int)(chars - charStart);
    }

    // Expect Empty fallback buffer for SBCS
    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, "[SBCSEncoding.GetBytes]Expected Empty fallback buffer at end");

    return (int)(bytes - byteStart);
}
// Deserialization constructor. Reads the code page, then the optional
// read-only flag and fallbacks; when those are missing the instance is
// simply marked read-only.
private MLangCodePageEncoding (SerializationInfo info, StreamingContext context)
{
    if (info == null) {
        throw new ArgumentNullException ("info");
    }

    codePage = (int) info.GetValue ("m_codePage", typeof (int));

    try {
        isReadOnly = (bool) info.GetValue ("m_isReadOnly", typeof (bool));
        encoderFallback = (EncoderFallback) info.GetValue ("encoderFallback", typeof (EncoderFallback));
        decoderFallback = (DecoderFallback) info.GetValue ("decoderFallback", typeof (DecoderFallback));
    } catch (SerializationException) {
        // Data written by .NET Framework 1.x carries no fallbacks.
        isReadOnly = true;
    }
}
// Counts the bytes needed to encode `count` chars starting at `chars`.
// `leftOver` carries a pending surrogate start between calls; when `flush`
// is set a pending surrogate start is counted as 3 bytes and cleared.
private unsafe static int InternalGetByteCount (char* chars, int count, EncoderFallback fallback, ref char leftOver, bool flush)
{
    int total = 0;
    char* limit = chars + count;
    char* origin = chars;
    EncoderFallbackBuffer fallbackBuffer = null;

    while (chars < limit) {
        if (leftOver != 0) {
            // A surrogate start is pending from earlier input.
            if (*chars >= '\uDC00' && *chars <= '\uDFFF') {
                // The pair is complete.
                total += 4;
                chars++;
            } else {
                // Surrogate start followed by a regular character: count the
                // fallback output and leave the current character in place so
                // the next pass visits it again.
                char [] replacement = GetFallbackChars (chars, origin, fallback, ref fallbackBuffer);
                fixed (char *rp = replacement) {
                    char ignored = '\0';
                    total += InternalGetByteCount (rp, replacement.Length, fallback, ref ignored, true);
                }
            }
            leftOver = '\0';
            continue;
        }

        for (; chars < limit; chars++) {
            char c = *chars;

            if (c < '\x80') {
                ++total;
                continue;
            }
            if (c < '\x800') {
                total += 2;
                continue;
            }
            if (c < '\uD800' || c > '\uDFFF') {
                total += 3;
                continue;
            }
            if (c > '\uDBFF') {
                // Surrogate tail without a leading surrogate: count whatever
                // the fallback produces for it.
                char [] replacement = GetFallbackChars (chars, origin, fallback, ref fallbackBuffer);
                fixed (char *rp = replacement) {
                    char ignored = '\0';
                    total += InternalGetByteCount (rp, replacement.Length, fallback, ref ignored, true);
                }
                leftOver = '\0';
                continue;
            }

            // Surrogate start: stay in the scan loop only when the matching
            // tail follows immediately.
            if (chars + 1 < limit && chars [1] >= '\uDC00' && chars [1] <= '\uDFFF') {
                total += 4;
                chars++;
                continue;
            }

            leftOver = c;
            chars++;
            break;
        }
    }

    // Flush the left-over surrogate pair start.
    if (flush && leftOver != '\0') {
        total += 3;
        leftOver = '\0';
    }

    return total;
}
/// <summary>
/// Asks each registered provider in turn for <paramref name="encodingName"/>
/// and returns the first non-null result.
/// </summary>
/// <param name="encodingName">Encoding name passed to every provider.</param>
/// <param name="enc">Encoder fallback passed to every provider.</param>
/// <param name="dec">Decoder fallback passed to every provider.</param>
/// <returns>The first encoding a provider returns, or null when there are no providers or none matches.</returns>
internal static Encoding GetEncodingFromProvider(string encodingName, EncoderFallback enc, DecoderFallback dec)
{
    if (s_providers == null)
    {
        return null;
    }

    // Work on a local copy of the array reference while iterating.
    EncodingProvider[] snapshot = s_providers;
    for (int i = 0; i < snapshot.Length; i++)
    {
        Encoding candidate = snapshot[i].GetEncoding(encodingName, enc, dec);
        if (candidate != null)
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Not implemented: every call throws.
/// </summary>
/// <param name="fallback">Ignored.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void SetEncoderFallback(EncoderFallback fallback)
{
    NotImplementedException notImplemented = new NotImplementedException();
    throw notImplemented;
}
// Encodes charCount chars, starting at chars[charIndex], into bytes starting
// at byteIndex. Chars below 0x80 are stored directly; every other char (or
// surrogate pair) is routed through the encoder fallback and the fallback's
// output is encoded in turn.
//
// chars          - start of the source buffer
// charLength     - total number of chars available behind `chars`
// charIndex      - first char to encode
// charCount      - number of chars to encode
// bytes          - destination array
// byteIndex      - first destination slot
// buffer         - fallback buffer, created on first use and reused afterwards
// fallback_chars - scratch array for fallback output, grown on demand
//
// Throws ArgumentNullException / ArgumentOutOfRangeException for bad
// arguments and ArgumentException when fewer than charCount bytes are free.
//
// NOTE(review): the method returns charCount, not the number of bytes
// written. The two differ whenever a fallback produces more or fewer chars
// than it consumed - confirm what callers expect.
unsafe int InternalGetBytes(char *chars, int charLength, int charIndex, int charCount, byte[] bytes, int byteIndex, ref EncoderFallbackBuffer buffer, ref char [] fallback_chars)
{
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes");
    }
    if (charIndex < 0 || charIndex > charLength)
    {
        throw new ArgumentOutOfRangeException("charIndex", _("ArgRange_StringIndex"));
    }
    if (charCount < 0 || charCount > (charLength - charIndex))
    {
        throw new ArgumentOutOfRangeException("charCount", _("ArgRange_StringRange"));
    }
    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException("byteIndex", _("ArgRange_Array"));
    }
    if ((bytes.Length - byteIndex) < charCount)
    {
        throw new ArgumentException(_("Arg_InsufficientSpace"));
    }

    // Number of chars still to be consumed after the current one.
    int remaining = charCount;
    while (remaining-- > 0)
    {
        char ch = chars [charIndex++];
        if (ch < (char)0x80)
        {
            bytes [byteIndex++] = (byte)ch;
            continue;
        }

        if (buffer == null)
        {
            buffer = EncoderFallback.CreateFallbackBuffer();
        }

        // Only a high surrogate immediately followed by a low surrogate that
        // still lies inside the requested range is handed over as a pair.
        if (Char.IsHighSurrogate(ch) && remaining > 0 && Char.IsLowSurrogate(chars [charIndex]))
        {
            buffer.Fallback(ch, chars [charIndex], charIndex - 1);

            // The pair used two input chars: account for the second one as
            // well, otherwise the loop would run one char past the range.
            charIndex++;
            remaining--;
        }
        else
        {
            buffer.Fallback(ch, charIndex - 1);
        }

        // Copy exactly what the fallback produced. The scratch array can be
        // larger than that when it is reused from an earlier call, so its
        // Length must not be used as the count.
        int fallbackLength = buffer.Remaining;
        if (fallback_chars == null || fallback_chars.Length < fallbackLength)
        {
            fallback_chars = new char [fallbackLength];
        }
        for (int i = 0; i < fallbackLength; i++)
        {
            fallback_chars [i] = buffer.GetNextChar();
        }

        byteIndex += GetBytes(fallback_chars, 0, fallbackLength, bytes, byteIndex, ref buffer, ref fallback_chars);
    }

    return(charCount);
}
// Computes how many bytes encoding `count` chars from `chars` will take.
// A pending surrogate start travels between calls in `leftOver`; with
// `flush` set, a pending surrogate start is counted as 3 bytes and cleared.
private unsafe static int InternalGetByteCount(char *chars, int count, EncoderFallback fallback, ref char leftOver, bool flush)
{
    int byteTotal = 0;
    char *stop = chars + count;
    char *first = chars;
    EncoderFallbackBuffer fbBuffer = null;

    while (chars < stop)
    {
        if (leftOver != 0)
        {
            // Resolve the surrogate start carried over from earlier input.
            bool tailFollows = *chars >= '\uDC00' && *chars <= '\uDFFF';
            if (tailFollows)
            {
                // Correct surrogate pair.
                byteTotal += 4;
                chars++;
            }
            else
            {
                // Surrogate start followed by a regular character. Count the
                // fallback output; the current character is not consumed and
                // is visited again on the next pass.
                char [] substitute = GetFallbackChars(chars, first, fallback, ref fbBuffer);
                fixed(char *sp = substitute)
                {
                    char scratch = '\0';
                    byteTotal += InternalGetByteCount(sp, substitute.Length, fallback, ref scratch, true);
                }
            }
            leftOver = '\0';
            continue;
        }

        for (; chars < stop; chars++)
        {
            char current = *chars;

            if (current < '\x80')
            {
                ++byteTotal;
            }
            else if (current < '\x800')
            {
                byteTotal += 2;
            }
            else if (current < '\uD800' || current > '\uDFFF')
            {
                byteTotal += 3;
            }
            else if (current > '\uDBFF')
            {
                // Surrogate tail with no leading surrogate: count what the
                // fallback emits for it.
                char [] substitute = GetFallbackChars(chars, first, fallback, ref fbBuffer);
                fixed(char *sp = substitute)
                {
                    char scratch = '\0';
                    byteTotal += InternalGetByteCount(sp, substitute.Length, fallback, ref scratch, true);
                }
                leftOver = '\0';
            }
            else if (chars + 1 < stop && chars [1] >= '\uDC00' && chars [1] <= '\uDFFF')
            {
                // Surrogate start with its tail right behind it; step over
                // the tail here, the loop increment steps over the start.
                byteTotal += 4;
                chars++;
            }
            else
            {
                // Surrogate start without a visible tail: remember it and
                // leave the scan loop.
                leftOver = current;
                chars++;
                break;
            }
        }
    }

    // Flush the left-over surrogate pair start.
    if (flush && leftOver != '\0')
    {
        byteTotal += 3;
        leftOver = '\0';
    }

    return(byteTotal);
}
/// <summary>
/// Computes how many bytes encoding <paramref name="count"/> chars would produce.
/// </summary>
/// <remarks>
/// When the effective fallback is an <see cref="EncoderReplacementFallback"/> with
/// <c>MaxCharCount == 1</c>, the answer is <paramref name="count"/>, plus one if the
/// encoder holds a left-over char. Otherwise the input is walked char by char
/// through <c>_mapUnicodeToBytes</c>, with unmapped chars routed through the
/// fallback buffer, and one byte is counted for every char that maps.
/// </remarks>
/// <param name="chars">Start of the input chars (asserted non-null).</param>
/// <param name="count">Number of input chars (asserted non-negative).</param>
/// <param name="encoder">Optional encoder supplying <c>charLeftOver</c> and the fallback.</param>
/// <returns>The computed byte count.</returns>
[System.Security.SecurityCritical] // auto-generated
public override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    // Just need to ASSERT, this is called by something else internal that checked parameters already
    Debug.Assert(count >= 0, "[SBCSCodePageEncoding.GetByteCount]count is negative");
    Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetByteCount]chars is null");

    // Assert because we shouldn't be able to have a null encoder.
    Debug.Assert(EncoderFallback != null, "[SBCSCodePageEncoding.GetByteCount]Attempting to use null fallback");

    CheckMemorySection();

    // Need to test fallback
    EncoderReplacementFallback fallback = null;

    // Get any left over characters
    char charLeftOver = (char)0;
    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;
        Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver), "[SBCSCodePageEncoding.GetByteCount]leftover character should be high surrogate");
        fallback = encoder.Fallback as EncoderReplacementFallback;

        // Verify that we have no fallbackbuffer, actually for SBCS this is always empty, so just assert
        Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer || encoder.FallbackBuffer.Remaining == 0, "[SBCSCodePageEncoding.GetByteCount]Expected empty fallback buffer at start");
    }
    else
    {
        // If we aren't using default fallback then we may have a complicated count.
        fallback = EncoderFallback as EncoderReplacementFallback;
    }

    if ((fallback != null && fallback.MaxCharCount == 1)/* || bIsBestFit*/)
    {
        // Replacement fallback encodes surrogate pairs as two ?? (or two whatever), so return size is always
        // same as input size.
        // Note that no existing SBCS code pages map code points to supplementary characters, so this is easy.

        // We could however have 1 extra byte if the last call had an encoder and a funky fallback and
        // if we don't use the funky fallback this time.

        // Do we have an extra char left over from last time?
        if (charLeftOver > 0)
            count++;

        return (count);
    }

    // It had a funky fallback, so it's more complicated
    // May need buffer later
    EncoderFallbackBuffer fallbackBuffer = null;

    // prepare our end
    int byteCount = 0;
    char* charEnd = chars + count;

    EncoderFallbackBufferHelper fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);

    // We may have a left over character from last time, try and process it.
    if (charLeftOver > 0)
    {
        // Since leftover char was a surrogate, it'll have to be fallen back.
        // Get fallback
        Debug.Assert(encoder != null, "[SBCSCodePageEncoding.GetByteCount]Expect to have encoder if we have a charLeftOver");
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);
        fallbackHelper.InternalInitialize(chars, charEnd, encoder, false);

        // This will fallback a pair if *chars is a low surrogate
        fallbackHelper.InternalFallback(charLeftOver, ref chars);
    }

    // Now we may have fallback char[] already from the encoder

    // Go ahead and do it, including the fallback.
    // Each iteration takes the next pending fallback char if there is one,
    // otherwise the next input char.
    char ch;
    while ((ch = (fallbackBuffer == null) ? '\0' : fallbackHelper.InternalGetNextChar()) != 0 || chars < charEnd)
    {
        // First unwind any fallback
        if (ch == 0)
        {
            // No fallback, just get next char
            ch = *chars;
            chars++;
        }

        // get byte for this char
        byte bTemp = _mapUnicodeToBytes[ch];

        // Check for fallback, this'll catch surrogate pairs too.
        // (a 0 table entry means "unmapped", except for U+0000 itself)
        if (bTemp == 0 && ch != (char)0)
        {
            if (fallbackBuffer == null)
            {
                // Create & init fallback buffer
                if (encoder == null)
                    fallbackBuffer = EncoderFallback.CreateFallbackBuffer();
                else
                    fallbackBuffer = encoder.FallbackBuffer;
                fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);

                // chars has moved so we need to remember figure it out so Exception fallback
                // index will be correct
                fallbackHelper.InternalInitialize(charEnd - count, charEnd, encoder, false);
            }

            // Get Fallback
            fallbackHelper.InternalFallback(ch, ref chars);
            continue;
        }

        // We'll use this one
        byteCount++;
    }

    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, "[SBCSEncoding.GetByteCount]Expected Empty fallback buffer at end");

    return (int)byteCount;
}
// Replaces the encoder and/or decoder fallback. A null argument leaves the
// corresponding fallback untouched.
internal void SetFallbackInternal (EncoderFallback e, DecoderFallback d)
{
    if (e != null) {
        encoder_fallback = e;
    }

    if (d != null) {
        decoder_fallback = d;
    }
}
/// <summary>
/// Resets the optional Whidbey fields before deserialization fills them in:
/// both fallbacks are cleared and the instance is marked read-only.
/// </summary>
internal void OnDeserializing()
{
    // Initialize the optional Whidbey fields.
    m_isReadOnly = true;
    encoderFallback = null;
    decoderFallback = null;
}
/// <summary>
/// Installs the default encoder and decoder fallbacks. Which pair is used
/// depends on whether FEATURE_CORECLR is defined at compile time.
/// </summary>
internal virtual void SetDefaultFallbacks()
{
#if FEATURE_CORECLR
    // CoreCLR only has the Unicode encodings, so there are no best-fit
    // fallbacks; both directions use replacement fallbacks.
    EncoderFallback defaultEncoderFallback = new EncoderReplacementFallback("\xFFFD");
    DecoderFallback defaultDecoderFallback = new DecoderReplacementFallback("\xFFFD");
#else // !FEATURE_CORECLR
    // For UTF-X encodings, we use a replacement fallback with an "\xFFFD" string,
    // For ASCII we use "?" replacement fallback, etc.
    EncoderFallback defaultEncoderFallback = new InternalEncoderBestFitFallback(this);
    DecoderFallback defaultDecoderFallback = new InternalDecoderBestFitFallback(this);
#endif // FEATURE_CORECLR

    this.encoderFallback = defaultEncoderFallback;
    this.decoderFallback = defaultDecoderFallback;
}
/// <summary>
/// Factory wrapper around the <see cref="Windows1250Encoding"/> constructor.
/// </summary>
/// <param name="encoder">Encoder fallback passed to the constructor.</param>
/// <param name="decoder">Decoder fallback passed to the constructor.</param>
/// <returns>A new <see cref="Windows1250Encoding"/> instance.</returns>
public static Windows1250Encoding Create(EncoderFallback encoder, DecoderFallback decoder)
{
    Windows1250Encoding created = new Windows1250Encoding(encoder, decoder);
    return created;
}
// Internal version of "GetBytes" which can handle a rolling
// state between multiple calls to this method.
//
// Validates the arguments, pins the arrays and forwards to the pointer-based
// overload. `leftOver` carries a pending surrogate start between calls and
// `buffer` is the fallback buffer shared with that overload.
//
// Returns whatever the pointer-based overload returns, or 0 when charIndex
// is at the end of `chars`.
private static int InternalGetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
{
    // Validate the parameters.
    if (chars == null)
    {
        throw new ArgumentNullException("chars");
    }
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes");
    }
    if (charIndex < 0 || charIndex > chars.Length)
    {
        throw new ArgumentOutOfRangeException("charIndex", _("ArgRange_Array"));
    }
    if (charCount < 0 || charCount > (chars.Length - charIndex))
    {
        throw new ArgumentOutOfRangeException("charCount", _("ArgRange_Array"));
    }
    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException("byteIndex", _("ArgRange_Array"));
    }

    if (charIndex == chars.Length)
    {
        if (flush && leftOver != '\0')
        {
            // FIXME: use EncoderFallback.
            //
            // By default it is empty, so I do nothing for now.
            leftOver = '\0';
        }
        return(0);
    }

    unsafe
    {
        fixed(char *cptr = chars)
        {
            // There is no room at all in the output: call through with a
            // null destination and zero capacity.
            if (bytes.Length == byteIndex)
            {
                return(InternalGetBytes(
                           cptr + charIndex, charCount,
                           null, 0, fallback, ref buffer, ref leftOver, flush));
            }

            // Normal case. This must sit outside the branch above; nested
            // inside it, it would follow an unconditional return and never run.
            fixed(byte *bptr = bytes)
            {
                return(InternalGetBytes(
                           cptr + charIndex, charCount,
                           bptr + byteIndex, bytes.Length - byteIndex,
                           fallback, ref buffer,
                           ref leftOver, flush));
            }
        }
    }
}
// Encodes `count` chars from `chars` as UTF-8 into `bytes` (capacity
// `bcount`).
//
// `leftOver` carries a pending surrogate start between calls; `buffer` is the
// fallback buffer handed to GetFallbackChars for unpaired surrogates. With
// `flush` set, a surrogate start still pending at the end is written as a
// 3-byte sequence and cleared.
//
// Returns the number of bytes written. Throws ArgumentException
// ("Insufficient Space") when the output capacity is exceeded.
private unsafe static int InternalGetBytes(char *chars, int count, byte *bytes, int bcount, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
{
    char *end = chars + count;
    char *start = chars;
    byte *start_bytes = bytes;
    byte *end_bytes = bytes + bcount;

    while (chars < end)
    {
        if (leftOver == 0)
        {
            for (; chars < end; chars++)
            {
                int ch = *chars;
                if (ch < '\x80')
                {
                    if (bytes >= end_bytes)
                    {
                        goto fail_no_space;
                    }
                    *bytes++ = (byte)ch;
                }
                else if (ch < '\x800')
                {
                    if (bytes + 1 >= end_bytes)
                    {
                        goto fail_no_space;
                    }
                    bytes [0] = (byte)(0xC0 | (ch >> 6));
                    bytes [1] = (byte)(0x80 | (ch & 0x3F));
                    bytes += 2;
                }
                else if (ch < '\uD800' || ch > '\uDFFF')
                {
                    if (bytes + 2 >= end_bytes)
                    {
                        goto fail_no_space;
                    }
                    bytes [0] = (byte)(0xE0 | (ch >> 12));
                    bytes [1] = (byte)(0x80 | ((ch >> 6) & 0x3F));
                    bytes [2] = (byte)(0x80 | (ch & 0x3F));
                    bytes += 3;
                }
                else if (ch <= '\uDBFF')
                {
                    // This is a surrogate char, exit the inner loop.
                    leftOver = *chars;
                    chars++;
                    break;
                }
                else
                {
                    // We have a surrogate tail without
                    // leading surrogate. In NET_2_0 it
                    // uses fallback. In NET_1_1 we output
                    // wrong surrogate.
                    char [] fallback_chars = GetFallbackChars(chars, start, fallback, ref buffer);
                    char dummy = '\0';
                    if (bytes + InternalGetByteCount(fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
                    {
                        goto fail_no_space;
                    }
                    fixed(char *fb_chars = fallback_chars)
                    {
                        bytes += InternalGetBytes(fb_chars, fallback_chars.Length, bytes, bcount - (int)(bytes - start_bytes), fallback, ref buffer, ref dummy, true);
                    }

                    leftOver = '\0';
                }
            }
        }
        else
        {
            if (*chars >= '\uDC00' && *chars <= '\uDFFF')
            {
                // We have a correct surrogate pair.
                int ch = 0x10000 + (int)*chars - 0xDC00 + (((int)leftOver - 0xD800) << 10);
                if (bytes + 3 >= end_bytes)
                {
                    goto fail_no_space;
                }
                bytes [0] = (byte)(0xF0 | (ch >> 18));
                bytes [1] = (byte)(0x80 | ((ch >> 12) & 0x3F));
                bytes [2] = (byte)(0x80 | ((ch >> 6) & 0x3F));
                bytes [3] = (byte)(0x80 | (ch & 0x3F));
                bytes += 4;
                chars++;
            }
            else
            {
                // We have a surrogate start followed by a
                // regular character. Technically, this is
                // invalid, but we have to do something.
                // We write out the surrogate start and then
                // re-visit the current character again.
                char [] fallback_chars = GetFallbackChars(chars, start, fallback, ref buffer);
                char dummy = '\0';
                if (bytes + InternalGetByteCount(fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
                {
                    goto fail_no_space;
                }
                fixed(char *fb_chars = fallback_chars)
                {
                    // Advance past the fallback output. Without this the
                    // re-visited character overwrites it and the result no
                    // longer matches what InternalGetByteCount reports.
                    bytes += InternalGetBytes(fb_chars, fallback_chars.Length, bytes, bcount - (int)(bytes - start_bytes), fallback, ref buffer, ref dummy, true);
                }
            }

            // Either way the pending surrogate start has been dealt with.
            leftOver = '\0';
        }
    }

    if (flush)
    {
        // Flush the left-over surrogate pair start.
        if (leftOver != '\0')
        {
            int ch = leftOver;
            if (bytes + 2 < end_bytes)
            {
                bytes [0] = (byte)(0xE0 | (ch >> 12));
                bytes [1] = (byte)(0x80 | ((ch >> 6) & 0x3F));
                bytes [2] = (byte)(0x80 | (ch & 0x3F));
                bytes += 3;
            }
            else
            {
                goto fail_no_space;
            }
            leftOver = '\0';
        }
    }

    return((int)(bytes - (end_bytes - bcount)));

fail_no_space:
    throw new ArgumentException("Insufficient Space", "bytes");
}
/// <summary>
/// Passes the code page and both fallbacks straight to the base constructor;
/// no further initialization happens here.
/// </summary>
/// <param name="codePage">Code page identifier.</param>
/// <param name="enc">Encoder fallback.</param>
/// <param name="dec">Decoder fallback.</param>
protected EncodingNLS(int codePage, EncoderFallback enc, DecoderFallback dec)
    : base(codePage, enc, dec)
{
}
/// <summary>
/// Builds the windows-1250 ("Central and Eastern Europe (Windows)") encoding:
/// fills the 256-entry byte-to-char table and the char-to-byte dictionary.
/// </summary>
/// <param name="encoderFallback">Encoder fallback forwarded to the base constructor.</param>
/// <param name="decoderFallback">Decoder fallback forwarded to the base constructor.</param>
public Windows1250Encoding(EncoderFallback encoderFallback, DecoderFallback decoderFallback)
    : base(1250, "Central and Eastern Europe (Windows)", "windows-1250", encoderFallback, decoderFallback)
{
    // Bytes 0x80-0xFF, sixteen entries per row, in byte order.
    const string upperHalf =
        /* 0x80 */ "\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u0160\u2039\u015a\u0164\u017d\u0179" +
        /* 0x90 */ "\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0161\u203a\u015b\u0165\u017e\u017a" +
        /* 0xA0 */ "\u00a0\u02c7\u02d8\u0141\u00a4\u0104\u00a6\u00a7\u00a8\u00a9\u015e\u00ab\u00ac\u00ad\u00ae\u017b" +
        /* 0xB0 */ "\u00b0\u00b1\u02db\u0142\u00b4\u00b5\u00b6\u00b7\u00b8\u0105\u015f\u00bb\u013d\u02dd\u013e\u017c" +
        /* 0xC0 */ "\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e" +
        /* 0xD0 */ "\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df" +
        /* 0xE0 */ "\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f" +
        /* 0xF0 */ "\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9";

    char[] decodeTable = new char[256];

    // Bytes 0x00-0x7F decode to the code point with the same numeric value.
    for (int b = 0; b < 0x80; b++)
    {
        decodeTable[b] = (char)b;
    }

    for (int offset = 0; offset < upperHalf.Length; offset++)
    {
        decodeTable[0x80 + offset] = upperHalf[offset];
    }

    byteToChars_ = decodeTable;

    // The reverse map only lists chars whose code point differs from their
    // byte value; it is filled in ascending byte order.
    Dictionary<char, byte> encodeTable = new Dictionary<char, byte>();
    for (int b = 0x80; b < decodeTable.Length; b++)
    {
        char mapped = decodeTable[b];
        if (mapped != b)
        {
            encodeTable.Add(mapped, (byte)b);
        }
    }

    charToBytes_ = encodeTable;
}
/// <summary>
/// Returns an encoding for <paramref name="codepage"/> that uses the supplied fallbacks.
/// </summary>
/// <param name="codepage">Code page identifier, resolved through <c>GetEncoding(int)</c>.</param>
/// <param name="encoderFallback">Fallback assigned to the returned encoding's <c>EncoderFallback</c>.</param>
/// <param name="decoderFallback">Fallback assigned to the returned encoding's <c>DecoderFallback</c>.</param>
/// <returns>A clone of the default encoding with both fallbacks replaced.</returns>
public static Encoding GetEncoding(int codepage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    // The default instance is cached and read-only, so the fallbacks are applied
    // to a private copy rather than to the shared object.
    Encoding customized = (Encoding)GetEncoding(codepage).Clone();

    customized.EncoderFallback = encoderFallback;
    customized.DecoderFallback = decoderFallback;

    return customized;
}
// Runs the encoder fallback for the character that "chars" points at and
// returns every replacement character the fallback buffer hands out.
//
// chars    - pointer to the character that could not be encoded.
// start    - pointer to the beginning of the input; the difference
//            (chars - start) is reported to the fallback as the index.
// fallback - used to create the buffer when "buffer" is still null.
// buffer   - fallback buffer, created on first use and reset before returning.
unsafe static char [] GetFallbackChars (char *chars, char *start, EncoderFallback fallback, ref EncoderFallbackBuffer buffer)
{
	if (buffer == null)
		buffer = fallback.CreateFallbackBuffer ();

	int offset = (int) (chars - start);
	buffer.Fallback (*chars, offset);

	// Size the result from what the buffer reports right after the fallback,
	// then drain exactly that many characters.
	int pending = buffer.Remaining;
	char [] replacement = new char [pending];
	int pos = 0;
	while (pos < pending) {
		replacement [pos] = buffer.GetNextChar ();
		pos++;
	}

	buffer.Reset ();
	return replacement;
}
// Returns a copy of the encoding registered under "name" that uses the
// given encoder and decoder fallbacks.
//
// Throws ArgumentNullException when either fallback is null.
public static Encoding GetEncoding (string name, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
	if (encoderFallback == null)
		throw new ArgumentNullException (nameof (encoderFallback));
	if (decoderFallback == null)
		throw new ArgumentNullException (nameof (decoderFallback));

	// Direct cast instead of "as": a clone that is not an Encoding should
	// fail right here rather than as a NullReferenceException below.
	Encoding e = (Encoding) GetEncoding (name).Clone ();

	// The copy is marked writable and the fallbacks are stored straight
	// into its fields.
	e.is_readonly = false;
	e.encoder_fallback = encoderFallback;
	e.decoder_fallback = decoderFallback;
	return e;
}
// This constructor lets a subclass supply its encoder/decoder fallback objects up front:
// the encoding object is always created as a read-only object and doesn't allow setting
// the encoder/decoder fallback after the creation is done.
//
// codePage        - code page identifier; must not be negative.
// encoderFallback - encoder fallback to use, or null for the internal best-fit fallback.
// decoderFallback - decoder fallback to use, or null for the internal best-fit fallback.
//
// Throws ArgumentOutOfRangeException when codePage is negative.
protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    // Validate code page
    if (codePage < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(codePage));
    }
    Contract.EndContractBlock();

    // Remember code page
    m_codePage = codePage;

    // A null argument selects the internal best-fit fallback bound to this instance.
    this.encoderFallback = encoderFallback ?? new InternalEncoderBestFitFallback(this);
    this.decoderFallback = decoderFallback ?? new InternalDecoderBestFitFallback(this);
}
// the following two methods are used for the inherited classes which implemented ISerializable

// Deserialization Helper
//
// Restores the code page, read-only flag and fallbacks of this encoding from "info".
// When the newer fields are missing (SerializationException), the data is treated as
// coming from the older format: the instance is flagged as such, made read-only and
// given the default fallbacks.
//
// Throws ArgumentNullException when info is null.
internal void DeserializeEncoding(SerializationInfo info, StreamingContext context)
{
    // Any info?
    if (info == null) throw new ArgumentNullException(nameof(info));
    Contract.EndContractBlock();

    // All versions have a code page
    this.m_codePage = (int)info.GetValue("m_codePage", typeof(int));

    // We can get dataItem on the fly if needed, and the index is different between versions
    // so ignore whatever dataItem data we get from Everett.
    this.dataItem = null;

    // See if we have a code page
    try
    {
        //
        // Try Whidbey V2.0 Fields
        //
        this.m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));

        this.encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
        this.decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
    }
    catch (SerializationException)
    {
        //
        // Didn't have Whidbey things, must be Everett
        //
        this.m_deserializedFromEverett = true;

        // May as well be read only
        this.m_isReadOnly = true;
        SetDefaultFallbacks();
    }
}
/// <summary>
/// Returns an encoding for <paramref name="codepage"/> that uses the supplied fallbacks,
/// asking the registered encoding providers first.
/// </summary>
/// <param name="codepage">Code page identifier.</param>
/// <param name="encoderFallback">Fallback assigned to the returned encoding's <c>EncoderFallback</c>.</param>
/// <param name="decoderFallback">Fallback assigned to the returned encoding's <c>DecoderFallback</c>.</param>
/// <returns>
/// The provider's encoding when a provider supplies one; otherwise a clone of the
/// default encoding with both fallbacks replaced.
/// </returns>
public static Encoding GetEncoding(int codepage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
    if (fromProvider == null)
    {
        // The default encoding is cached and read-only, so customize a clone of it.
        Encoding customized = (Encoding)GetEncoding(codepage).Clone();
        customized.EncoderFallback = encoderFallback;
        customized.DecoderFallback = decoderFallback;
        return customized;
    }

    return fromProvider;
}
// Array-based "GetBytes" worker that carries its state ("leftOver" and the
// fallback buffer) from one call to the next.
//
// Validates the arguments, then pins the arrays and delegates to the
// pointer-based overload.  Returns that overload's result, or 0 when
// charIndex already sits at the end of "chars".
//
// Throws ArgumentNullException for a null array and
// ArgumentOutOfRangeException for an index/count outside its array.
private static int InternalGetBytes (char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
{
	// Argument checks; the order decides which exception a caller sees.
	if (chars == null)
		throw new ArgumentNullException ("chars");
	if (bytes == null)
		throw new ArgumentNullException ("bytes");
	if (charIndex < 0 || charIndex > chars.Length)
		throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
	if (charCount < 0 || charCount > (chars.Length - charIndex))
		throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
	if (byteIndex < 0 || byteIndex > bytes.Length)
		throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

	// No characters left to read.
	if (charIndex == chars.Length) {
		// FIXME: use EncoderFallback.
		//
		// On flush a pending left-over character is simply discarded
		// here; nothing is written for it.
		if (flush && leftOver != '\0')
			leftOver = '\0';
		return 0;
	}

	int room = bytes.Length - byteIndex;

	unsafe {
		fixed (char* source = chars) {
			if (room != 0) {
				fixed (byte* target = bytes) {
					return InternalGetBytes (
						source + charIndex, charCount,
						target + byteIndex, room,
						fallback, ref buffer,
						ref leftOver, flush);
				}
			}

			// No output space at all: hand over a null destination of size 0.
			return InternalGetBytes (
				source + charIndex, charCount,
				null, 0,
				fallback, ref buffer,
				ref leftOver, flush);
		}
	}
}
/// <summary>
/// Creates an encoder bound to <paramref name="encoding"/>, taking its initial
/// fallback from that encoding and then resetting the encoder.
/// </summary>
/// <param name="encoding">The encoding this encoder works for.</param>
internal EncoderNLS(EncodingNLS encoding)
{
    this.m_encoding = encoding;

    // Start out with whatever encoder fallback the owning encoding currently has.
    this.m_fallback = this.m_encoding.EncoderFallback;

    this.Reset();
}
// Installs the default fallbacks for this encoding: the internal best-fit
// encoder and decoder fallbacks, each bound to this instance.
//
// Virtual so that other encodings can choose different defaults
// (presumably a "\xFFFD" replacement for UTF-X and "?" for ASCII, as the
// original note describes - those overrides are not visible here).
internal virtual void SetDefaultFallbacks()
{
    encoderFallback = new InternalEncoderBestFitFallback(this);
    decoderFallback = new InternalDecoderBestFitFallback(this);
}
// Pointer-based "GetBytes" worker: converts "count" UTF-16 characters at
// "chars" into UTF-8 bytes at "bytes" (capacity "bcount").
//
// chars/count  - input characters.
// bytes/bcount - output buffer and its size in bytes.
// fallback     - encoder fallback used for invalid surrogate sequences.
// buffer       - fallback buffer, created on demand by GetFallbackChars.
// leftOver     - pending high surrogate carried between calls ('\0' = none).
// flush        - when true, a high surrogate still pending at the end of
//                the input is written out instead of being kept.
//
// Returns the number of bytes written.
// Throws ArgumentException ("Insufficient Space") when the output buffer
// cannot hold the next sequence.
private unsafe static int InternalGetBytes (char* chars, int count, byte* bytes, int bcount, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
{
	char* end = chars + count;

	char* start = chars;
	byte* start_bytes = bytes;
	byte* end_bytes = bytes + bcount;
	while (chars < end) {
		if (leftOver == 0) {
			for (; chars < end; chars++) {
				int ch = *chars;
				if (ch < '\x80') {
					// One byte.
					if (bytes >= end_bytes)
						goto fail_no_space;
					*bytes++ = (byte)ch;
				} else if (ch < '\x800') {
					// Two bytes.
					if (bytes + 1 >= end_bytes)
						goto fail_no_space;
					bytes [0] = (byte) (0xC0 | (ch >> 6));
					bytes [1] = (byte) (0x80 | (ch & 0x3F));
					bytes += 2;
				} else if (ch < '\uD800' || ch > '\uDFFF') {
					// Three bytes (anything that is not a surrogate).
					if (bytes + 2 >= end_bytes)
						goto fail_no_space;
					bytes [0] = (byte) (0xE0 | (ch >> 12));
					bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
					bytes [2] = (byte) (0x80 | (ch & 0x3F));
					bytes += 3;
				} else if (ch <= '\uDBFF') {
					// This is a surrogate char, exit the inner loop.
					leftOver = *chars;
					chars++;
					break;
				} else {
					// We have a surrogate tail without
					// leading surrogate. In NET_2_0 it
					// uses fallback. In NET_1_1 we output
					// wrong surrogate.
					char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
					char dummy = '\0';
					if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
						goto fail_no_space;
					fixed (char *fb_chars = fallback_chars) {
						bytes += InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
					}

					leftOver = '\0';
				}
			}
		} else {
			if (*chars >= '\uDC00' && *chars <= '\uDFFF') {
				// We have a correct surrogate pair: four bytes.
				int ch = 0x10000 + (int) *chars - 0xDC00 + (((int) leftOver - 0xD800) << 10);
				if (bytes + 3 >= end_bytes)
					goto fail_no_space;
				bytes [0] = (byte) (0xF0 | (ch >> 18));
				bytes [1] = (byte) (0x80 | ((ch >> 12) & 0x3F));
				bytes [2] = (byte) (0x80 | ((ch >> 6) & 0x3F));
				bytes [3] = (byte) (0x80 | (ch & 0x3F));
				bytes += 4;
				chars++;
			} else {
				// We have a surrogate start followed by a
				// regular character.  Technically, this is
				// invalid, but we have to do something.
				// The fallback output is written and the
				// current character is then re-visited
				// (chars is not advanced here).
				//
				// NOTE(review): the fallback is invoked for the
				// current character, not for the pending
				// surrogate in leftOver - confirm this is intended.
				char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
				char dummy = '\0';
				if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
					goto fail_no_space;
				fixed (char *fb_chars = fallback_chars) {
					// Advance past the fallback bytes; without this the
					// next character overwrites them and the returned
					// byte count comes out too small.
					bytes += InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
				}
			}
			// The pending surrogate has been consumed either way.
			leftOver = '\0';
		}
	}
	if (flush) {
		// Flush the left-over surrogate pair start as a three-byte sequence.
		if (leftOver != '\0') {
			int ch = leftOver;
			if (bytes + 2 < end_bytes) {
				bytes [0] = (byte) (0xE0 | (ch >> 12));
				bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
				bytes [2] = (byte) (0x80 | (ch & 0x3F));
				bytes += 3;
			} else {
				goto fail_no_space;
			}
			leftOver = '\0';
		}
	}
	// (end_bytes - bcount) is the original start of the output buffer.
	return (int)(bytes - (end_bytes - bcount));
fail_no_space:
	throw new ArgumentException ("Insufficient Space", "bytes");
}
/// <summary>
/// Returns the encoding registered under <paramref name="name"/>, configured with the
/// supplied fallbacks. Registered encoding providers are asked first.
/// </summary>
/// <param name="name">Encoding name.</param>
/// <param name="encoderFallback">Encoder fallback for the returned encoding.</param>
/// <param name="decoderFallback">Decoder fallback for the returned encoding.</param>
/// <returns>
/// The provider's encoding when a provider supplies one; otherwise the result of the
/// code-page overload for the code page that <c>EncodingTable</c> maps the name to.
/// </returns>
public static Encoding GetEncoding(String name, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
    if (fromProvider == null)
    {
        //
        // NOTE: If you add a new encoding that can be requested by name, be sure to
        // add the corresponding item in EncodingTable.
        // Otherwise, EncodingTable.GetCodePageFromName() below will throw.
        //
        int resolvedCodePage = EncodingTable.GetCodePageFromName(name);
        return GetEncoding(resolvedCodePage, encoderFallback, decoderFallback);
    }

    return fromProvider;
}
// Constructor.
//
// fallback       - stored through the Fallback property.
// emitIdentifier - accepted but not stored; the assignment that used to
//                  keep it is disabled.
public UTF8Encoder (EncoderFallback fallback, bool emitIdentifier)
{
	Fallback = fallback;

	// No surrogate is pending for either counting or converting.
	leftOverForConv = leftOverForCount = '\0';
}
// Creates the encoding by forwarding every argument unchanged to the base
// class constructor; this constructor does no work of its own.
//
// codePage     - passed to the base constructor as the code page.
// dataCodePage - passed to the base constructor as the data code page
//                (presumably the code page whose tables are loaded - confirm
//                against the base class).
// enc / dec    - encoder and decoder fallbacks, passed through as-is.
[System.Security.SecurityCritical]  // auto-generated
internal DBCSCodePageEncoding(int codePage, int dataCodePage, EncoderFallback enc, DecoderFallback dec) : base(codePage, dataCodePage, enc, dec)
{
}
/// <summary>
/// Fast path for counting the bytes needed to encode a run of chars.
/// </summary>
/// <param name="pChars">Pointer to the first char.</param>
/// <param name="charsLength">Number of chars available at <paramref name="pChars"/>.</param>
/// <param name="fallback">Encoder fallback in effect; decides whether the shortcut applies.</param>
/// <param name="charsConsumed">Receives the number of chars accounted for (equal to the return value).</param>
/// <returns>
/// <paramref name="charsLength"/> when every char is known to map to exactly one byte;
/// otherwise the index of the first non-ASCII char.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetByteCountCommon
private protected sealed override unsafe int GetByteCountFast(char *pChars, int charsLength, EncoderFallback fallback, out int charsConsumed)
{
    // Shortcut: with an EncoderReplacementFallback every non-ASCII char (surrogate
    // halves included) is replaced by the default string. When that string is a
    // single ASCII char, transcoding is 1:1 char->byte and no scan is needed.
    bool isOneToOne = fallback is EncoderReplacementFallback replacement
        && replacement.MaxCharCount == 1
        && replacement.DefaultString[0] <= 0x7F;

    // Any other fallback: count only the leading ASCII chars.
    int total = isOneToOne
        ? charsLength
        : (int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pChars, (uint)charsLength);

    charsConsumed = total;
    return total;
}