Convert() public static method

A converter that processes a byte buffer containing a mix of UTF8 and Latin-1/Cp1252 chars. The result is a buffer where those chars have been converted to UTF-8; that means it contains only valid UTF-8 chars.

Explanation of the processing: First, the encoding of the buffer is detected by looking at the first four bytes (this works only if the buffer starts with an ASCII character, like XML's '<'). UTF-16/32 flavours do not require further processing.

If UTF-8 is detected, any invalid UTF-8 sequences are assumed to be Latin-1/Cp1252 encoded bytes, and each such byte is converted to its corresponding UTF-8 byte sequence.

The 0x80..0x9F range is undefined in Latin-1, but is defined in Windows code page 1252. The bytes 0x81, 0x8D, 0x8F, 0x90, and 0x9D are formally undefined by Windows 1252. These are in XML's RestrictedChar set, so we map them to a space.

The official Latin-1 characters in the range 0xA0..0xFF are converted into the Unicode Latin Supplement range U+00A0 - U+00FF.

Example: If a Euro symbol (€) appears in the byte buffer (0xE2, 0x82, 0xAC), it will be left as is. But if only the first two bytes appear, followed by the ASCII char A (0xE2 - 0x82 - 0x41), the sequence will be converted to 0xC3, 0xA2 (â) - 0xE2, 0x80, 0x9A (‚) - 0x41 (A).

public static Convert ( ByteBuffer buffer ) : ByteBuffer
buffer ByteBuffer a byte buffer containing a mix of UTF-8 and Latin-1/Cp1252 chars
return ByteBuffer
Beispiel #1
0
        /// <summary>
        /// Parses XML from a byte buffer. If the first attempt fails with an
        /// <see cref="XmpException"/> whose error code is <c>BADXML</c> or <c>BADSTREAM</c>,
        /// the buffer is optionally converted from Latin-1 to UTF-8 and/or read through a
        /// <c>FixAsciiControlsReader</c>, and parsing is attempted a second time.
        /// </summary>
        /// <param name="buffer"> a byte buffer containing the XMP packet </param>
        /// <param name="options"> the parsing options (<c>AcceptLatin1</c>, <c>FixControlChars</c>) </param>
        /// <returns> Returns an XML DOM-Document. </returns>
        /// <exception cref="XmpException">
        /// Rethrown unchanged when the first attempt fails with any other error code;
        /// thrown with <c>INTERNALFAILURE</c> when the control-character-fixing retry fails.
        /// </exception>
        private static XmlDocument ParseXmlFromBytebuffer(ByteBuffer buffer, ParseOptions options)
        {
            try {
                XmlDocument doc = new XmlDocument();
                doc.Load(buffer.ByteStream);
                return doc;
            }
            // NOTE(review): XmlDocument.Load normally reports malformed XML as XmlException,
            // which this handler does not catch - confirm that an XmpException can really
            // originate from the try block above.
            catch (XmpException e) {
                if (e.ErrorCode == XmpError.BADXML || e.ErrorCode == XmpError.BADSTREAM)
                {
                    XmlDocument doc = new XmlDocument();

                    if (options.AcceptLatin1)
                    {
                        buffer = Latin1Converter.Convert(buffer);
                    }

                    if (options.FixControlChars)
                    {
                        try {
                            string       encoding  = buffer.Encoding;
                            StreamReader fixReader = new FixAsciiControlsReader(buffer.ByteStream, encoding);
                            doc.Load(fixReader);
                            return doc;
                        }
                        catch (Exception) {
                            // can normally not happen as the encoding is provided by a util function
                            // NOTE(review): this also wraps parse errors raised by doc.Load above,
                            // and chains the ORIGINAL failure (e), not the retry failure.
                            throw new XmpException("Unsupported Encoding", XmpError.INTERNALFAILURE, e);
                        }
                    }

                    // Second attempt without the control-character fix; failures propagate as-is.
                    doc.Load(buffer.ByteStream);
                    return doc;
                }

                // Not a recoverable error code: rethrow with "throw;" so the original
                // stack trace is preserved ("throw e;" would reset it to this line).
                throw;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Parses XML from a byte buffer through the reader returned by <c>GetSecureXmlReader</c>.
        /// If the first attempt throws an <see cref="XmlException"/>, the buffer is optionally
        /// converted from Latin-1 to UTF-8 and/or read through a <c>FixAsciiControlsReader</c>,
        /// and parsing is attempted a second time.
        /// </summary>
        /// <param name="buffer"> a byte buffer containing the XMP packet </param>
        /// <param name="options"> the parsing options (<c>AcceptLatin1</c>, <c>FixControlChars</c>) </param>
        /// <returns> Returns an XML DOM-Document. </returns>
        /// <exception cref="XmpException">
        /// Thrown with <c>INTERNALFAILURE</c> when the control-character-fixing retry fails.
        /// </exception>
        /// <exception cref="XmlException">
        /// May propagate from the second attempt when <c>FixControlChars</c> is not set.
        /// </exception>
        private static XmlDocument ParseXmlFromBytebuffer(ByteBuffer buffer, ParseOptions options)
        {
            try {
                XmlDocument doc = new XmlDocument();
                doc.Load(GetSecureXmlReader(buffer.ByteStream));
                return doc;
            } catch (XmlException e) {
                XmlDocument doc = new XmlDocument();
                if (options.AcceptLatin1)
                {
                    buffer = Latin1Converter.Convert(buffer);
                }

                if (options.FixControlChars)
                {
                    try {
                        StreamReader           streamReader = new StreamReader(buffer.ByteStream, Encoding.GetEncoding(buffer.Encoding));
                        FixAsciiControlsReader fixReader    = new FixAsciiControlsReader(streamReader);
                        doc.Load(GetSecureXmlReader(fixReader));
                        return doc;
                    } catch (Exception) {
                        // can normally not happen as the encoding is provided by a util function
                        // NOTE(review): this also wraps parse errors raised by doc.Load above,
                        // and chains the ORIGINAL failure (e), not the retry failure.
                        throw new XmpException("Unsupported Encoding", XmpError.INTERNALFAILURE, e);
                    }
                }

                // The retry must go through the same secure reader as the first attempt;
                // loading the raw stream directly would bypass whatever restrictions
                // GetSecureXmlReader applies to the other two load paths.
                doc.Load(GetSecureXmlReader(buffer.ByteStream));
                return doc;
            }
        }