Esempio n. 1
0
        /// <summary>
        /// Decodes one <c>\uXXXX</c> escape, plus the <c>\uXXXX</c> escape that follows
        /// it when the first value is a UTF-16 high surrogate, and passes the resulting
        /// code point to <c>codepoint_to_utf8</c> to be written at <c>*dst_ptr</c>.
        /// </summary>
        /// <param name="src_ptr">
        /// Cursor into the input. The hex digits are read starting at <c>*src_ptr + 2</c>
        /// (assumes the caller has already matched the leading backslash and 'u' —
        /// TODO confirm against caller). Advanced by 6 per escape consumed.
        /// </param>
        /// <param name="dst_ptr">
        /// Cursor into the output. Advanced by the value returned from
        /// <c>codepoint_to_utf8</c> (presumably the number of bytes written).
        /// </param>
        /// <returns>
        /// <c>false</c> when a high surrogate is not followed by a <c>\u</c> escape
        /// holding a low surrogate, or when <c>codepoint_to_utf8</c> returns 0;
        /// otherwise <c>true</c>.
        /// </returns>
        public static bool handle_unicode_codepoint(uint8_t **src_ptr, uint8_t **dst_ptr)
        {
            // NOTE(review): hex_to_u32_nocheck is not validated here for the first
            // escape; a bad value is only caught if codepoint_to_utf8 rejects it.
            uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2);

            *src_ptr += 6;

            // A high surrogate (0xD800..0xDBFF) is only the first half of a character
            // outside the Basic Multilingual Plane; the low half must follow.
            if (code_point >= 0xd800 && code_point < 0xdc00)
            {
                if (((*src_ptr)[0] != (bytechar)'\\') || (*src_ptr)[1] != (bytechar)'u')
                {
                    return false;
                }

                uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2);

                // The second half must be a low surrogate (0xDC00..0xDFFF). Without
                // this check, a smaller value makes the subtraction below wrap around
                // and a larger one overflows the 10-bit field, silently producing an
                // unrelated code point. The check also rejects anything above 0xFFFF.
                if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff)
                {
                    return false;
                }

                // Combine the two 10-bit halves and rebase above the BMP.
                code_point =
                    (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
                *src_ptr += 6;
            }

            size_t offset = codepoint_to_utf8(code_point, *dst_ptr);

            *dst_ptr += offset;

            // A zero offset is treated as failure (nothing was emitted).
            return offset > 0;
        }
Esempio n. 2
0
        /// <summary>
        /// Decodes one <c>\uXXXX</c> escape, plus the <c>\uXXXX</c> escape that follows
        /// it when the first value is a UTF-16 high surrogate, and passes the resulting
        /// code point to <c>codepoint_to_utf8</c> to be written at <c>*dst_ptr</c>.
        /// </summary>
        /// <param name="src_ptr">
        /// Cursor into the input. The hex digits are read starting at <c>*src_ptr + 2</c>
        /// (assumes the caller has already matched the leading backslash and 'u' —
        /// TODO confirm against caller). Advanced by 6 per escape consumed.
        /// </param>
        /// <param name="dst_ptr">
        /// Cursor into the output. Advanced by the value returned from
        /// <c>codepoint_to_utf8</c> (presumably the number of bytes written).
        /// </param>
        /// <returns>
        /// <c>false</c> when a high surrogate is not followed by a <c>\u</c> escape
        /// holding a low surrogate, or when <c>codepoint_to_utf8</c> returns 0;
        /// otherwise <c>true</c>.
        /// </returns>
        public static bool handle_unicode_codepoint(uint8_t **src_ptr, uint8_t **dst_ptr)
        {
            // hex_to_u32_nocheck fills the high 16 bits of the return value with 1s if
            // the conversion isn't valid. Such a value is never inside the high
            // surrogate range, so an invalid first escape skips the branch below and is
            // left for codepoint_to_utf8 to reject (presumably by returning 0 — verify).
            uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2);

            *src_ptr += 6;

            // A high surrogate (0xD800..0xDBFF) is only the first half of a character
            // outside the Basic Multilingual Plane; the low half must follow.
            if (code_point >= 0xd800 && code_point < 0xdc00)
            {
                if (((*src_ptr)[0] != (bytechar)'\\') || (*src_ptr)[1] != (bytechar)'u')
                {
                    return false;
                }

                uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2);

                // The second half must be a low surrogate (0xDC00..0xDFFF). This one
                // range check covers both failure modes: an invalid hex conversion
                // (high bits set, so the value is far above 0xDFFF) and a well-formed
                // escape that is not a low surrogate, which would otherwise make the
                // subtraction below wrap around and yield an unrelated code point.
                if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff)
                {
                    return false;
                }

                // Combine the two 10-bit halves and rebase above the BMP.
                code_point =
                    (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
                *src_ptr += 6;
            }

            size_t offset = codepoint_to_utf8(code_point, *dst_ptr);

            *dst_ptr += offset;

            // A zero offset is treated as failure (nothing was emitted).
            return offset > 0;
        }