public static bool handle_unicode_codepoint(uint8_t **src_ptr, uint8_t **dst_ptr) { uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; // check for low surrogate for characters outside the Basic // Multilingual Plane. if (code_point >= 0xd800 && code_point < 0xdc00) { if (((*src_ptr)[0] != (bytechar)'\\') || (*src_ptr)[1] != (bytechar)'u') { return(false); } uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); code_point = (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; *src_ptr += 6; } size_t offset = codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return(offset > 0); }
public static bool handle_unicode_codepoint(uint8_t **src_ptr, uint8_t **dst_ptr) { // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; // check for low surrogate for characters outside the Basic // Multilingual Plane. if (code_point >= 0xd800 && code_point < 0xdc00) { if (((*src_ptr)[0] != (bytechar)'\\') || (*src_ptr)[1] != (bytechar)'u') { return(false); } uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); // if the first code point is invalid we will get here, as we will go past // the check for being outside the Basic Multilingual plane. If we don't // find a \u immediately afterwards we fail out anyhow, but if we do, // this check catches both the case of the first code point being invalid // or the second code point being invalid. if ((code_point | code_point_2) >> 16 != 0) { return(false); } code_point = (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; *src_ptr += 6; } size_t offset = codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return(offset > 0); }