Exemplo n.º 1
0
        /// <summary>
        /// Decodes the three-unit sequence whose lead unit is at index <paramref name="it"/>.
        /// </summary>
        /// <param name="src">Text being decoded.</param>
        /// <param name="it">Index of the lead unit. Passed by value, so the caller's index is not moved.</param>
        /// <param name="end">Index one past the last readable unit.</param>
        /// <param name="code_point">Receives the decoded value; holds a partial value when an error is returned.</param>
        /// <returns>
        /// UTF8_OK on success, NOT_ENOUGH_ROOM when <paramref name="it"/> equals <paramref name="end"/>,
        /// otherwise whatever error increase_safely reports for a trail unit.
        /// </returns>
        static utf_error get_sequence_3(ref string src, int it, int end, ref int code_point)
        {
            if (it == end)
            {
                return(utf_error.NOT_ENOUGH_ROOM);
            }

            code_point = mask8(src[it]);

            // NOTE(review): increase_safely receives the index by value, so it cannot advance
            // it for us. It is presumed to validate the unit at (index + 1), as the utfcpp
            // helper of the same name does - confirm against its definition.
            utf_error ret1 = increase_safely(ref src, it, end);
            if (ret1 != utf_error.UTF8_OK)
            {
                return(ret1);
            }

            // Step to the first trail unit explicitly; reading src[it] here would re-read the lead.
            int second = it + 1;

            code_point = ((code_point << 12) & 0xffff) + ((mask8(src[second]) << 6) & 0xfff);

            // Validate the second trail unit, starting from the first one.
            utf_error ret2 = increase_safely(ref src, second, end);
            if (ret2 != utf_error.UTF8_OK)
            {
                return(ret2);
            }

            int third = second + 1;

            code_point += (src[third]) & 0x3f;

            return(utf_error.UTF8_OK);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Walks <paramref name="source"/> with repeated validate_next calls and reports
        /// whether the walk reached the end of the string without an error.
        /// </summary>
        /// <param name="source">String to check; an empty string yields true.</param>
        /// <returns>true when the cursor reached the end of the string, false otherwise.</returns>
        public static bool IsStringValidUTF8(string source)
        {
            int limit  = source.Length;
            int cursor = 0;

            // Consume one sequence per pass and stop at the first failure.
            while (cursor != limit)
            {
                int decoded = 0; // required by validate_next; the value itself is not used here

                if (validate_next(ref source, ref cursor, limit, ref decoded) != utf_error.UTF8_OK)
                {
                    break;
                }
            }

            // Same verdict on both exits: did the cursor get to the end?
            return(cursor >= limit);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Decodes the two-unit sequence whose lead unit is at index <paramref name="it"/>.
        /// </summary>
        /// <param name="src">Text being decoded.</param>
        /// <param name="it">Index of the lead unit. Passed by value, so the caller's index is not moved.</param>
        /// <param name="end">Index one past the last readable unit.</param>
        /// <param name="code_point">Receives the decoded value; holds the masked lead unit when an error is returned after the first read.</param>
        /// <returns>
        /// UTF8_OK on success, NOT_ENOUGH_ROOM when <paramref name="it"/> equals <paramref name="end"/>,
        /// otherwise whatever error increase_safely reports for the trail unit.
        /// </returns>
        static utf_error get_sequence_2(ref string src, int it, int end, ref int code_point)
        {
            if (it == end)
            {
                return(utf_error.NOT_ENOUGH_ROOM);
            }

            code_point = mask8(src[it]);

            // NOTE(review): increase_safely receives the index by value, so it cannot advance
            // it for us. It is presumed to validate the unit at (index + 1), as the utfcpp
            // helper of the same name does - confirm against its definition.
            utf_error ret = increase_safely(ref src, it, end);

            if (ret != utf_error.UTF8_OK)
            {
                return(ret);
            }

            // Step to the trail unit explicitly; reading src[it] here would re-read the lead.
            int trail = it + 1;

            code_point = ((code_point << 6) & 0x7ff) + ((src[trail]) & 0x3f);

            return(utf_error.UTF8_OK);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Decodes and checks the sequence starting at index <paramref name="it"/> and, on
        /// success, moves <paramref name="it"/> past the whole sequence.
        /// </summary>
        /// <param name="src">Text being validated.</param>
        /// <param name="it">Index of the lead unit; advanced by the sequence length on success, left unchanged on failure.</param>
        /// <param name="end">Index one past the last readable unit.</param>
        /// <param name="code_point">Receives the decoded value on success; not written on failure.</param>
        /// <returns>
        /// UTF8_OK on success; NOT_ENOUGH_ROOM, INVALID_LEAD, INVALID_CODE_POINT,
        /// OVERLONG_SEQUENCE, or an error propagated from the get_sequence_N helpers otherwise.
        /// </returns>
        static utf_error validate_next(ref string src, ref int it, int end, ref int code_point)
        {
            if (it == end)
            {
                return(utf_error.NOT_ENOUGH_ROOM);
            }

            Int32 cp = 0;

            // Determine the sequence length based on the lead octet
            int length = sequence_length(ref src, it);

            // Get trail octets and calculate the code point
            utf_error err = utf_error.UTF8_OK;

            switch (length)
            {
            case 0:
                return(utf_error.INVALID_LEAD);

            case 1:
                err = get_sequence_1(ref src, it, end, ref cp);
                break;

            case 2:
                err = get_sequence_2(ref src, it, end, ref cp);
                break;

            case 3:
                err = get_sequence_3(ref src, it, end, ref cp);
                break;

            case 4:
                err = get_sequence_4(ref src, it, end, ref cp);
                break;
            }

            // The helpers above receive the index by value and therefore never move it, so
            // it still points at the lead unit here. Before advancing by the full sequence
            // length, make sure the whole sequence lies inside [it, end).
            if ((err == utf_error.UTF8_OK) && (length > end - it))
            {
                err = utf_error.NOT_ENOUGH_ROOM;
            }

            // Failure: the index was never modified, so there is nothing to restore.
            if (err != utf_error.UTF8_OK)
            {
                return(err);
            }

            // Decoding succeeded. Now, security checks...
            if (!is_code_point_valid((UInt32)cp))
            {
                return(utf_error.INVALID_CODE_POINT);
            }

            if (is_overlong_sequence((UInt32)cp, length))
            {
                return(utf_error.OVERLONG_SEQUENCE);
            }

            // Passed. Skip the whole sequence, not just its lead unit, so that the next
            // call starts on the following lead rather than on a trail unit.
            code_point = cp;
            it        += length;
            return(utf_error.UTF8_OK);
        }