Exemple #1
0
        /// <summary>
        /// Validates the bytes of <paramref name="payload"/> as UTF-8, resuming from and updating
        /// <paramref name="state"/> so that a character split across two payloads is still checked.
        /// </summary>
        /// <param name="state">
        /// Decoder state carried between calls. It is mutated in place; after a <c>false</c> result its
        /// contents reflect the point at which validation stopped.
        /// </param>
        /// <param name="payload">The buffer whose segments are scanned byte by byte.</param>
        /// <param name="fin">
        /// <c>true</c> when this payload ends the message, in which case a trailing, unterminated
        /// multi-byte sequence makes the result <c>false</c>.
        /// </param>
        /// <returns>
        /// <c>false</c> as soon as an invalid lead byte, a non-continuation byte inside a sequence, a
        /// surrogate, a value above 0x10FFFF, or an overlong encoding is seen, or when
        /// <paramref name="fin"/> is set and the payload ends mid-character; otherwise <c>true</c>.
        /// </returns>
        private static bool ValidateUtf8(ref Utf8ValidatorState state, ReadableBuffer payload, bool fin)
        {
            // Walk through the payload verifying it
            foreach (var mem in payload)
            {
                var span = mem.Span;
                for (int i = 0; i < span.Length; i++)
                {
                    var b = span[i];
                    if (!state._withinSequence)
                    {
                        // This is the first byte of a char, so set things up
                        var width = _utf8Width[b];
                        state._remainingBytesInChar = width - 1;
                        if (state._remainingBytesInChar < 0)
                        {
                            // Invalid first byte (the width table reports no valid length for it)
                            return false;
                        }

                        // Use the width (-1) to index into the mask and min tables.
                        state._currentDecodedValue = b & _utf8Mask[width - 1];
                        state._minCodePoint = _utf8Min[width - 1];
                        state._withinSequence = true;
                    }
                    else
                    {
                        // Every byte after the lead byte must be a continuation byte (10xxxxxx).
                        // Without this check, sequences such as C3 28 would be decoded as if valid.
                        if ((b & 0xC0) != 0x80)
                        {
                            return false;
                        }

                        // Add the low six payload bits of this byte to the value
                        state._currentDecodedValue = (state._currentDecodedValue << 6) | (b & 0x3F);
                        state._remainingBytesInChar--;
                    }

                    // Fast invalid exits
                    if (state._remainingBytesInChar == 1 && state._currentDecodedValue >= 0x360 && state._currentDecodedValue <= 0x37F)
                    {
                        // One more 6-bit shift turns 0x360-0x37F into 0xD800-0xDFFF,
                        // i.e. this will be a UTF-16 surrogate.
                        return false;
                    }

                    if (state._remainingBytesInChar == 2 && state._currentDecodedValue >= 0x110)
                    {
                        // Two more 6-bit shifts turn 0x110 into 0x110000, so this will be
                        // above the maximum Unicode character (0x10FFFF).
                        return false;
                    }

                    if (state._remainingBytesInChar == 0)
                    {
                        // Check the range of the final decoded value
                        if (state._currentDecodedValue < state._minCodePoint)
                        {
                            // This encoding is longer than it should be, which is not allowed.
                            return false;
                        }

                        // Reset state
                        state._withinSequence = false;
                    }
                }
            }

            // We're done.
            // The value is valid if:
            //  1. We haven't reached the end of the whole message yet (we'll be caching this state for the next message)
            //  2. We aren't inside a character sequence (i.e. the last character isn't unterminated)
            return !fin || !state._withinSequence;
        }
Exemple #2
0
        /// <summary>
        /// Validates <paramref name="payload"/> as one complete UTF-8 message.
        /// </summary>
        /// <param name="payload">The buffer to validate.</param>
        /// <returns>
        /// The result of the stateful validator when started from a newly created
        /// <c>Utf8ValidatorState</c> and told that this payload is the final one, so a payload
        /// that ends in the middle of a multi-byte character is reported as invalid.
        /// </returns>
        public static bool ValidateUtf8(ReadableBuffer payload)
        {
            // A one-shot validation has no earlier fragment, so begin with fresh decoder state.
            Utf8ValidatorState freshState = new Utf8ValidatorState();
            bool isValid = ValidateUtf8(ref freshState, payload, true);
            return isValid;
        }