/// <summary>
/// Incrementally checks that <paramref name="payload"/> is well-formed UTF-8, resuming from and
/// updating the decoder state held in <paramref name="state"/>.
/// </summary>
/// <param name="state">
/// Decoder state passed by reference. It is left mid-character when the payload ends inside a
/// multi-byte sequence, so a later call can finish validating that character.
/// </param>
/// <param name="payload">The bytes to validate; every memory segment is scanned in order.</param>
/// <param name="fin">
/// <c>true</c> when no further payload follows; a character left unfinished at the end of
/// <paramref name="payload"/> is then reported as invalid.
/// </param>
/// <returns>
/// <c>false</c> as soon as an invalid byte or sequence is found, or when <paramref name="fin"/> is
/// <c>true</c> and the payload ends inside a character; otherwise <c>true</c>.
/// </returns>
private static bool ValidateUtf8(ref Utf8ValidatorState state, ReadableBuffer payload, bool fin)
{
    // Walk through the payload verifying it, one memory segment at a time.
    foreach (var mem in payload)
    {
        var span = mem.Span;
        for (int i = 0; i < span.Length; i++)
        {
            var b = span[i];
            if (!state._withinSequence)
            {
                // This is the first byte of a character: look up how many bytes it announces.
                var width = _utf8Width[b];
                state._remainingBytesInChar = width - 1;
                if (state._remainingBytesInChar < 0)
                {
                    // A width of 0 marks a byte that cannot start a character.
                    return false;
                }

                // Use (width - 1) to index into the mask and minimum-code-point tables.
                state._currentDecodedValue = b & _utf8Mask[width - 1];
                state._minCodePoint = _utf8Min[width - 1];
                state._withinSequence = true;
            }
            else
            {
                // Every trailing byte of a multi-byte character must have the form 10xxxxxx.
                // Without this check a lead byte followed by an arbitrary byte (e.g. C3 41)
                // would be folded into the decoded value and could pass as valid.
                if ((b & 0xC0) != 0x80)
                {
                    return false;
                }

                // Append the six payload bits of this continuation byte to the value.
                state._currentDecodedValue = (state._currentDecodedValue << 6) | (b & 0x3F);
                state._remainingBytesInChar--;
            }

            // Fast invalid exits, decided from the bits decoded so far.
            if (state._remainingBytesInChar == 1 &&
                state._currentDecodedValue >= 0x360 &&
                state._currentDecodedValue <= 0x37F)
            {
                // One more 6-bit shift would land in the UTF-16 surrogate range 0xD800-0xDFFF.
                return false;
            }

            if (state._remainingBytesInChar == 2 && state._currentDecodedValue >= 0x110)
            {
                // Two more 6-bit shifts would exceed the maximum Unicode code point (0x10FFFF).
                return false;
            }

            if (state._remainingBytesInChar == 0)
            {
                // The character is complete: check the range of the final decoded value.
                if (state._currentDecodedValue < state._minCodePoint)
                {
                    // This encoding is longer than it needs to be, which is not allowed.
                    return false;
                }

                // Reset so the next byte is treated as the start of a new character.
                state._withinSequence = false;
            }
        }
    }

    // We're done. The data seen so far is acceptable if either:
    // 1. More data is still to come (the state is kept by the caller for the next call), or
    // 2. We are not inside a character sequence (the last character is not unterminated).
    return !fin || !state._withinSequence;
}
/// <summary>
/// Validates <paramref name="payload"/> as a complete, self-contained UTF-8 byte sequence.
/// </summary>
/// <param name="payload">The bytes to validate.</param>
/// <returns>
/// The result of the stateful overload when started from a freshly constructed
/// <c>Utf8ValidatorState</c> with <c>fin</c> set to <c>true</c>, so a character left unfinished
/// at the end of the payload makes the result <c>false</c>.
/// </returns>
public static bool ValidateUtf8(ReadableBuffer payload)
{
    // Nothing is carried over from earlier data: start from a brand-new decoder state.
    Utf8ValidatorState freshState = new Utf8ValidatorState();

    // fin: true because this payload is the entire input.
    bool isValid = ValidateUtf8(ref freshState, payload, fin: true);
    return isValid;
}