Example #1
0
        /// <summary>
        /// Scans UTF-8 text for the first byte that cannot be copied to the output unchanged.
        /// </summary>
        /// <param name="utf8Text">The UTF-8 bytes to inspect.</param>
        /// <returns>
        /// The index of the first byte that either must be encoded or starts a sequence that
        /// does not decode with <see cref="OperationStatus.Done"/>; -1 when every byte of
        /// <paramref name="utf8Text"/> was consumed without hitting either condition.
        /// </returns>
        public virtual unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan <byte> utf8Text)
        {
            if (!_isAsciiCacheInitialized)
            {
                InitializeAsciiCache();
            }

            fixed (byte* ptr = utf8Text)
            {
                int idx = 0;

#if NETCOREAPP
                if ((Sse2.IsSupported || AdvSimd.Arm64.IsSupported) && idx <= utf8Text.Length - 16)
                {
                    // Read the lookup vectors a single time, ahead of the loop, instead of per iteration.
                    Vector128<sbyte> escapeBitMask = _bitMaskLookupAsciiNeedsEscaping;
                    Vector128<sbyte> bitPositions = Ssse3Helper.s_bitPosLookup;
                    Vector128<sbyte> nibbleMask = Ssse3Helper.s_nibbleMaskSByte;
                    Vector128<sbyte> nullMask = Ssse3Helper.s_nullMaskSByte;

                    sbyte* blockStart = (sbyte*)ptr;
                    do
                    {
                        Debug.Assert(blockStart >= ptr && blockStart <= (ptr + utf8Text.Length - 16));

                        // Pull in 16 bytes and ask whether any of them has its high bit set
                        // (such a byte is negative when viewed as a signed byte).
                        Vector128<sbyte> block;
                        bool hasNonAscii;

                        if (Sse2.IsSupported)
                        {
                            block = Sse2.LoadVector128(blockStart);
                            hasNonAscii = Sse2Helper.ContainsNonAsciiByte(block);
                        }
                        else if (AdvSimd.Arm64.IsSupported)
                        {
                            block = AdvSimd.LoadVector128(blockStart);
                            hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(block);
                        }
                        else
                        {
                            throw new PlatformNotSupportedException();
                        }

                        if (hasNonAscii)
                        {
                            // Mixed content: walk this 16-byte window one scalar at a time.
                            // A multi-byte scalar may carry idx past the end of the window.
                            int blockEnd = idx + 16;
                            Debug.Assert(blockEnd <= utf8Text.Length);

                            while (idx < blockEnd)
                            {
                                Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));

                                if (!UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
                                {
                                    OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint scalar, out int consumed);

                                    Debug.Assert(scalar <= int.MaxValue);
                                    if (status != OperationStatus.Done || WillEncode((int)scalar))
                                    {
                                        return idx;
                                    }

                                    Debug.Assert(status == OperationStatus.Done);
                                    idx += consumed;
                                }
                                else
                                {
                                    if (DoesAsciiNeedEncoding(ptr[idx]))
                                    {
                                        return idx;
                                    }

                                    idx++;
                                }
                            }
                        }
                        else
                        {
                            // Every byte in the window is ASCII.
                            // TODO AdvSimd: optimization maybe achievable using VectorTableLookup and/or VectorTableLookupExtension

                            if (Ssse3.IsSupported)
                            {
                                Vector128<sbyte> escapeMask = Ssse3Helper.CreateEscapingMask(block, escapeBitMask, bitPositions, nibbleMask, nullMask);
                                int firstHit = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());

                                // A value below 16 is the offset of a byte within this window.
                                if (firstHit < 16)
                                {
                                    return idx + firstHit;
                                }
                            }
                            else
                            {
                                // No vectorized mask on this path: test the 16 bytes in order.
                                byte* window = (byte*)blockStart;
                                for (int offset = 0; offset < 16; offset++)
                                {
                                    if (DoesAsciiNeedEncoding(window[offset]))
                                    {
                                        return idx + offset;
                                    }
                                }
                            }

                            idx += 16;
                        }

                        blockStart = (sbyte*)ptr + idx;
                    }
                    while (idx <= utf8Text.Length - 16);

                    // Fewer than 16 bytes remain for the scalar loop below.
                    Debug.Assert(utf8Text.Length - idx < 16);
                }
#endif

                // Scalar pass over whatever the vectorized section did not consume.
                while (idx < utf8Text.Length)
                {
                    Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));

                    if (!UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
                    {
                        OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint scalar, out int consumed);

                        Debug.Assert(scalar <= int.MaxValue);
                        if (status != OperationStatus.Done || WillEncode((int)scalar))
                        {
                            return idx;
                        }

                        Debug.Assert(status == OperationStatus.Done);
                        idx += consumed;
                    }
                    else
                    {
                        if (DoesAsciiNeedEncoding(ptr[idx]))
                        {
                            return idx;
                        }

                        idx++;
                    }
                }

                Debug.Assert(idx == utf8Text.Length);

                // Nothing in the input needs to be encoded.
                return -1;
            }
        }
Example #2
0
        /// <summary>
        /// Scans a UTF-16 buffer for the first character that has to be escaped.
        /// </summary>
        /// <param name="text">Pointer to the first character to inspect.</param>
        /// <param name="textLength">Number of characters available at <paramref name="text"/>.</param>
        /// <returns>
        /// The index of the first character that must be escaped, or -1 when no such
        /// character was found in the first <paramref name="textLength"/> characters.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public override unsafe int FindFirstCharacterToEncode(char *text, int textLength)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            int idx = 0;

#if NETCOREAPP
            if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
            {
                short* cursor = (short*)text;
                while (idx <= textLength - 8)
                {
                    Debug.Assert(cursor >= text && cursor <= (text + textLength - 8));

                    // Load 8 characters and determine whether any of them is outside ASCII.
                    Vector128<short> chars;
                    bool hasNonAscii;

                    if (Sse2.IsSupported)
                    {
                        chars = Sse2.LoadVector128(cursor);
                        Vector128<short> asciiMask = Sse2Helper.CreateAsciiMask(chars);
                        hasNonAscii = Sse2Helper.ContainsNonAsciiByte(asciiMask.AsSByte());
                    }
                    else if (AdvSimd.Arm64.IsSupported)
                    {
                        chars = AdvSimd.LoadVector128(cursor);
                        Vector128<short> asciiMask = AdvSimdHelper.CreateAsciiMask(chars);
                        hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(asciiMask.AsSByte());
                    }
                    else
                    {
                        throw new PlatformNotSupportedException();
                    }

                    if (!hasNonAscii)
                    {
                        // All 8 characters are ASCII: build the escaping mask and locate its first set lane.
                        int firstHit;

                        if (Sse2.IsSupported)
                        {
                            Vector128<short> escapeMask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(chars);
                            firstHit = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                        }
                        else if (AdvSimd.Arm64.IsSupported)
                        {
                            Vector128<short> escapeMask = AdvSimdHelper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(chars);
                            firstHit = AdvSimdHelper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                        }
                        else
                        {
                            throw new PlatformNotSupportedException();
                        }

                        // 16 or more means no character in this group needs escaping.
                        if (firstHit < 16)
                        {
                            // The helper reports a byte offset; each character is two bytes wide,
                            // so halve it to get the character offset within the group.
                            Debug.Assert(firstHit % 2 == 0);
                            return idx + (firstHit >> 1);
                        }

                        idx += 8;
                    }
                    else
                    {
                        // At least one non-ASCII character: check the 8 characters individually.
                        int groupEnd = idx + 8;
                        Debug.Assert(groupEnd <= textLength);

                        while (idx < groupEnd)
                        {
                            Debug.Assert((text + idx) <= (text + textLength));
                            if (!_allowedCharacters.IsCharacterAllowed(text[idx]))
                            {
                                return idx;
                            }

                            idx++;
                        }
                    }

                    // Both fall-through paths consumed exactly 8 characters.
                    cursor += 8;
                }

                // Fewer than 8 characters remain for the scalar loop below.
                Debug.Assert(textLength - idx < 8);
            }
#endif

            while (idx < textLength)
            {
                Debug.Assert((text + idx) <= (text + textLength));
                if (!_allowedCharacters.IsCharacterAllowed(text[idx]))
                {
                    return idx;
                }

                idx++;
            }

            // Every character is allowed.
            return -1;
        }
        /// <summary>
        /// Scans UTF-8 text for the first byte that cannot be copied to the output unchanged.
        /// </summary>
        /// <param name="utf8Text">The UTF-8 bytes to inspect.</param>
        /// <returns>
        /// The index of the first ASCII byte for which <c>DoesAsciiNeedEncoding</c> reports 1,
        /// or of the first non-ASCII sequence that either fails to decode or is reported by
        /// <c>WillEncode</c>; -1 when the whole input was consumed without such a hit.
        /// </returns>
        public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan <byte> utf8Text)
        {
            fixed (byte* ptr = utf8Text)
            {
                int idx = 0;

#if NETCOREAPP
                if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
                {
                    sbyte* blockStart = (sbyte*)ptr;
                    while (idx <= utf8Text.Length - 16)
                    {
                        Debug.Assert(blockStart >= ptr && blockStart <= (ptr + utf8Text.Length - 16));

                        // Load 16 bytes and test for any byte outside the ASCII range
                        // (such a byte is negative when viewed as a signed byte).
                        bool hasNonAscii;

                        if (Sse2.IsSupported)
                        {
                            hasNonAscii = Sse2Helper.ContainsNonAsciiByte(Sse2.LoadVector128(blockStart));
                        }
                        else if (AdvSimd.Arm64.IsSupported)
                        {
                            hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(AdvSimd.LoadVector128(blockStart));
                        }
                        else
                        {
                            throw new PlatformNotSupportedException();
                        }

                        int blockEnd = idx + 16;

                        if (!hasNonAscii)
                        {
                            // ASCII-only window: test the 16 bytes in order, stopping at the first hit.
                            while (idx < blockEnd)
                            {
                                if (DoesAsciiNeedEncoding(ptr[idx]) == 1)
                                {
                                    return idx;
                                }

                                idx++;
                            }
                        }
                        else
                        {
                            // Mixed content: walk the window one scalar at a time.
                            // A multi-byte scalar may carry idx past the end of the window.
                            Debug.Assert(blockEnd <= utf8Text.Length);

                            while (idx < blockEnd)
                            {
                                Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));

                                if (!UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
                                {
                                    OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint scalar, out int consumed);

                                    Debug.Assert(scalar <= int.MaxValue);
                                    if (status != OperationStatus.Done || WillEncode((int)scalar))
                                    {
                                        return idx;
                                    }

                                    Debug.Assert(status == OperationStatus.Done);
                                    idx += consumed;
                                }
                                else
                                {
                                    if (DoesAsciiNeedEncoding(ptr[idx]) == 1)
                                    {
                                        return idx;
                                    }

                                    idx++;
                                }
                            }
                        }

                        blockStart = (sbyte*)ptr + idx;
                    }

                    // Fewer than 16 bytes remain for the scalar loop below.
                    Debug.Assert(utf8Text.Length - idx < 16);
                }
#endif

                // Scalar pass over whatever the vectorized section did not consume.
                while (idx < utf8Text.Length)
                {
                    Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));

                    if (!UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
                    {
                        OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint scalar, out int consumed);

                        Debug.Assert(scalar <= int.MaxValue);
                        if (status != OperationStatus.Done || WillEncode((int)scalar))
                        {
                            return idx;
                        }

                        Debug.Assert(status == OperationStatus.Done);
                        idx += consumed;
                    }
                    else
                    {
                        if (DoesAsciiNeedEncoding(ptr[idx]) == 1)
                        {
                            return idx;
                        }

                        idx++;
                    }
                }

                Debug.Assert(idx == utf8Text.Length);

                // Nothing in the input needs to be encoded.
                return -1;
            }
        }
Example #4
0
        /// <summary>
        /// Scans UTF-8 text for the first byte that cannot be copied to the output unchanged.
        /// </summary>
        /// <param name="utf8Text">The UTF-8 bytes to inspect.</param>
        /// <returns>
        /// The index of the first byte that must be escaped or that starts a sequence which
        /// does not decode with <see cref="OperationStatus.Done"/>; -1 when the whole input
        /// was consumed without such a hit.
        /// </returns>
        public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan <byte> utf8Text)
        {
            fixed (byte* ptr = utf8Text)
            {
                int idx = 0;

#if NETCOREAPP
                if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
                {
                    sbyte* blockStart = (sbyte*)ptr;
                    while (idx <= utf8Text.Length - 16)
                    {
                        Debug.Assert(blockStart >= ptr && blockStart <= (ptr + utf8Text.Length - 16));

                        // Load 16 bytes and test for any byte outside the ASCII range
                        // (such a byte is negative when viewed as a signed byte).
                        Vector128<sbyte> block;
                        bool hasNonAscii;

                        if (Sse2.IsSupported)
                        {
                            block = Sse2.LoadVector128(blockStart);
                            hasNonAscii = Sse2Helper.ContainsNonAsciiByte(block);
                        }
                        else
                        {
                            block = AdvSimd.LoadVector128(blockStart);
                            hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(block);
                        }

                        if (!hasNonAscii)
                        {
                            // ASCII-only window: build the escaping mask and find its first set lane.
                            int firstHit;

                            if (Sse2.IsSupported)
                            {
                                Vector128<sbyte> escapeMask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block);
                                firstHit = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                            }
                            else
                            {
                                Vector128<sbyte> escapeMask = AdvSimdHelper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block);
                                firstHit = AdvSimdHelper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                            }

                            // 16 or more means no byte in this window needs escaping;
                            // otherwise the value is the offset of the first one that does.
                            if (firstHit < 16)
                            {
                                return idx + firstHit;
                            }

                            idx += 16;
                        }
                        else
                        {
                            // Mixed content: walk the window one scalar at a time.
                            // A multi-byte scalar may carry idx past the end of the window.
                            int blockEnd = idx + 16;
                            Debug.Assert(blockEnd <= utf8Text.Length);

                            while (idx < blockEnd)
                            {
                                Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));

                                if (!UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
                                {
                                    OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint scalar, out int consumed);

                                    Debug.Assert(scalar <= int.MaxValue);
                                    if (status != OperationStatus.Done || WillEncode((int)scalar))
                                    {
                                        return idx;
                                    }

                                    Debug.Assert(status == OperationStatus.Done);
                                    idx += consumed;
                                }
                                else
                                {
                                    if (!_allowedCharacters.IsUnicodeScalarAllowed(ptr[idx]))
                                    {
                                        return idx;
                                    }

                                    idx++;
                                }
                            }
                        }

                        // Re-anchor the load address on the current position.
                        blockStart = (sbyte*)ptr + idx;
                    }

                    // Fewer than 16 bytes remain for the scalar loop below.
                    Debug.Assert(utf8Text.Length - idx < 16);
                }
#endif

                // Scalar pass over whatever the vectorized section did not consume.
                while (idx < utf8Text.Length)
                {
                    Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));

                    if (!UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
                    {
                        OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint scalar, out int consumed);

                        Debug.Assert(scalar <= int.MaxValue);
                        if (status != OperationStatus.Done || WillEncode((int)scalar))
                        {
                            return idx;
                        }

                        Debug.Assert(status == OperationStatus.Done);
                        idx += consumed;
                    }
                    else
                    {
                        if (!_allowedCharacters.IsUnicodeScalarAllowed(ptr[idx]))
                        {
                            return idx;
                        }

                        idx++;
                    }
                }

                Debug.Assert(idx == utf8Text.Length);

                // Nothing in the input needs to be encoded.
                return -1;
            }
        }