Esempio n. 1
0
        /// <summary>
        /// Scans a UTF-16 buffer and reports the position of the first character that is not
        /// allowed to pass through this encoder unescaped.
        /// </summary>
        /// <param name="text">Pointer to the first character to inspect. Must not be null.</param>
        /// <param name="textLength">Number of characters available at <paramref name="text"/>.</param>
        /// <returns>
        /// The zero-based index of the first character that must be encoded, or -1 when every
        /// character in the buffer is allowed (including when <paramref name="textLength"/> is
        /// zero or negative).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            int idx = 0;

#if NETCOREAPP
            if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
            {
                short* startingAddress = (short*)text;

                // The guard is written as "textLength - idx" rather than "textLength - 8" so that a
                // textLength within 8 of int.MinValue cannot wrap around to a large positive number
                // (in an unchecked context) and send the vector loop past the end of the buffer.
                // idx is never negative and never exceeds a non-negative textLength, so this
                // subtraction itself cannot overflow.
                while (textLength - idx >= 8)
                {
                    Debug.Assert(startingAddress >= text && startingAddress <= (text + textLength - 8));

                    // Load the next 8 characters.
                    Vector128<short> sourceValue;
                    Vector128<short> mask;
                    bool containsNonAsciiChars;

                    if (Sse2.IsSupported)
                    {
                        sourceValue = Sse2.LoadVector128(startingAddress);
                        mask = Sse2Helper.CreateAsciiMask(sourceValue);
                        containsNonAsciiChars = Sse2Helper.ContainsNonAsciiByte(mask.AsSByte());
                    }
                    else if (AdvSimd.Arm64.IsSupported)
                    {
                        sourceValue = AdvSimd.LoadVector128(startingAddress);
                        mask = AdvSimdHelper.CreateAsciiMask(sourceValue);
                        containsNonAsciiChars = AdvSimdHelper.ContainsNonAsciiByte(mask.AsSByte());
                    }
                    else
                    {
                        // Not reachable given the enclosing guard; it keeps the locals above
                        // definitely assigned on every path the compiler can see.
                        throw new PlatformNotSupportedException();
                    }

                    if (containsNonAsciiChars)
                    {
                        // At least one of the following 8 characters is non-ASCII, so fall back to
                        // checking each of them individually against the allow list.
                        int processNextEight = idx + 8;
                        Debug.Assert(processNextEight <= textLength);
                        for (; idx < processNextEight; idx++)
                        {
                            Debug.Assert((text + idx) <= (text + textLength));
                            if (!_allowedCharacters.IsCharacterAllowed(*(text + idx)))
                            {
                                goto Return;
                            }
                        }
                        startingAddress += 8;
                    }
                    else
                    {
                        int index;

                        // Check if any of the 8 characters need to be escaped.
                        if (Sse2.IsSupported)
                        {
                            mask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
                            index = Sse2Helper.GetIndexOfFirstNonAsciiByte(mask.AsByte());
                        }
                        else if (AdvSimd.Arm64.IsSupported)
                        {
                            mask = AdvSimdHelper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
                            index = AdvSimdHelper.GetIndexOfFirstNonAsciiByte(mask.AsByte());
                        }
                        else
                        {
                            // Not reachable given the enclosing guard (see above).
                            throw new PlatformNotSupportedException();
                        }

                        // If index >= 16, that means none of the 8 characters needed to be escaped.
                        if (index < 16)
                        {
                            // index is a byte position inside the 16-byte vector; each character
                            // occupies two bytes, so halve it to get the character position.
                            Debug.Assert(index % 2 == 0);
                            idx += index >> 1;
                            goto Return;
                        }
                        idx += 8;
                        startingAddress += 8;
                    }
                }

                // Fewer than 8 characters remain; they are handled by the scalar loop below.
                Debug.Assert(textLength - idx < 8);
            }
#endif

            // Scalar path: handles the tail of the buffer, or the whole buffer when the vector
            // path is unavailable.
            for (; idx < textLength; idx++)
            {
                Debug.Assert((text + idx) <= (text + textLength));
                if (!_allowedCharacters.IsCharacterAllowed(*(text + idx)))
                {
                    goto Return;
                }
            }

            idx = -1; // All characters are allowed.

Return:
            return idx;
        }
Esempio n. 2
0
        /// <summary>
        /// Walks UTF-8 encoded text and locates the first position that cannot be emitted as-is.
        /// </summary>
        /// <param name="utf8Text">The UTF-8 bytes to examine.</param>
        /// <returns>
        /// The offset of the first byte that begins a disallowed ASCII character, a scalar value
        /// that will be encoded, or a sequence that fails to decode; -1 when the entire input is
        /// allowed.
        /// </returns>
        public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8Text)
        {
            fixed (byte* pText = utf8Text)
            {
                int offset = 0;

#if NETCOREAPP
                if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
                {
                    sbyte* cursor = (sbyte*)pText;

                    // Vector path: consume the input in 16-byte blocks while a full block remains.
                    while (utf8Text.Length - 16 >= offset)
                    {
                        Debug.Assert(cursor >= pText && cursor <= (pText + utf8Text.Length - 16));

                        Vector128<sbyte> block;
                        bool hasNonAscii;

                        // Viewed as signed bytes, anything outside the ASCII range is negative,
                        // which is what the helper looks for.
                        if (Sse2.IsSupported)
                        {
                            block = Sse2.LoadVector128(cursor);
                            hasNonAscii = Sse2Helper.ContainsNonAsciiByte(block);
                        }
                        else
                        {
                            block = AdvSimd.LoadVector128(cursor);
                            hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(block);
                        }

                        if (!hasNonAscii)
                        {
                            // Pure ASCII block: ask the escaping mask where the first byte that
                            // needs escaping sits, if any.
                            int firstEscaped;

                            if (Sse2.IsSupported)
                            {
                                Vector128<sbyte> escapeMask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block);
                                firstEscaped = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                            }
                            else
                            {
                                Vector128<sbyte> escapeMask = AdvSimdHelper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block);
                                firstEscaped = AdvSimdHelper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                            }

                            // A value of 16 or more means nothing in this block needs escaping.
                            if (firstEscaped < 16)
                            {
                                return offset + firstEscaped;
                            }

                            offset += 16;
                            cursor += 16;
                            continue;
                        }

                        // The block holds at least one non-ASCII byte: inspect it one scalar at a time.
                        int blockEnd = offset + 16;
                        Debug.Assert(blockEnd <= utf8Text.Length);

                        while (offset < blockEnd)
                        {
                            Debug.Assert((pText + offset) <= (pText + utf8Text.Length));

                            byte current = pText[offset];
                            if (UnicodeUtility.IsAsciiCodePoint(current))
                            {
                                if (!_allowedCharacters.IsUnicodeScalarAllowed(current))
                                {
                                    return offset;
                                }

                                offset++;
                                continue;
                            }

                            OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(offset), out uint scalar, out int consumed);

                            Debug.Assert(scalar <= int.MaxValue);
                            if (status != OperationStatus.Done || WillEncode((int)scalar))
                            {
                                return offset;
                            }

                            Debug.Assert(status == OperationStatus.Done);
                            offset += consumed;
                        }

                        // A multi-byte sequence may have carried the offset beyond the block
                        // boundary, so resynchronize the load address from the offset.
                        cursor = (sbyte*)pText + offset;
                    }

                    // Whatever is left is shorter than one block and goes through the scalar loop.
                    Debug.Assert(utf8Text.Length - offset < 16);
                }
#endif

                // Scalar path: the tail of the input, or all of it when no vector path is available.
                while (offset < utf8Text.Length)
                {
                    Debug.Assert((pText + offset) <= (pText + utf8Text.Length));

                    byte current = pText[offset];
                    if (UnicodeUtility.IsAsciiCodePoint(current))
                    {
                        if (!_allowedCharacters.IsUnicodeScalarAllowed(current))
                        {
                            return offset;
                        }

                        offset++;
                        continue;
                    }

                    OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(offset), out uint scalar, out int consumed);

                    Debug.Assert(scalar <= int.MaxValue);
                    if (status != OperationStatus.Done || WillEncode((int)scalar))
                    {
                        return offset;
                    }

                    Debug.Assert(status == OperationStatus.Done);
                    offset += consumed;
                }

                Debug.Assert(offset == utf8Text.Length);

                // Reached the end without finding anything that needs encoding.
                return -1;
            }
        }