コード例 #1
0
        /// <summary>
        /// Computes a non-randomized, case-insensitive hash code for this string using an
        /// ASCII-only fast path. As soon as a non-ASCII char is observed, the work is handed to
        /// <c>GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow</c> instead.
        /// </summary>
        /// <returns>
        /// The two accumulators combined into a single 32-bit value, or the slow-path result
        /// when the string contains non-ASCII data.
        /// </returns>
        internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase()
        {
            // Two accumulators with the same seed. Inside the main loop hash1 absorbs the first
            // 32-bit word of each pair and hash2 the second; a leftover word goes into hash2.
            uint hash1 = (5381 << 16) + 5381;
            uint hash2 = hash1;

            fixed(char *src = &_firstChar)
            {
                // The loops below may read the char just past Length, so it must be the terminator.
                Debug.Assert(src[this.Length] == '\0', "src[this.Length] == '\\0'");
                Debug.Assert(((int)src) % 4 == 0, "Managed string should start at 4 bytes boundary");

                // Reinterpret the chars as 32-bit words: each uint holds two UTF-16 code units.
                uint *ptr    = (uint *)src;
                int   length = this.Length;

                // We "normalize to lowercase" every char by ORing with 0x0020. This casts
                // a very wide net because it will change, e.g., '^' to '~'. But that should
                // be ok because we expect this to be very rare in practice.
                const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian

                // Main loop: each iteration consumes two words (four chars).
                while (length > 2)
                {
                    uint p0 = ptr[0];
                    uint p1 = ptr[1];
                    // A single ASCII check on the OR of both words covers all four chars.
                    if (!Utf16Utility.AllCharsInUInt32AreAscii(p0 | p1))
                    {
                        goto NotAscii;
                    }

                    length -= 4;
                    // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator
                    hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase);
                    hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase);
                    ptr  += 2;
                }

                // Tail: one or two chars (a single word) may remain after the main loop.
                if (length > 0)
                {
                    uint p0 = ptr[0];
                    if (!Utf16Utility.AllCharsInUInt32AreAscii(p0))
                    {
                        goto NotAscii;
                    }

                    // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator
                    hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase);
                }
            }

            // Fold both accumulators into the final 32-bit hash.
            return((int)(hash1 + (hash2 * 1566083941)));

NotAscii:
            // Non-ASCII data was found; the partial hash is discarded and the slow path hashes
            // the whole string.
            return(GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(this));
コード例 #2
0
        /// <summary>
        /// Computes a Marvin hash over UTF-16 data with OrdinalIgnoreCase semantics and folds the
        /// resulting state into a single 32-bit value.
        /// </summary>
        /// <param name="data">Reference to the first char of the input.</param>
        /// <param name="count">Number of chars (not bytes) to hash.</param>
        /// <param name="p0">First of the two 32-bit state words mixed by <c>Block</c>.</param>
        /// <param name="p1">Second of the two 32-bit state words mixed by <c>Block</c>.</param>
        /// <returns>
        /// <c>p1 ^ p0</c> after all input has been mixed in, or the result of
        /// <c>ComputeHash32OrdinalIgnoreCaseSlow</c> over the unconsumed remainder when a
        /// non-ASCII char is encountered.
        /// </returns>
        public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint p0, uint p1)
        {
            uint charsLeft = (uint)count; // chars not yet hashed
            nuint offset = 0;             // byte offset of the next unread char
            uint chunk;

            // Consume the input one 32-bit word (two chars) per iteration. Leaving the loop via
            // goto skips the iterator section, so offset / charsLeft still describe the word
            // that has not been consumed yet.
            for (; charsLeft >= 2; offset += 4, charsLeft -= 2)
            {
                chunk = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref data, offset)));
                if (!Utf16Utility.AllCharsInUInt32AreAscii(chunk))
                {
                    goto NotAscii;
                }

                p0 += Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(chunk);
                Block(ref p0, ref p1);
            }

            // Zero or one char (16 bits) remains at this point.
            Debug.Assert(charsLeft < 2);

            if (charsLeft != 0)
            {
                chunk = Unsafe.AddByteOffset(ref data, offset);
                if (chunk > 0x7Fu)
                {
                    goto NotAscii;
                }

                // The -0x80u term cancels the unconditional += 0x80u below, which lets this branch
                // fall through to the next statement instead of jumping past it.
                p0 += Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(chunk) + (0x800000u - 0x80u);
            }
            p0 += 0x80u;

            // Two final mixing rounds before collapsing the state.
            Block(ref p0, ref p1);
            Block(ref p0, ref p1);

            return (int)(p1 ^ p0);

NotAscii:
            // Hand the unconsumed tail, together with the state accumulated so far, to the slow path.
            Debug.Assert(0 <= charsLeft && charsLeft <= Int32.MaxValue); // this should fit into a signed int
            return ComputeHash32OrdinalIgnoreCaseSlow(ref Unsafe.AddByteOffset(ref data, offset), (int)charsLeft, p0, p1);
        }
コード例 #3
0
ファイル: TextInfo.cs プロジェクト: xzipup/corefx
        /// <summary>
        /// Returns <paramref name="source"/> converted to upper or lower case, as selected by
        /// <typeparamref name="TConversion"/>. When an ASCII-only scan finds nothing that needs
        /// to change, the original string instance is returned.
        /// </summary>
        /// <typeparam name="TConversion">
        /// Either <c>ToUpperConversion</c> or <c>ToLowerConversion</c>.
        /// </typeparam>
        /// <param name="source">The string to convert; must not be null.</param>
        /// <returns>The converted string (possibly <paramref name="source"/> itself).</returns>
        private unsafe string ChangeCaseCommon<TConversion>(string source) where TConversion : struct
        {
            Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
            bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds

            Debug.Assert(!_invariantMode);
            Debug.Assert(source != null);

            int sourceLength = source.Length;

            // Nothing to convert for an empty string.
            if (sourceLength == 0)
            {
                return string.Empty;
            }

            fixed (char* pSource = source)
            {
                nuint charsScanned = 0; // in chars

                // When this culture cases ASCII exactly like the invariant culture, scan in
                // managed code first and only fall back to culture-aware casing if needed.
                if (IsAsciiCasingSameAsInvariant)
                {
                    // Scan two chars (one 32-bit word) per iteration.
                    if (sourceLength >= 2)
                    {
                        nuint finalPairStart = (uint)sourceLength - 2;
                        do
                        {
                            uint pair = Unsafe.ReadUnaligned<uint>(pSource + charsScanned);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(pair))
                            {
                                goto NotAscii;
                            }

                            bool pairNeedsChange = toUpper
                                ? Utf16Utility.UInt32ContainsAnyLowercaseAsciiChar(pair)
                                : Utf16Utility.UInt32ContainsAnyUppercaseAsciiChar(pair);
                            if (pairNeedsChange)
                            {
                                goto AsciiMustChangeCase;
                            }

                            charsScanned += 2;
                        } while (charsScanned <= finalPairStart);
                    }

                    // An odd length leaves exactly one trailing char to inspect.
                    if ((sourceLength & 1) != 0)
                    {
                        uint lastChar = pSource[charsScanned];
                        if (lastChar > 0x7Fu)
                        {
                            goto NotAscii;
                        }

                        // Unsigned range check: the subtraction wraps for chars below the range start.
                        bool lastNeedsChange;
                        if (toUpper)
                        {
                            lastNeedsChange = (lastChar - 'a') <= (uint)('z' - 'a');
                        }
                        else
                        {
                            lastNeedsChange = (lastChar - 'A') <= (uint)('Z' - 'A');
                        }

                        if (lastNeedsChange)
                        {
                            goto AsciiMustChangeCase;
                        }
                    }

                    // Every char was ASCII and already in the requested case.
                    return source;

AsciiMustChangeCase:
                    {
                        // ASCII data that needs a case change was found. A new string has to be
                        // allocated, but the work can stay on the managed span-based path.
                        string converted = string.FastAllocateString(sourceLength); // changing case uses simple folding: doesn't change UTF-16 code unit count
                        Span<char> convertedSpan = new Span<char>(ref converted.GetRawStringData(), converted.Length);

                        // Chars before charsScanned are known to need no change: copy them verbatim.
                        source.AsSpan(0, (int)charsScanned).CopyTo(convertedSpan);

                        // Run the span-based conversion over everything from charsScanned onwards.
                        ChangeCaseCommon<TConversion>(source.AsSpan((int)charsScanned), convertedSpan.Slice((int)charsScanned));
                        return converted;
                    }
                }

NotAscii:
                {
                    // Either non-ASCII data was found or this culture does not case ASCII like the
                    // invariant culture; both require the localization tables.
                    string converted = string.FastAllocateString(sourceLength); // changing case uses simple folding: doesn't change UTF-16 code unit count

                    if (charsScanned > 0)
                    {
                        // Chars before charsScanned are known to need no change: copy them verbatim.
                        Span<char> convertedSpan = new Span<char>(ref converted.GetRawStringData(), converted.Length);
                        source.AsSpan(0, (int)charsScanned).CopyTo(convertedSpan);
                    }

                    // Culture-aware conversion of the remainder.
                    fixed (char* pConverted = converted)
                    {
                        ChangeCase(pSource + charsScanned, sourceLength - (int)charsScanned, pConverted + charsScanned, converted.Length - (int)charsScanned, toUpper);
                    }

                    return converted;
                }
            }
        }
コード例 #4
0
ファイル: TextInfo.cs プロジェクト: xzipup/corefx
        /// <summary>
        /// Converts <paramref name="charCount"/> chars starting at <paramref name="source"/> to upper
        /// or lower case (selected by <typeparamref name="TConversion"/>) and writes them to
        /// <paramref name="destination"/>. ASCII data is converted inline when
        /// <c>IsAsciiCasingSameAsInvariant</c> is true; any remainder is passed to <c>ChangeCase</c>.
        /// </summary>
        /// <typeparam name="TConversion">
        /// Either <c>ToUpperConversion</c> or <c>ToLowerConversion</c>.
        /// </typeparam>
        /// <param name="source">Reference to the first char to read.</param>
        /// <param name="destination">
        /// Reference to the first char to write.
        /// NOTE(review): assumed to have room for at least <paramref name="charCount"/> chars — not checked here.
        /// </param>
        /// <param name="charCount">Number of chars to convert; must be non-negative.</param>
        private unsafe void ChangeCaseCommon <TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
        {
            Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
            bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds

            Debug.Assert(!_invariantMode);
            Debug.Assert(charCount >= 0);

            // Nothing to do for empty input.
            if (charCount == 0)
            {
                goto Return;
            }

            fixed(char *pSource = &source)
            fixed(char *pDestination = &destination)
            {
                nuint currIdx = 0; // in chars

                if (IsAsciiCasingSameAsInvariant)
                {
                    // Read 4 chars (two 32-bit integers) at a time

                    if (charCount >= 4)
                    {
                        nuint lastIndexWhereCanReadFourChars = (uint)charCount - 4;
                        do
                        {
                            // This is a mostly branchless case change routine. Generally speaking, we assume that the majority
                            // of input is ASCII, so the 'if' checks below should normally evaluate to false. However, within
                            // the ASCII data, we expect that characters of either case might be about equally distributed, so
                            // we want the case change operation itself to be branchless. This gives optimal performance in the
                            // common case. We also expect that developers aren't passing very long (16+ character) strings into
                            // this method, so we won't bother vectorizing until data shows us that it's worthwhile to do so.

                            // First pair of the four-char group.
                            uint tempValue = Unsafe.ReadUnaligned <uint>(pSource + currIdx);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                            {
                                goto NonAscii;
                            }
                            tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                            Unsafe.WriteUnaligned <uint>(pDestination + currIdx, tempValue);

                            // Second pair. The first pair has already been written but currIdx has not
                            // advanced yet, so a failure here must skip those two chars before falling back.
                            tempValue = Unsafe.ReadUnaligned <uint>(pSource + currIdx + 2);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                            {
                                goto NonAsciiSkipTwoChars;
                            }
                            tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                            Unsafe.WriteUnaligned <uint>(pDestination + currIdx + 2, tempValue);
                            currIdx += 4;
                        } while (currIdx <= lastIndexWhereCanReadFourChars);

                        // At this point, there are fewer than 4 characters remaining to convert.
                        Debug.Assert((uint)charCount - currIdx < 4);
                    }

                    // If there are 2 or 3 characters left to convert, we'll convert 2 of them now.
                    // (currIdx is a multiple of 4 here, so bit 1 of charCount tells whether a pair remains.)
                    if ((charCount & 2) != 0)
                    {
                        uint tempValue = Unsafe.ReadUnaligned <uint>(pSource + currIdx);
                        if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                        {
                            goto NonAscii;
                        }
                        tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                        Unsafe.WriteUnaligned <uint>(pDestination + currIdx, tempValue);
                        currIdx += 2;
                    }

                    // If there's a single character left to convert, do it now.
                    if ((charCount & 1) != 0)
                    {
                        uint tempValue = pSource[currIdx];
                        if (tempValue > 0x7Fu)
                        {
                            goto NonAscii;
                        }
                        tempValue             = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                        pDestination[currIdx] = (char)tempValue;
                    }

                    // And we're finished!

                    goto Return;

                    // If we reached this point, we found non-ASCII data.
                    // Fall back down the p/invoke code path.

NonAsciiSkipTwoChars:
                    // Account for the pair that was already converted, then fall through to NonAscii.
                    currIdx += 2;

NonAscii:
                    Debug.Assert(currIdx < (uint)charCount, "We somehow read past the end of the buffer.");
                    // Shrink the count to just the chars that still need converting.
                    charCount -= (int)currIdx;
                }

                // We encountered non-ASCII data and therefore can't perform invariant case conversion; or the requested culture
                // has a case conversion that's different from the invariant culture, even for ASCII data (e.g., tr-TR converts
                // 'i' (U+0069) to Latin Capital Letter I With Dot Above (U+0130)).

                // When the fast path was skipped entirely, currIdx is still 0 and charCount is unchanged.
                ChangeCase(pSource + currIdx, charCount, pDestination + currIdx, charCount, toUpper);
            }

Return:
            return;
        }