C# (CSharp) AdvSimd.Add Examples

Programming Language: C# (CSharp)

Class/Type: AdvSimd

Method/Function: Add

Examples at hotexamples.com: 9

C# (CSharp) AdvSimd.Add - 9 examples found. These are the top-rated real-world C# (CSharp) examples of AdvSimd.Add, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

CompareEqual(19)

And(18)

CompareTest(15)

CompareGreaterThan(15)

AddPairwiseWideningAndAdd(15)

Abs(14)

AddSaturate(14)

AddRoundedHighNarrowUpper(13)

AddHighNarrowingUpper(13)

AddRoundedHighNarrowingUpper(13)

AddHighNarrowUpper(13)

AbsoluteDifferenceWideningUpperAndAdd(13)

BitwiseClear(13)

AddReturningHighNarrowUpper(13)

AddReturningRoundedHighNarrowUpper(13)

AddScalar(12)

AbsSaturate(12)

AbsoluteDifferenceWideningLowerAndAdd(12)

AbsoluteDifferenceAdd(12)

AbsoluteDifference(12)

AddPairwise(12)

AddPairwiseWidening(12)

Ceiling(11)

AddRoundedHighNarrowLower(11)

AddWideningLower(11)

AbsoluteDifferenceWideningLower(11)

AddReturningHighNarrowLower(11)

AbsoluteCompareGreaterThan(11)

AddHighNarrowingLower(11)

AddHighNarrowLower(11)

AbsoluteCompareLessThan(11)

AddReturningRoundedHighNarrowLower(10)

AbsoluteCompareGreaterThanOrEqual(10)

CeilingScalar(10)

BitwiseSelect(10)

AbsoluteCompareLessThanOrEqual(10)

AbsScalar(10)

AddSaturateScalar(10)

ConvertToUInt32RoundToZero(9)

ConvertToInt32RoundAwayFromZero(9)

ConvertToInt32RoundToPositiveInfinity(9)

AbsoluteDifferenceWideningUpper(9)

ConvertToInt32RoundToNegativeInfinity(9)

ConvertToSingleScalar(9)

ConvertToInt32RoundToEven(9)

ConvertToUInt32RoundAwayFromZero(9)

ConvertToSingle(9)

ConvertToInt32RoundToZero(9)

ConvertToUInt32RoundToEven(9)

ConvertToUInt32RoundToNegativeInfinity(9)

Example #1

Show file

File: Add.UInt32.cs Project: zwei222/coreclr

        /// <summary>
        /// Scenario: creates a local <c>TestStruct</c>, adds the two 64-bit vectors loaded through
        /// pointers to its <c>_fld1</c> and <c>_fld2</c> fields, stores the sum in the output
        /// buffer of the data table, and validates it.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var local = TestStruct.Create();

            // Both operands are loaded straight from the addresses of the local's fields.
            var sum = AdvSimd.Add(
                AdvSimd.LoadVector64((uint*)&local._fld1),
                AdvSimd.LoadVector64((uint*)&local._fld2));

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
        }

Example #2

Show file

            /// <summary>
            /// Scenario: pins this struct's <c>_fld1</c> and <c>_fld2</c> fields, adds the two
            /// vectors loaded through the pinned pointers, writes the sum to the output buffer
            /// owned by <paramref name="testClass"/>, and asks it to validate the result.
            /// </summary>
            /// <param name="testClass">Test instance that provides the output buffer and the validation routine.</param>
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__Add_Vector64_Int16 testClass)
            {
                fixed (Vector64<short>* leftPtr = &_fld1)
                fixed (Vector64<short>* rightPtr = &_fld2)
                {
                    // Operands are loaded through the pinned field addresses.
                    var sum = AdvSimd.Add(
                        AdvSimd.LoadVector64((short*)leftPtr),
                        AdvSimd.LoadVector64((short*)rightPtr));

                    Unsafe.Write(testClass._dataTable.outArrayPtr, sum);

                    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
                }
            }

Example #3

Show file

        /// <summary>
        /// Scenario: pins this instance's <c>_fld1</c> and <c>_fld2</c> fields, adds the two
        /// vectors loaded through the pinned pointers, writes the sum to the output buffer of
        /// the data table, and validates it.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector64<short>* leftPtr = &_fld1)
            fixed (Vector64<short>* rightPtr = &_fld2)
            {
                // Operands are loaded through the pinned field addresses.
                var sum = AdvSimd.Add(
                    AdvSimd.LoadVector64((short*)leftPtr),
                    AdvSimd.LoadVector64((short*)rightPtr));

                Unsafe.Write(_dataTable.outArrayPtr, sum);

                ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
            }
        }

Example #4

Show file

File: Add.Vector128.Single.cs Project: zpplus/runtime

        /// <summary>
        /// Scenario: pins <c>_clsVar1</c> and <c>_clsVar2</c>, adds the two 128-bit vectors
        /// loaded through the pinned pointers, writes the sum to the output buffer of the data
        /// table, and validates it.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector128<float>* leftPtr = &_clsVar1)
            fixed (Vector128<float>* rightPtr = &_clsVar2)
            {
                // Operands are loaded through the pinned variable addresses.
                var sum = AdvSimd.Add(
                    AdvSimd.LoadVector128((float*)leftPtr),
                    AdvSimd.LoadVector128((float*)rightPtr));

                Unsafe.Write(_dataTable.outArrayPtr, sum);

                ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
            }
        }

Example #5

Show file

        /// <summary>
        /// Scenario: creates a fresh <c>SimpleBinaryOpTest__Add_Vector64_Int16</c> instance, pins
        /// its <c>_fld1</c> and <c>_fld2</c> fields, adds the two vectors loaded through the
        /// pinned pointers, writes the sum to this instance's output buffer, and validates it.
        /// </summary>
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var instance = new SimpleBinaryOpTest__Add_Vector64_Int16();

            fixed (Vector64<short>* leftPtr = &instance._fld1)
            fixed (Vector64<short>* rightPtr = &instance._fld2)
            {
                // Operands come from the new instance; the output buffer belongs to 'this'.
                var sum = AdvSimd.Add(
                    AdvSimd.LoadVector64((short*)leftPtr),
                    AdvSimd.LoadVector64((short*)rightPtr));

                Unsafe.Write(_dataTable.outArrayPtr, sum);

                ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
            }
        }

Example #6

Show file

File: MixCommand.cs Project: Ryujinx/Ryujinx

        /// <summary>
        /// Accumulates <paramref name="inputMix"/>, scaled by <c>Volume</c>, into
        /// <paramref name="outputMix"/>. Whole groups of four samples go through the AdvSimd
        /// multiply / ceiling / add sequence; the leftover samples are handled one at a time
        /// with <c>FloatingPointHelper.MultiplyRoundUp</c>.
        /// </summary>
        /// <param name="outputMix">Destination samples; each element is incremented in place.</param>
        /// <param name="inputMix">Source samples to scale and add.</param>
        private void ProcessMixAdvSimd(Span<float> outputMix, ReadOnlySpan<float> inputMix)
        {
            Vector128<float> gain = Vector128.Create(Volume);

            // Reinterpret both buffers as runs of 128-bit vectors (four floats each).
            ReadOnlySpan<Vector128<float>> sourceBlocks = MemoryMarshal.Cast<float, Vector128<float>>(inputMix);
            Span<Vector128<float>> targetBlocks = MemoryMarshal.Cast<float, Vector128<float>>(outputMix);

            int block = 0;
            while (block < sourceBlocks.Length)
            {
                Vector128<float> scaled = AdvSimd.Ceiling(AdvSimd.Multiply(sourceBlocks[block], gain));
                targetBlocks[block] = AdvSimd.Add(targetBlocks[block], scaled);
                block++;
            }

            // First sample index not covered by a whole vector.
            int sample = sourceBlocks.Length * Vector128<float>.Count;
            while (sample < inputMix.Length)
            {
                outputMix[sample] += FloatingPointHelper.MultiplyRoundUp(inputMix[sample], Volume);
                sample++;
            }
        }

Example #7

Show file

File: Utf16Utility.Validation.cs Project: humbatoa/runtime

        /// <summary>
        /// Given an input buffer <paramref name="pInputBuffer"/> of char length <paramref name="inputLength"/>,
        /// returns a pointer to where the first invalid data appears in <paramref name="pInputBuffer"/>.
        /// </summary>
        /// <param name="pInputBuffer">Start of the UTF-16 data. May be null only if <paramref name="inputLength"/> is zero.</param>
        /// <param name="inputLength">Number of chars to inspect. Must not be negative.</param>
        /// <param name="utf8CodeUnitCountAdjustment">
        /// On return, the value to add to the number of UTF-16 code units consumed in order to get
        /// the number of UTF-8 code units required to encode that data.
        /// </param>
        /// <param name="scalarCountAdjustment">
        /// On return, the value to add to the number of UTF-16 code units consumed in order to get
        /// the number of scalars present in that data.
        /// </param>
        /// <returns>
        /// A pointer to the first invalid char, or to the end of the buffer if no invalid data was found.
        /// </returns>
        /// <remarks>
        /// Returns a pointer to the end of <paramref name="pInputBuffer"/>
        /// (that is, <c>&amp;pInputBuffer[inputLength]</c>) if the buffer is well-formed.
        /// </remarks>
        public static char *GetPointerToFirstInvalidChar(char *pInputBuffer, int inputLength, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment)
        {
            Debug.Assert(inputLength >= 0, "Input length must not be negative.");
            Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");

            // First, we'll handle the common case of all-ASCII. If this is able to
            // consume the entire buffer, we'll skip the remainder of this method's logic.

            int numAsciiCharsConsumedJustNow = (int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pInputBuffer, (uint)inputLength);

            Debug.Assert(0 <= numAsciiCharsConsumedJustNow && numAsciiCharsConsumedJustNow <= inputLength);

            pInputBuffer += (uint)numAsciiCharsConsumedJustNow;
            inputLength  -= numAsciiCharsConsumedJustNow;

            if (inputLength == 0)
            {
                utf8CodeUnitCountAdjustment = 0;
                scalarCountAdjustment       = 0;
                return pInputBuffer;
            }

            // If we got here, it means we saw some non-ASCII data, so within our
            // vectorized code paths below we'll handle all non-surrogate UTF-16
            // code points branchlessly. We'll only branch if we see surrogates.
            //
            // We still optimistically assume the data is mostly ASCII. This means that the
            // number of UTF-8 code units and the number of scalars almost matches the number
            // of UTF-16 code units. As we go through the input and find non-ASCII
            // characters, we'll keep track of these "adjustment" fixups. To get the
            // total number of UTF-8 code units required to encode the input data, add
            // the UTF-8 code unit count adjustment to the number of UTF-16 code units
            // seen.  To get the total number of scalars present in the input data,
            // add the scalar count adjustment to the number of UTF-16 code units seen.

            long tempUtf8CodeUnitCountAdjustment = 0;
            int  tempScalarCountAdjustment       = 0;

            if ((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported)
            {
                if (inputLength >= Vector128<ushort>.Count)
                {
                    Vector128<ushort> vector0080 = Vector128.Create((ushort)0x80);
                    Vector128<ushort> vectorA800 = Vector128.Create((ushort)0xA800);
                    Vector128<short>  vector8800 = Vector128.Create(unchecked((short)0x8800));
                    Vector128<ushort> vectorZero = Vector128<ushort>.Zero;
                    do
                    {
                        Vector128<ushort> utf16Data;
                        if (AdvSimd.Arm64.IsSupported)
                        {
                            utf16Data = AdvSimd.LoadVector128((ushort *)pInputBuffer); // unaligned
                        }
                        else
                        {
                            utf16Data = Sse2.LoadVector128((ushort *)pInputBuffer); // unaligned
                        }

                        Vector128<ushort> charIsNonAscii;

                        if (AdvSimd.Arm64.IsSupported)
                        {
                            // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
                            // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
                            charIsNonAscii = AdvSimd.Min(utf16Data, vector0080);
                        }
                        else if (Sse41.IsSupported)
                        {
                            // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
                            // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
                            charIsNonAscii = Sse41.Min(utf16Data, vector0080);
                        }
                        else
                        {
                            // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
                            // input was 0x0080 <= [value] <= 0x7FFF. The case where 0x8000 <= [value] will
                            // be handled in a few lines.

                            charIsNonAscii = Sse2.AndNot(Sse2.CompareGreaterThan(vector0080.AsInt16(), utf16Data.AsInt16()).AsUInt16(), vector0080);
                        }

#if DEBUG
                        // Quick check to ensure we didn't accidentally set the 0x8000 bit of any element.
                        uint debugMask;
                        if (AdvSimd.Arm64.IsSupported)
                        {
                            debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte());
                        }
                        else
                        {
                            debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
                        }
                        Debug.Assert((debugMask & 0b_1010_1010_1010_1010) == 0, "Shouldn't have set the 0x8000 bit of any element in 'charIsNonAscii'.");
#endif // DEBUG

                        // Sets the 0x8080 bits of each element in 'charIsThreeByteUtf8Encoded' if the
                        // corresponding input was 0x0800 <= [value]. Once OR'ed with 'charIsNonAscii'
                        // below, this also handles the missing range a few lines above.

                        Vector128<ushort> charIsThreeByteUtf8Encoded;
                        uint mask;

                        // Use the same Arm64 check as every other ARM / x86 selection in this method
                        // (this branch calls GetNonAsciiBytes, which is guarded by it everywhere else).
                        if (AdvSimd.Arm64.IsSupported)
                        {
                            charIsThreeByteUtf8Encoded = AdvSimd.Subtract(vectorZero, AdvSimd.ShiftRightLogical(utf16Data, 11));
                            mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
                        }
                        else
                        {
                            charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
                            mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
                        }

                        // Each even bit of mask will be 1 only if the char was >= 0x0080,
                        // and each odd bit of mask will be 1 only if the char was >= 0x0800.
                        //
                        // Example for UTF-16 input "[ 0123 ] [ 1234 ] ...":
                        //
                        //            ,-- set if char[1] is >= 0x0800
                        //            |   ,-- set if char[0] is >= 0x0800
                        //            v   v
                        // mask = ... 1 1 0 1
                        //              ^   ^-- set if char[0] is non-ASCII
                        //              `-- set if char[1] is non-ASCII
                        //
                        // This means we can popcnt the number of set bits, and the result is the
                        // number of *additional* UTF-8 bytes that each UTF-16 code unit requires as
                        // it expands. This results in the wrong count for UTF-16 surrogate code
                        // units (we just counted that each individual code unit expands to 3 bytes,
                        // but in reality a well-formed UTF-16 surrogate pair expands to 4 bytes).
                        // We'll handle this in just a moment.
                        //
                        // For now, compute the popcnt but squirrel it away. We'll fold it in to the
                        // cumulative UTF-8 adjustment factor once we determine that there are no
                        // unpaired surrogates in our data. (Unpaired surrogates would invalidate
                        // our computed result and we'd have to throw it away.)

                        uint popcnt = (uint)BitOperations.PopCount(mask);

                        // Surrogates need to be special-cased for two reasons: (a) we need
                        // to account for the fact that we over-counted in the addition above;
                        // and (b) they require separate validation.
                        if (AdvSimd.Arm64.IsSupported)
                        {
                            utf16Data = AdvSimd.Add(utf16Data, vectorA800);
                            mask      = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
                        }
                        else
                        {
                            utf16Data = Sse2.Add(utf16Data, vectorA800);
                            mask      = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
                        }

                        if (mask != 0)
                        {
                            // There's at least one UTF-16 surrogate code unit present.
                            // Since we performed a pmovmskb operation on the result of a 16-bit pcmpgtw,
                            // the resulting bits of 'mask' will occur in pairs:
                            // - 00 if the corresponding UTF-16 char was not a surrogate code unit;
                            // - 11 if the corresponding UTF-16 char was a surrogate code unit.
                            //
                            // A UTF-16 high/low surrogate code unit has the bit pattern [ 11011q## ######## ],
                            // where # is any bit; q = 0 represents a high surrogate, and q = 1 represents
                            // a low surrogate. Since we added 0xA800 in the vectorized operation above,
                            // our surrogate pairs will now have the bit pattern [ 10000q## ######## ].
                            // If we logical right-shift each word by 3, we'll end up with the bit pattern
                            // [ 00010000 q####### ], which means that we can immediately use pmovmskb to
                            // determine whether a given char was a high or a low surrogate.
                            //
                            // Therefore the resulting bits of 'mask2' will occur in pairs:
                            // - 00 if the corresponding UTF-16 char was a high surrogate code unit;
                            // - 01 if the corresponding UTF-16 char was a low surrogate code unit;
                            // - ## (garbage) if the corresponding UTF-16 char was not a surrogate code unit.
                            //   Since 'mask' already has 00 in these positions (since the corresponding char
                            //   wasn't a surrogate), "mask AND mask2 == 00" holds for these positions.

                            uint mask2;
                            if (AdvSimd.Arm64.IsSupported)
                            {
                                mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte());
                            }
                            else
                            {
                                mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
                            }

                            // 'lowSurrogatesMask' has its bits occur in pairs:
                            // - 01 if the corresponding char was a low surrogate char,
                            // - 00 if the corresponding char was a high surrogate char or not a surrogate at all.

                            uint lowSurrogatesMask = mask2 & mask;

                            // 'highSurrogatesMask' has its bits occur in pairs:
                            // - 01 if the corresponding char was a high surrogate char,
                            // - 00 if the corresponding char was a low surrogate char or not a surrogate at all.

                            uint highSurrogatesMask = (mask2 ^ 0b_0101_0101_0101_0101u /* flip all even-numbered bits 00 <-> 01 */) & mask;

                            Debug.Assert((highSurrogatesMask & lowSurrogatesMask) == 0,
                                         "A char cannot simultaneously be both a high and a low surrogate char.");

                            Debug.Assert(((highSurrogatesMask | lowSurrogatesMask) & 0b_1010_1010_1010_1010u) == 0,
                                         "Only even bits (no odd bits) of the masks should be set.");

                            // Now check that each high surrogate is followed by a low surrogate and that each
                            // low surrogate follows a high surrogate. We make an exception for the case where
                            // the final char of the vector is a high surrogate, since we can't perform validation
                            // on it until the next iteration of the loop when we hope to consume the matching
                            // low surrogate.

                            highSurrogatesMask <<= 2;
                            if ((ushort)highSurrogatesMask != lowSurrogatesMask)
                            {
                                goto NonVectorizedLoop; // error: mismatched surrogate pair; break out of vectorized logic
                            }

                            if (highSurrogatesMask > ushort.MaxValue)
                            {
                                // There was a standalone high surrogate at the end of the vector.
                                // We'll adjust our counters so that we don't consider this char consumed.

                                highSurrogatesMask = (ushort)highSurrogatesMask; // don't allow stray high surrogate to be consumed by popcnt
                                popcnt            -= 2;                          // the '0xC000_0000' bits in the original mask are shifted out and discarded, so account for that here
                                pInputBuffer--;
                                inputLength++;
                            }

                            // If we're 64-bit, we can perform the zero-extension of the surrogate pairs count for
                            // free right now, saving the extension step a few lines below. If we're 32-bit, the
                            // conversion to nuint immediately below is a no-op, and we'll pay the cost of the real
                            // 64-bit extension a few lines below.
                            nuint surrogatePairsCountNuint = (uint)BitOperations.PopCount(highSurrogatesMask);

                            // 2 UTF-16 chars become 1 Unicode scalar

                            tempScalarCountAdjustment -= (int)surrogatePairsCountNuint;

                            // Since each surrogate code unit was >= 0x0800, we eagerly assumed
                            // it'd be encoded as 3 UTF-8 code units, so our earlier popcnt computation
                            // assumes that the pair is encoded as 6 UTF-8 code units. Since each
                            // pair is in reality only encoded as 4 UTF-8 code units, we need to
                            // perform this adjustment now.

                            if (IntPtr.Size == 8)
                            {
                                // Since we've already zero-extended surrogatePairsCountNuint, we can directly
                                // sub + sub. It's more efficient than shl + sub.
                                tempUtf8CodeUnitCountAdjustment -= (long)surrogatePairsCountNuint;
                                tempUtf8CodeUnitCountAdjustment -= (long)surrogatePairsCountNuint;
                            }
                            else
                            {
                                // Take the hit of the 64-bit extension now.
                                tempUtf8CodeUnitCountAdjustment -= 2 * (uint)surrogatePairsCountNuint;
                            }
                        }

                        tempUtf8CodeUnitCountAdjustment += popcnt;
                        pInputBuffer += Vector128<ushort>.Count;
                        inputLength  -= Vector128<ushort>.Count;
                    } while (inputLength >= Vector128<ushort>.Count);
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                if (inputLength >= Vector<ushort>.Count)
                {
                    Vector<ushort> vector0080 = new Vector<ushort>(0x0080);
                    Vector<ushort> vector0400 = new Vector<ushort>(0x0400);
                    Vector<ushort> vector0800 = new Vector<ushort>(0x0800);
                    Vector<ushort> vectorD800 = new Vector<ushort>(0xD800);

                    do
                    {
                        // The 'twoOrMoreUtf8Bytes' and 'threeOrMoreUtf8Bytes' vectors will contain
                        // elements whose values are 0xFFFF (-1 as signed word) iff the corresponding
                        // UTF-16 code unit was >= 0x0080 and >= 0x0800, respectively. By summing these
                        // vectors, each element of the sum will contain one of three values:
                        //
                        // 0x0000 ( 0) = original char was 0000..007F
                        // 0xFFFF (-1) = original char was 0080..07FF
                        // 0xFFFE (-2) = original char was 0800..FFFF
                        //
                        // We'll negate them to produce a value 0..2 for each element, then sum all the
                        // elements together to produce the number of *additional* UTF-8 code units
                        // required to represent this UTF-16 data. This is similar to the popcnt step
                        // performed by the SSE2 code path. This will overcount surrogates, but we'll
                        // handle that shortly.

                        Vector<ushort>  utf16Data            = Unsafe.ReadUnaligned<Vector<ushort>>(pInputBuffer);
                        Vector<ushort>  twoOrMoreUtf8Bytes   = Vector.GreaterThanOrEqual(utf16Data, vector0080);
                        Vector<ushort>  threeOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0800);
                        Vector<nuint_t> sumVector            = (Vector<nuint_t>)(Vector<ushort>.Zero - twoOrMoreUtf8Bytes - threeOrMoreUtf8Bytes);

                        // We'll try summing by a natural word (rather than a 16-bit word) at a time,
                        // which should halve the number of operations we must perform.

                        nuint popcnt = 0;
                        for (int i = 0; i < Vector<nuint_t>.Count; i++)
                        {
                            popcnt += (nuint)sumVector[i];
                        }

                        uint popcnt32 = (uint)popcnt;
                        if (IntPtr.Size == 8)
                        {
                            popcnt32 += (uint)(popcnt >> 32);
                        }

                        // As in the SSE4.1 paths, compute popcnt but don't fold it in until we
                        // know there aren't any unpaired surrogates in the input data.

                        popcnt32 = (ushort)popcnt32 + (popcnt32 >> 16);

                        // Now check for surrogates.

                        utf16Data -= vectorD800;
                        Vector<ushort> surrogateChars = Vector.LessThan(utf16Data, vector0800);
                        if (surrogateChars != Vector<ushort>.Zero)
                        {
                            // There's at least one surrogate (high or low) UTF-16 code unit in
                            // the vector. We'll build up additional vectors: 'highSurrogateChars'
                            // and 'lowSurrogateChars', where the elements are 0xFFFF iff the original
                            // UTF-16 code unit was a high or low surrogate, respectively.

                            Vector<ushort> highSurrogateChars = Vector.LessThan(utf16Data, vector0400);
                            Vector<ushort> lowSurrogateChars  = Vector.AndNot(surrogateChars, highSurrogateChars);

                            // We want to make sure that each high surrogate code unit is followed by
                            // a low surrogate code unit and each low surrogate code unit follows a
                            // high surrogate code unit. Since we don't have an equivalent of pmovmskb
                            // or palignr available to us, we'll do this as a loop. We won't look at
                            // the very last high surrogate char element since we don't yet know if
                            // the next vector read will have a low surrogate char element.

                            if (lowSurrogateChars[0] != 0)
                            {
                                goto Error; // error: start of buffer contains standalone low surrogate char
                            }

                            ushort surrogatePairsCount = 0;
                            for (int i = 0; i < Vector<ushort>.Count - 1; i++)
                            {
                                surrogatePairsCount -= highSurrogateChars[i]; // turns into +1 or +0
                                if (highSurrogateChars[i] != lowSurrogateChars[i + 1])
                                {
                                    goto NonVectorizedLoop; // error: mismatched surrogate pair; break out of vectorized logic
                                }
                            }

                            if (highSurrogateChars[Vector<ushort>.Count - 1] != 0)
                            {
                                // There was a standalone high surrogate at the end of the vector.
                                // We'll adjust our counters so that we don't consider this char consumed.

                                pInputBuffer--;
                                inputLength++;
                                popcnt32 -= 2;
                            }

                            nint surrogatePairsCountNint = (nint)surrogatePairsCount; // zero-extend to native int size

                            // 2 UTF-16 chars become 1 Unicode scalar

                            tempScalarCountAdjustment -= (int)surrogatePairsCountNint;

                            // Since each surrogate code unit was >= 0x0800, we eagerly assumed
                            // it'd be encoded as 3 UTF-8 code units. Each surrogate half is only
                            // encoded as 2 UTF-8 code units (for 4 UTF-8 code units total),
                            // so we'll adjust this now.

                            tempUtf8CodeUnitCountAdjustment -= surrogatePairsCountNint;
                            tempUtf8CodeUnitCountAdjustment -= surrogatePairsCountNint;
                        }

                        tempUtf8CodeUnitCountAdjustment += popcnt32;
                        pInputBuffer += Vector<ushort>.Count;
                        inputLength  -= Vector<ushort>.Count;
                    } while (inputLength >= Vector<ushort>.Count);
                }
            }

        NonVectorizedLoop:

            // Vectorization isn't supported on our current platform, or the input was too small to benefit
            // from vectorization, or we saw invalid UTF-16 data in the vectorized code paths and need to
            // drain remaining valid chars before we report failure.

            for (; inputLength > 0; pInputBuffer++, inputLength--)
            {
                uint thisChar = pInputBuffer[0];
                if (thisChar <= 0x7F)
                {
                    continue;
                }

                // Bump adjustment by +1 for U+0080..U+07FF; by +2 for U+0800..U+FFFF.
                // This optimistically assumes no surrogates, which we'll handle shortly.

                tempUtf8CodeUnitCountAdjustment += (thisChar + 0x0001_F800u) >> 16;

                if (!UnicodeUtility.IsSurrogateCodePoint(thisChar))
                {
                    continue;
                }

                // Found a surrogate char. Back out the adjustment we made above, then
                // try to consume the entire surrogate pair all at once. We won't bother
                // trying to interpret the surrogate pair as a scalar value; we'll only
                // validate that its bit pattern matches what's expected for a surrogate pair.

                tempUtf8CodeUnitCountAdjustment -= 2;

                if (inputLength == 1)
                {
                    goto Error; // input buffer too small to read a surrogate pair
                }

                thisChar = Unsafe.ReadUnaligned<uint>(pInputBuffer);
                if (((thisChar - (BitConverter.IsLittleEndian ? 0xDC00_D800u : 0xD800_DC00u)) & 0xFC00_FC00u) != 0)
                {
                    goto Error; // not a well-formed surrogate pair
                }

                tempScalarCountAdjustment--;          // 2 UTF-16 code units -> 1 scalar
                tempUtf8CodeUnitCountAdjustment += 2; // 2 UTF-16 code units -> 4 UTF-8 code units

                pInputBuffer++;                       // consumed one extra char
                inputLength--;
            }

        Error:

            // Also used for normal return.

            utf8CodeUnitCountAdjustment = tempUtf8CodeUnitCountAdjustment;
            scalarCountAdjustment       = tempScalarCountAdjustment;
            return pInputBuffer;
        }

Example #8

Show file

        /// <summary>
        /// Feeds <paramref name="len"/> bytes of <paramref name="buf"/> into a running Adler-32 checksum.
        /// Complete 32-byte blocks go through ARM AdvSimd intrinsics; the remaining bytes are summed
        /// with plain scalar code.
        /// </summary>
        /// <param name="preSum1">First component sum: read as the starting value, overwritten with the result.</param>
        /// <param name="preSum2">Second component sum: read as the starting value, overwritten with the result.</param>
        /// <param name="buf">Bytes to checksum, consumed from index 0 onwards.</param>
        /// <param name="len">Number of bytes of <paramref name="buf"/> to consume.</param>
        internal static void Step(ref ushort preSum1, ref ushort preSum2, byte[] buf, uint len)
        {
            // Work on widened copies of the two component sums so they can grow between reductions.
            uint lowSum  = preSum1;
            uint highSum = preSum2;
            int  cursor  = 0;

            // The vector path only handles whole 32-byte blocks; the rest is left for the scalar tail.
            const uint bytesPerBlock = 1 << 5;
            uint       blocksLeft    = len / bytesPerBlock;
            uint       tailLength    = len - blocksLeft * bytesPerBlock;

            while (blocksLeft != 0)
            {
                // The NMAX constraint: at most NMAX bytes may be summed before the
                // second sum has to be reduced modulo ADLER_MODULE.
                uint batchCap = Adler32Context.NMAX / bytesPerBlock;
                uint batch    = batchCap > blocksLeft ? blocksLeft : batchCap;

                blocksLeft -= batch;

                // Lane 0 of highLanes starts at lowSum * batch; the shift by 5 after the loop
                // turns that into lowSum times the number of bytes in this batch.
                Vector128<uint>   highLanes = Vector128.Create(lowSum * batch, 0, 0, 0);
                Vector128<uint>   lowLanes  = Vector128<uint>.Zero;
                Vector128<ushort> columnsA  = AdvSimd.DuplicateToVector128((ushort)0);
                Vector128<ushort> columnsB  = AdvSimd.DuplicateToVector128((ushort)0);
                Vector128<ushort> columnsC  = AdvSimd.DuplicateToVector128((ushort)0);
                Vector128<ushort> columnsD  = AdvSimd.DuplicateToVector128((ushort)0);

                do
                {
                    // Gather the next 32 input bytes as two 16-byte vectors.
                    Vector128<byte> firstHalf = Vector128.Create(buf[cursor], buf[cursor + 1], buf[cursor + 2],
                                                                 buf[cursor + 3], buf[cursor + 4], buf[cursor + 5],
                                                                 buf[cursor + 6], buf[cursor + 7], buf[cursor + 8],
                                                                 buf[cursor + 9], buf[cursor + 10], buf[cursor + 11],
                                                                 buf[cursor + 12], buf[cursor + 13], buf[cursor + 14],
                                                                 buf[cursor + 15]);

                    Vector128<byte> secondHalf = Vector128.Create(buf[cursor + 16], buf[cursor + 17], buf[cursor + 18],
                                                                  buf[cursor + 19], buf[cursor + 20], buf[cursor + 21],
                                                                  buf[cursor + 22], buf[cursor + 23], buf[cursor + 24],
                                                                  buf[cursor + 25], buf[cursor + 26], buf[cursor + 27],
                                                                  buf[cursor + 28], buf[cursor + 29], buf[cursor + 30],
                                                                  buf[cursor + 31]);

                    cursor += 32;

                    // The byte sum accumulated by the previous blocks is added to the second sum first.
                    highLanes = AdvSimd.Add(highLanes, lowLanes);

                    // Horizontal byte sum for the first sum: bytes -> 16-bit pairs -> 32-bit lanes.
                    Vector128<ushort> pairSums = AdvSimd.AddPairwiseWidening(firstHalf);
                    pairSums = AdvSimd.AddPairwiseWideningAndAdd(pairSums, secondHalf);
                    lowLanes = AdvSimd.AddPairwiseWideningAndAdd(lowLanes, pairSums);

                    // Per-column byte sums; they are weighted by position once the batch is finished.
                    columnsA = AdvSimd.AddWideningLower(columnsA, firstHalf.GetLower());
                    columnsB = AdvSimd.AddWideningLower(columnsB, firstHalf.GetUpper());
                    columnsC = AdvSimd.AddWideningLower(columnsC, secondHalf.GetLower());
                    columnsD = AdvSimd.AddWideningLower(columnsD, secondHalf.GetUpper());

                    batch--;
                } while (batch != 0);

                // Scale by the block size (32 == 1 << 5).
                highLanes = AdvSimd.ShiftLeftLogical(highLanes, 5);

                // Weight every column sum by its distance from the end of the block: 32, 31, ..., 1.
                highLanes = AdvSimd.MultiplyWideningLowerAndAdd(highLanes, columnsA.GetLower(),
                                                                Vector64.Create((ushort)32, 31, 30, 29));

                highLanes = AdvSimd.MultiplyWideningLowerAndAdd(highLanes, columnsA.GetUpper(),
                                                                Vector64.Create((ushort)28, 27, 26, 25));

                highLanes = AdvSimd.MultiplyWideningLowerAndAdd(highLanes, columnsB.GetLower(),
                                                                Vector64.Create((ushort)24, 23, 22, 21));

                highLanes = AdvSimd.MultiplyWideningLowerAndAdd(highLanes, columnsB.GetUpper(),
                                                                Vector64.Create((ushort)20, 19, 18, 17));

                highLanes = AdvSimd.MultiplyWideningLowerAndAdd(highLanes, columnsC.GetLower(),
                                                                Vector64.Create((ushort)16, 15, 14, 13));

                highLanes = AdvSimd.MultiplyWideningLowerAndAdd(highLanes, columnsC.GetUpper(),
                                                                Vector64.Create((ushort)12, 11, 10, 9));

                highLanes = AdvSimd.MultiplyWideningLowerAndAdd(highLanes, columnsD.GetLower(),
                                                                Vector64.Create((ushort)8, 7, 6, 5));

                highLanes = AdvSimd.MultiplyWideningLowerAndAdd(highLanes, columnsD.GetUpper(),
                                                                Vector64.Create((ushort)4, 3, 2, 1));

                // Collapse the four 32-bit lanes of each accumulator: element 0 of the final
                // pairwise add holds the first-sum total, element 1 the second-sum total.
                Vector64<uint> lowPairs  = AdvSimd.AddPairwise(lowLanes.GetLower(), lowLanes.GetUpper());
                Vector64<uint> highPairs = AdvSimd.AddPairwise(highLanes.GetLower(), highLanes.GetUpper());
                Vector64<uint> totals    = AdvSimd.AddPairwise(lowPairs, highPairs);

                lowSum  += AdvSimd.Extract(totals, 0);
                highSum += AdvSimd.Extract(totals, 1);

                // Reduce both sums before the next batch.
                lowSum  %= Adler32Context.ADLER_MODULE;
                highSum %= Adler32Context.ADLER_MODULE;
            }

            // Scalar pass over the bytes that did not fill a whole block.
            if (tailLength != 0)
            {
                for (uint remaining = tailLength; remaining != 0; remaining--)
                {
                    lowSum  += buf[cursor++];
                    highSum += lowSum;
                }

                if (lowSum >= Adler32Context.ADLER_MODULE)
                {
                    lowSum -= Adler32Context.ADLER_MODULE;
                }

                highSum %= Adler32Context.ADLER_MODULE;
            }

            // Hand the low 16 bits of each sum back to the caller.
            preSum1 = (ushort)(lowSum & 0xFFFF);
            preSum2 = (ushort)(highSum & 0xFFFF);
        }

Example #9

Show file

File: XxHash3.Neon.cs Project: Pathoschild/smapi-mod-dump

    /// <summary>
    /// Mixes one stripe of input into <paramref name="accumulator"/> using ARM AdvSimd intrinsics.
    /// Each 16-byte lane is updated as
    /// <c>acc += swapHalves(input) + low32(input ^ key) * high32(input ^ key)</c>.
    /// The three code paths differ only in how many lanes are written out per step, selected by
    /// <c>UnrollCount</c>.
    /// </summary>
    /// <param name="accumulator">Accumulator state, updated in place through its <c>Data128</c> view.</param>
    /// <param name="data">Pointer to the input bytes of the stripe.</param>
    /// <param name="secret">Pointer to the key bytes paired with <paramref name="data"/>.</param>
    private static void Accumulate512Neon(ref Accumulator accumulator, byte *data, byte *secret)
    {
        if (UnrollCount > 2u)
        {
            // Four lanes written out explicitly, covering byte offsets 0x00 through 0x3F.
            var lane0 = accumulator.Data128.Data0;
            var lane1 = accumulator.Data128.Data1;
            var lane2 = accumulator.Data128.Data2;
            var lane3 = accumulator.Data128.Data3;

            var input0 = AdvSimd.LoadVector128((ulong *)(data + 0x00u));
            var input1 = AdvSimd.LoadVector128((ulong *)(data + 0x10u));
            var input2 = AdvSimd.LoadVector128((ulong *)(data + 0x20u));
            var input3 = AdvSimd.LoadVector128((ulong *)(data + 0x30u));
            var key0   = AdvSimd.LoadVector128((ulong *)(secret + 0x00u));
            var key1   = AdvSimd.LoadVector128((ulong *)(secret + 0x10u));
            var key2   = AdvSimd.LoadVector128((ulong *)(secret + 0x20u));
            var key3   = AdvSimd.LoadVector128((ulong *)(secret + 0x30u));

            // Extracting at index 1 from a vector paired with itself exchanges its two 64-bit halves.
            var swapped0 = AdvSimd.ExtractVector128(input0, input0, 1);
            var swapped1 = AdvSimd.ExtractVector128(input1, input1, 1);
            var swapped2 = AdvSimd.ExtractVector128(input2, input2, 1);
            var swapped3 = AdvSimd.ExtractVector128(input3, input3, 1);

            var mixed0 = AdvSimd.Xor(input0, key0);
            var mixed1 = AdvSimd.Xor(input1, key1);
            var mixed2 = AdvSimd.Xor(input2, key2);
            var mixed3 = AdvSimd.Xor(input3, key3);

            // Split every 64-bit element of the mixed value into its low and high 32 bits.
            var mixedLow0  = AdvSimd.ExtractNarrowingLower(mixed0);
            var mixedLow1  = AdvSimd.ExtractNarrowingLower(mixed1);
            var mixedLow2  = AdvSimd.ExtractNarrowingLower(mixed2);
            var mixedLow3  = AdvSimd.ExtractNarrowingLower(mixed3);
            var mixedHigh0 = AdvSimd.ShiftRightLogicalNarrowingLower(mixed0, 32);
            var mixedHigh1 = AdvSimd.ShiftRightLogicalNarrowingLower(mixed1, 32);
            var mixedHigh2 = AdvSimd.ShiftRightLogicalNarrowingLower(mixed2, 32);
            var mixedHigh3 = AdvSimd.ShiftRightLogicalNarrowingLower(mixed3, 32);

            // swapped + low * high, with the product widened to 64 bits.
            var term0 = AdvSimd.MultiplyWideningLowerAndAdd(swapped0, mixedLow0, mixedHigh0);
            var term1 = AdvSimd.MultiplyWideningLowerAndAdd(swapped1, mixedLow1, mixedHigh1);
            var term2 = AdvSimd.MultiplyWideningLowerAndAdd(swapped2, mixedLow2, mixedHigh2);
            var term3 = AdvSimd.MultiplyWideningLowerAndAdd(swapped3, mixedLow3, mixedHigh3);

            var updated0 = AdvSimd.Add(lane0, term0);
            var updated1 = AdvSimd.Add(lane1, term1);
            var updated2 = AdvSimd.Add(lane2, term2);
            var updated3 = AdvSimd.Add(lane3, term3);

            // Nothing is written back until every lane has been computed.
            accumulator.Data128.Data0 = updated0;
            accumulator.Data128.Data1 = updated1;
            accumulator.Data128.Data2 = updated2;
            accumulator.Data128.Data3 = updated3;

            return;
        }

        if (UnrollCount == 2u)
        {
            // Two lanes (0x20 bytes) per iteration.
            for (uint offset = 0u; offset < StripeLength; offset += 0x20u)
            {
                var lane0 = accumulator.Data128.AtOffset(offset + 0x00u);
                var lane1 = accumulator.Data128.AtOffset(offset + 0x10u);

                var input0 = AdvSimd.LoadVector128((ulong *)(data + offset + 0x00u));
                var input1 = AdvSimd.LoadVector128((ulong *)(data + offset + 0x10u));
                var key0   = AdvSimd.LoadVector128((ulong *)(secret + offset + 0x00u));
                var key1   = AdvSimd.LoadVector128((ulong *)(secret + offset + 0x10u));

                var swapped0 = AdvSimd.ExtractVector128(input0, input0, 1);
                var swapped1 = AdvSimd.ExtractVector128(input1, input1, 1);

                var mixed0 = AdvSimd.Xor(input0, key0);
                var mixed1 = AdvSimd.Xor(input1, key1);

                var mixedLow0  = AdvSimd.ExtractNarrowingLower(mixed0);
                var mixedLow1  = AdvSimd.ExtractNarrowingLower(mixed1);
                var mixedHigh0 = AdvSimd.ShiftRightLogicalNarrowingLower(mixed0, 32);
                var mixedHigh1 = AdvSimd.ShiftRightLogicalNarrowingLower(mixed1, 32);

                var term0 = AdvSimd.MultiplyWideningLowerAndAdd(swapped0, mixedLow0, mixedHigh0);
                var term1 = AdvSimd.MultiplyWideningLowerAndAdd(swapped1, mixedLow1, mixedHigh1);

                var updated0 = AdvSimd.Add(lane0, term0);
                var updated1 = AdvSimd.Add(lane1, term1);

                accumulator.Data128.AtOffset(offset + 0x00u) = updated0;
                accumulator.Data128.AtOffset(offset + 0x10u) = updated1;
            }

            return;
        }

        // No unrolling: one lane (0x10 bytes) per iteration.
        for (uint offset = 0u; offset < StripeLength; offset += 0x10u)
        {
            var lane = accumulator.Data128.AtOffset(offset);

            var input = AdvSimd.LoadVector128((ulong *)(data + offset));
            var key   = AdvSimd.LoadVector128((ulong *)(secret + offset));

            var swapped = AdvSimd.ExtractVector128(input, input, 1);

            var mixed = AdvSimd.Xor(input, key);

            var mixedLow  = AdvSimd.ExtractNarrowingLower(mixed);
            var mixedHigh = AdvSimd.ShiftRightLogicalNarrowingLower(mixed, 32);

            var term = AdvSimd.MultiplyWideningLowerAndAdd(swapped, mixedLow, mixedHigh);

            var updated = AdvSimd.Add(lane, term);

            accumulator.Data128.AtOffset(offset) = updated;
        }
    }